From 1ab6c1011b258e6529c2665ad132e03e7141c998 Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Wed, 27 Apr 2022 15:19:30 +0100 Subject: [PATCH 1/3] Final changes to VF that deal with issue https://github.com/openvar/variantValidator/issues/360 --- .DS_Store | Bin 8196 -> 10244 bytes VariantFormatter/formatter.py | 1 + VariantFormatter/variantformatter.py | 11 +++++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.DS_Store b/.DS_Store index 46b5e806a32a4a357a9ad1d96df3f8eade2bf5c6..32ed29759392aaa215f4ae53dd15e96680056169 100644 GIT binary patch delta 1232 zcmZp1XbF&DU|?W$DortDU{C-uIe-{M3-C-V6q~50D69hHFar4u48=L=hQZ1Cxdj^w zr?F3L;NQ&7!NO4wl4oVmV@PF42ST_SpeO?a)9GSV+1z{=m!zEhB%mlqOUP1@C;W#T z(N&}nP*IQpaRpEx(B}F85XAy?KsrMvLkY}@AVCJ9mq3Rj)1&R`Wmwz9W!sXHjefOG*R5a~2aUW|PoR6+p74)zBO zlNX2?GfGYt5KvG9#ycc~>H__m$B@R5kK$Vfu{A*J!Ai9-l_I<3&dpYyIUf&$k_K+& z2=}2YSHv_O#nlYJ!bN~ZkQrz^5GZg130F{Iw6X9z^JIRRKoKTL5v2i=2Ig)95Iwm- Lrg!sPL1tzE5eFB( delta 141 zcmZn(XmOBWU|?W$DortDU;r^WfEYvza8E20o2aKK$^w!H@);Ns8S)rP7~B~08HzR* zPGg_gz`L29gN1{UWwVX29OLFbky)&h&q(oaEHPmeWCrR20ts#);R-TsW8rt^$^0sT Y96+-`b}&qi=b5@WQQ|lwM2{gO0M+ap#Q*>R diff --git a/VariantFormatter/formatter.py b/VariantFormatter/formatter.py index e9effac..6197b3c 100644 --- a/VariantFormatter/formatter.py +++ b/VariantFormatter/formatter.py @@ -74,6 +74,7 @@ def vcf2hgvs_genomic(pseudo_vcf, genome_build, vfo): pos = vcf_list[1] ref = vcf_list[2] alt = vcf_list[3] + # assemble the HGVS genomic description ac = chr_dict.to_accession(chrom, genome_build) if ac is None: diff --git a/VariantFormatter/variantformatter.py b/VariantFormatter/variantformatter.py index 2ebc8ef..cab0279 100644 --- a/VariantFormatter/variantformatter.py +++ b/VariantFormatter/variantformatter.py @@ -64,7 +64,7 @@ def __init__(self, p_vcf, g_hgvs, un_norm_hgvs, hgvs_ref_bases, gen_error, genom # Warn incorrect m. accession for hg19 try: - if ("NC_012920.1" in str(g_hgvs) or "NC_001807.4" in gen_error) and "hg19" in genome_build: + if ("NC_012920.1" in str(g_hgvs)) and "hg19" in genome_build: gen_error = "NC_012920.1 is not associated with genome build hg19, instead use genome build GRCh37" except TypeError: pass @@ -189,8 +189,13 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None # Continuation - No exception try: vcf_dictionary = formatter.hgvs_genomic2vcf(hgvs_genomic, self.genome_build, self.vfo) - vcf_list = [vcf_dictionary['grc_chr'], vcf_dictionary['pos'], vcf_dictionary['ref'], + if vcf_dictionary['grc_chr'] == "NC_001807.4" and genome_build == "hg19": + chr_num = vcf_dictionary['ucsc_chr'] + else: + chr_num = vcf_dictionary['grc_chr'] + vcf_list = [chr_num, vcf_dictionary['pos'], vcf_dictionary['ref'], vcf_dictionary['alt']] + p_vcf = ':'.join(vcf_list) except Exception as e: if "Variant span is outside sequence bounds" in str(e): @@ -206,6 +211,7 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None self.genomic_descriptions = gds self.warning_level = 'genomic_variant_warning' return + try: genomic_level = formatter.vcf2hgvs_genomic(p_vcf, self.genome_build, self.vfo) except Exception as e: @@ -434,6 +440,7 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None elif self.genomic_descriptions.selected_build == 'hg38' or self.genomic_descriptions.selected_build \ == 'GRCh38': build_to = 'GRCh37' + current_lift = lo.liftover(self.genomic_descriptions.g_hgvs, self.genomic_descriptions.selected_build, build_to, From 4bed18f64a9b1d51e38d96ea435ccf4eae57cf3f Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Thu, 28 Apr 2022 15:23:22 +0100 Subject: [PATCH 2/3] minor edits --- .DS_Store | Bin 10244 -> 10244 bytes VariantFormatter/variantformatter.py | 2 ++ 2 files changed, 2 insertions(+) diff --git a/.DS_Store b/.DS_Store index 32ed29759392aaa215f4ae53dd15e96680056169..a2308d9d6fd27542c522dd7a34187e9746506307 100644 GIT binary patch delta 271 zcmZn(XbIS`RG?m1_BsOt0}F#5LpnnyLrHGFi%U{YeiBfOV^yk^^%0&Uj;Qh}aQU|` z{g&wR1sR6H$@#ejKs^i$$_^ls6=-%QLkdGGLt;)kk}N~A6azy7vNW>giGT7Eb9up* zBN>pJkFfst%apP#3>7F2U|7H~SxiW7@*=_gjCq@rgjkuFe*K+nBdoHyS7bRWn@mWO MXEyI Date: Thu, 12 May 2022 09:25:22 +0100 Subject: [PATCH 3/3] tweak --- VariantFormatter/simpleVariantFormatter.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/VariantFormatter/simpleVariantFormatter.py b/VariantFormatter/simpleVariantFormatter.py index 36186c8..65185e7 100644 --- a/VariantFormatter/simpleVariantFormatter.py +++ b/VariantFormatter/simpleVariantFormatter.py @@ -55,9 +55,8 @@ def format(batch_input, genome_build, transcript_model=None, specify_transcripts vcf_list = pseudo_vcf.split('-') delimiter = '-' if len(vcf_list) != 4: - formatted_variants[variant]['errors'].append( - '%s is an unsupported format: For assistance, submit variant description to ' - 'https://rest.variantvalidator.org') % pseudo_vcf + error = '%s is an unsupported format: For assistance, submit variant description to https://rest.variantvalidator.org' % str(pseudo_vcf) + formatted_variants[variant]['errors'].append(error) formatted_variants[variant]['flag'] = 'submission_warning' continue if ',' in str(vcf_list[-1]): @@ -71,9 +70,8 @@ def format(batch_input, genome_build, transcript_model=None, specify_transcripts try: format_these.append(variant) except Exception: - formatted_variants[variant]['errors'].append( - '%s is an unsupported format: For assistance, submit variant description to ' - 'https://rest.variantvalidator.org') % variant + error = '%s is an unsupported format: For assistance, submit variant description to https://rest.variantvalidator.org' % variant + formatted_variants[variant]['errors'].append(error) formatted_variants[variant]['flag'] = 'submission_warning' continue