Skip to content

Commit

Permalink
Resolve issue #25
Browse files (browse the repository at this point in the history)
  • Loading branch information
Peter Causey-Freeman committed Nov 19, 2019
1 parent 2d9522d commit d246dba
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
33 changes: 23 additions & 10 deletions VariantValidator/modules/format_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,22 @@ def vcf2hgvs_stage2(variant, validator):
The reference sequence type is also assigned.
"""
skipvar = False
if re.search(r'\w+:', variant.quibble) and not re.search(r'\w+:[gcnmrp]\.', variant.quibble):
if (re.search(r'\w+:', variant.quibble) or re.search(r'\w+\(\w+\):', variant.quibble)) and not \
(re.search(r'\w+:[gcnmrp]\.', variant.quibble) or re.search(r'\w+\(\w+\):[gcnmrp]\.', variant.quibble)):
if re.search(r'\w+:[gcnmrp]', variant.quibble) and not re.search(r'\w+:[gcnmrp]\.', variant.quibble):
# Missing dot
pass
else:
try:
if 'GRCh37' in variant.quibble or 'hg19' in variant.quibble:
variant.primary_assembly = 'GRCh37'
validator.selected_assembly = 'GRCh37'
variant.quibble.format_quibble()
elif 'GRCh38' in variant.quibble or 'hg38' in variant.quibble:
variant.primary_assembly = 'GRCh38'
validator.selected_assembly = 'GRCh38'
variant.format_quibble()
# Remove all content in brackets
input_list = variant.quibble.split(':')
pos_ref_alt = str(input_list[1])
position_and_edit = input_list[1]
Expand Down Expand Up @@ -216,7 +222,8 @@ def vcf2hgvs_stage3(variant, validator):
software
"""
skipvar = False
if re.search(r'\w+:[gcnmrp]\.', variant.quibble) and not re.match(r'N[CGTWMRP]_', variant.quibble):
if (re.search(r'\w+:[gcnmrp]\.', variant.quibble) or re.search(r'\w+\(\w+\):[gcnmrp]\.', variant.quibble)) \
and not re.match(r'N[CGTWMRP]_', variant.quibble):
# Take out lowercase Accession characters
lower_cased_list = variant.quibble.split(':')
if re.search('LRG', lower_cased_list[0], re.IGNORECASE):
Expand All @@ -231,10 +238,16 @@ def vcf2hgvs_stage3(variant, validator):
variant.quibble = lower_case_accession + ':' + variant.quibble
if 'LRG_' not in variant.quibble and 'ENS' not in variant.quibble and not re.match('N[MRPC]_', variant.quibble):
try:
if 'GRCh37' in variant.quibble or 'hg19' in variant.quibble:
if re.search('GRCh37', variant.quibble, re.IGNORECASE) or \
re.search('hg19', variant.quibble, re.IGNORECASE):
variant.primary_assembly = 'GRCh37'
elif 'GRCh38' in variant.quibble or 'hg38' in variant.quibble:
validator.selected_assembly = 'GRCh37'
variant.format_quibble()
if re.search('GRCh38', variant.quibble, re.IGNORECASE) or \
re.search('hg38', variant.quibble, re.IGNORECASE):
variant.primary_assembly = 'GRCh38'
validator.selected_assembly = 'GRCh38'
variant.format_quibble()
input_list = variant.quibble.split(':')
query_a_symbol = input_list[0]
is_it_a_gene = validator.db.get_hgnc_symbol(query_a_symbol)
Expand Down Expand Up @@ -302,9 +315,9 @@ def gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version):
variant.quibble + ' and specify transcripts from the following: ' +
'select_transcripts=' + select_from_these_transcripts)
logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' +
query_a_symbol + ') in place of a valid reference sequence: Re-submit ' +
variant.quibble + ' and specify transcripts from the following: ' +
'select_transcripts=' + select_from_these_transcripts)
query_a_symbol + ') in place of a valid reference sequence: Re-submit ' +
variant.quibble + ' and specify transcripts from the following: ' +
'select_transcripts=' + select_from_these_transcripts)
skipvar = True
except Exception as e:
logger.debug("Except passed, %s", e)
Expand Down Expand Up @@ -356,9 +369,9 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version):
' but also specify transcripts from the following: ' +
'select_transcripts=' + select_from_these_transcripts)
logger.warning('A transcript reference sequence has not been provided e.g. '
'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + ' but also '
'specify transcripts from the following: select_transcripts=' +
select_from_these_transcripts)
'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + ' but also '
'specify transcripts from the following: select_transcripts=' +
select_from_these_transcripts)
skipvar = True
else:
variant.warnings.append('A transcript reference sequence has not been provided e.g. '
Expand Down
6 changes: 2 additions & 4 deletions VariantValidator/modules/vvMixinCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
self.batch_list = []
for queries in batch_queries:
queries = queries.strip()
queries = queries.replace('"', '')
queries = queries.replace("'", "")
query = Variant(queries)
self.batch_list.append(query)
logger.info("Submitting variant with format %s", queries)
Expand Down Expand Up @@ -137,7 +135,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
logger.warning(error)
continue

# Remove whitespace
# Remove whitespace and quotes
my_variant.remove_whitespace()
my_variant.remove_quotes()

Expand Down Expand Up @@ -905,7 +903,7 @@ def gene2transcripts(self, query):
except fn.DatabaseConnectionError as e:
error = 'Currently unable to update gene_ids or transcript information records because ' \
'VariantValidator %s' % str(e)
my_variant.warnings.append(error)
# my_variant.warnings.append(error)
logger.warning(error)
tx_description = self.db.get_transcript_description(tx)
# Check for duplicates
Expand Down

0 comments on commit d246dba

Please sign in to comment.