Skip to content

Commit

Permalink
add transcript id to annotated xml output
Browse files: browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Dec 11, 2023
1 parent db26e08 commit 7716caa
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
11 changes: 8 additions & 3 deletions cmat/output_generation/annotated_clinvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ def annotate(self, record):
self.overall_counts['both_measure_and_trait'] += 1

def annotate_and_count_measure(self, record):
# TODO include transcript if present in variant_to_gene_mappings
consequence_types, variant_category = get_consequence_types(record.measure, self.variant_to_gene_mappings)
record.measure.add_ensembl_annotations(consequence_types)

Expand Down Expand Up @@ -242,8 +241,14 @@ def add_ensembl_annotations(self, consequences):
attribute_elt.text = consequence_attributes.so_term.so_name.replace('_', ' ')
so_elt = ET.Element('XRef', attrib={'ID': self.format_so_term(consequence_attributes.so_term),
'DB': 'Sequence Ontology'})
ensembl_elt = ET.Element('XRef', attrib={'ID': consequence_attributes.ensembl_gene_id, 'DB': 'Ensembl'})
attr_set_elt.extend((attribute_elt, so_elt, ensembl_elt))
ensembl_gene_elt = ET.Element('XRef', attrib={'ID': consequence_attributes.ensembl_gene_id,
'DB': 'Ensembl Gene'})
attr_set_elt.extend((attribute_elt, so_elt, ensembl_gene_elt))
# Add transcript if present
if consequence_attributes.ensembl_transcript_id:
ensembl_transcript_elt = ET.Element('XRef', attrib={'ID': consequence_attributes.ensembl_transcript_id,
'DB': 'Ensembl Transcript'})
attr_set_elt.append(ensembl_transcript_elt)
consequence_elts.append(attr_set_elt)
self.measure_xml.extend(consequence_elts)

Expand Down
17 changes: 11 additions & 6 deletions cmat/output_generation/consequence_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@
logger = logging.getLogger(__package__)


def process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term):
consequence_type_dict[variant_id].append(ConsequenceType(ensembl_gene_id, SoTerm(so_term)))
def process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term, ensembl_transcript_id=None):
consequence_type_dict[variant_id].append(ConsequenceType(ensembl_gene_id, SoTerm(so_term), ensembl_transcript_id))


def process_consequence_type_file(snp_2_gene_file, consequence_type_dict=None):
# TODO adapt for transcripts if present
"""
Return a dictionary of consequence information extracted from the given file.
If consequence_type_dict is provided then the information will be merge into this dictionary.
If consequence_type_dict is provided then the information will be merged into this dictionary.
"""
logger.info('Loading mapping rs -> ENSG/SOterms')
if consequence_type_dict is None:
Expand All @@ -41,7 +40,12 @@ def process_consequence_type_file(snp_2_gene_file, consequence_type_dict=None):
logger.warning('Skip line with missing gene ID: {}'.format(line))
continue

process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term)
# Include transcript if present
if len(line_list) >= 5:
ensembl_transcript_id = line_list[4]
process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term, ensembl_transcript_id)
else:
process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term)

logger.info('{} rs->ENSG/SOterms mappings loaded'.format(len(consequence_type_dict)))
return consequence_type_dict
Expand Down Expand Up @@ -112,9 +116,10 @@ class ConsequenceType:
with relationship to ensembl gene IDs and SO terms
"""

def __init__(self, ensembl_gene_id, so_term):
def __init__(self, ensembl_gene_id, so_term, ensembl_transcript_id=None):
self.ensembl_gene_id = ensembl_gene_id
self.so_term = so_term
self.ensembl_transcript_id = ensembl_transcript_id

def __eq__(self, other):
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
Expand Down
Binary file not shown.

0 comments on commit 7716caa

Please sign in to comment.