Skip to content

Commit

Permalink
OWL on FHIR
Browse files Browse the repository at this point in the history
- Add: Param `--native-uri-stems`: A comma-separated list of URI stems used to determine whether a concept is native to the CodeSystem. As of 2023-01-15, there is still a bug in the Obographs spec and/or `robot` where certain nodes are not being converted: they are referenced by Obographs edges but have no node declaration. This converter adds the missing node declarations back, but it needs this parameter to know which of them belong to the CodeSystem itself rather than being foreign concepts. OAK also makes use of this parameter. See also: geneontology/obographs#90
  • Loading branch information
joeflack4 committed Jan 15, 2023
1 parent 4b2dbcc commit c9e84ae
Showing 1 changed file with 53 additions and 74 deletions.
127 changes: 53 additions & 74 deletions bin/convert_owl.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Convert OWL to FHIR"""
import json
import os
import subprocess
from argparse import ArgumentParser
from typing import Dict
from typing import Dict, List

import curies
import requests
Expand Down Expand Up @@ -40,28 +41,33 @@
'url': 'https://github.com/monarch-initiative/mondo/releases/latest/download/mondo.owl',
'input_path': os.path.join(OWL_ON_FHIR_CONTENT_REPO_PATH, 'input', 'mondo.owl'),
'id': 'mondo',
'native_uri_stems': ['http://purl.obolibrary.org/obo/MONDO_'],
},
'comp-loinc': {
'url': 'https://github.com/loinc/comp-loinc/releases/latest/download/merged_reasoned_loinc.owl',
'input_path': os.path.join(OWL_ON_FHIR_CONTENT_REPO_PATH, 'input', 'comploinc.owl'),
'id': 'comp-loinc',
'native_uri_stems': ['https://loinc.org/'],
},
'HPO': {
'url': 'https://github.com/obophenotype/human-phenotype-ontology/releases/latest/download/hp-full.owl',
'input_path': os.path.join(OWL_ON_FHIR_CONTENT_REPO_PATH, 'input', 'hpo.owl'),
'id': 'HPO',
'native_uri_stems': ['http://purl.obolibrary.org/obo/HP_'],
},
'rxnorm': {
'url': 'https://data.bioontology.org/'
'ontologies/RXNORM/submissions/23/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb',
'input_path': os.path.join(OWL_ON_FHIR_CONTENT_REPO_PATH, 'input', 'RXNORM.ttl'),
'id': 'rxnorm',
'native_uri_stems': ['http://purl.bioontology.org/ontology/RXNORM/'],
},
'sequence-ontology': {
'url': 'https://data.bioontology.org/'
'ontologies/SO/submissions/304/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb',
'input_path': os.path.join(OWL_ON_FHIR_CONTENT_REPO_PATH, 'input', 'so.owl'),
'id': 'sequence-ontology',
'native_uri_stems': ['http://purl.obolibrary.org/obo/SO_'],
},
}

Expand Down Expand Up @@ -143,7 +149,7 @@ def owl_to_semsql(inpath: str, use_cache=False) -> str:
return outpath


def owl_to_obograph(inpath: str, use_cache=False) -> str:
def owl_to_obograph(inpath: str, native_uri_stems: List[str] = None, use_cache=False) -> str:
"""Convert OWL to Obograph
# todo: TTL and RDF also supported? not just OWL?"""
# Vars
Expand All @@ -162,67 +168,27 @@ def owl_to_obograph(inpath: str, use_cache=False) -> str:
# graph = parse_results.graph_document.graphs[0]
_run_shell_command(command)

# todo: might want to add this patch back and open up an issue, because nico said in issue below shouldn't happen
# - issue would be in OAK, regarding the 'cooked' error
# Patch missing roots / etc issue (until resolved: https://github.com/ontodev/robot/issues/1082)
# ! - Deactivated this because I was getting an error about the very same IDs that Chris R was asking for
# Try uploading and see if it works.
#
# - This appears to be mostly a problem in FHIR (and maybe just Obographs) if subClassOf or variation missing, but
# not 100% sure
#
# missing_nodes_from_important_edge_preds = [
# 'is_a',
# 'http://purl.bioontology.org/ontology/RXNORM/isa',
# 'rdfs:subClassOf',
# 'http://www.w3.org/2000/01/rdf-schema#subClassOf'
# ]
# with open(outpath, 'r') as f:
# data = json.load(f)
# nodes = data['graphs'][0]['nodes']
# node_ids = set([node['id'] for node in nodes])
# edges = data['graphs'][0]['edges']
# edges = [x for x in edges if x['pred'] in missing_nodes_from_important_edge_preds]
# edge_subs = set([edge['sub'] for edge in edges])
# edge_objs = set([edge['obj'] for edge in edges])
# edge_ids = edge_subs.union(edge_objs)
# missing = set([x for x in edge_ids if x not in node_ids])

# Edge case exclusions
# - This was causing the following error in OAK (I have not made a GH issue):
# - This example was from Mondo
# cooked_entry = Node(id="JsonObj(id='http://www.geneontology.org/formats/oboInOwl#Subset')", ...
# if cooked_entry[key_name] != key:
# > raise ...
# E ValueError: Slot: nodes - attribute id value (JsonObj(
# id='http://www.geneontology.org/formats/oboInOwl#Subset'))
# does not match key (http://www.geneontology.org/formats/oboInOwl#Subset)
#
# Method A: Remove cases
# id_exclusions = [
# 'http://www.geneontology.org/formats/oboInOwl#Subset'
# ]
# uri_stem_exclusions = [
# 'http://purl.obolibrary.org/obo/CARO_'
# ]
# for case in id_exclusions:
# if case in missing:
# missing.remove(case)
# missing2 = []
# for node_id in missing:
# if not any([node_id.startswith(x) for x in uri_stem_exclusions]):
# missing2.append(node_id)
#
# Method B: Keep only dominant IDs
# - Opted not to do

# if missing2:
# print(f'INFO: The following nodes were found in Obographs edges, but not nodes. Adding missing declarations: '
# f'{missing}')
# for node_id in missing:
# nodes.append({'id': node_id})
# with open(outpath, 'w') as f:
# json.dump(data, f)
if native_uri_stems:
with open(outpath, 'r') as f:
data = json.load(f)
nodes = data['graphs'][0]['nodes']
node_ids = set([node['id'] for node in nodes])
edges = data['graphs'][0]['edges']
# edges = [x for x in edges if x['pred'] in missing_nodes_from_important_edge_preds]
edge_subs = set([edge['sub'] for edge in edges])
edge_objs = set([edge['obj'] for edge in edges])
edge_ids = edge_subs.union(edge_objs)
missing = set([x for x in edge_ids if x not in node_ids]) # all missing
missing = [x for x in missing if any([x.startswith(y) for y in native_uri_stems])] # filter

if missing:
print(f'INFO: The following nodes were found in Obographs edges, but not nodes. Adding missing declarations: '
f'{missing}')
for node_id in missing:
nodes.append({'id': node_id})
with open(outpath, 'w') as f:
json.dump(data, f)

return outpath

Expand All @@ -231,15 +197,24 @@ def owl_to_obograph(inpath: str, use_cache=False) -> str:
# - https://github.com/linkml/linkml/issues/1156
# - https://github.com/ontodev/robot/issues/1079
# - https://github.com/geneontology/obographs/issues/89
def obograph_to_fhir(inpath: str, out_dir: str, out_filename: str = None, include_all_predicates=False) -> str:
def obograph_to_fhir(
inpath: str, out_dir: str, out_filename: str = None, include_all_predicates=False,
native_uri_stems: List[str] = None
) -> str:
"""Convert Obograph to FHIR"""
converter = OboGraphToFHIRConverter()
converter.curie_converter = curies.Converter.from_prefix_map(get_default_prefix_map())
gd: GraphDocument = json_loader.load(inpath, target_class=GraphDocument)
out_path = os.path.join(out_dir, out_filename)
if not os.path.exists(out_dir):
os.makedirs(out_dir)
converter.dump(gd, out_path, include_all_predicates=include_all_predicates)
# todo: this try/except is only temporary until the latest params from dev version of OAK get released
try:
converter.dump(gd, out_path, include_all_predicates=include_all_predicates, native_uri_stems=native_uri_stems)
except Exception as e:
# TODO: find out what exception it is and handle it
print(e)
converter.dump(gd, out_path, include_all_predicates=include_all_predicates)
return out_path


Expand All @@ -260,7 +235,7 @@ def semsql_to_fhir(inpath: str, out_dir: str, out_filename: str = None, include_
def owl_to_fhir(
input_path_or_url: str, out_dir: str = OUTDIR, out_filename: str = None, include_all_predicates=False,
retain_intermediaries=False, intermediary_type=['obographs', 'semsql'][0], use_cached_intermediaries=False,
intermediary_outdir: str = None, convert_intermediaries_only=False
intermediary_outdir: str = None, convert_intermediaries_only=False, native_uri_stems: List[str] = None,
) -> str:
"""Run conversion"""
# Download if necessary & determine outpaths
Expand All @@ -285,19 +260,14 @@ def owl_to_fhir(

# Convert
if intermediary_type == 'obographs' or input_path.endswith('.ttl'): # semsql only supports .owl
intermediary_path = owl_to_obograph(input_path, use_cached_intermediaries)
intermediary_path = owl_to_obograph(input_path, native_uri_stems, use_cached_intermediaries)
obograph_to_fhir(
inpath=intermediary_path,
out_dir=intermediary_outdir,
out_filename=out_filename,
include_all_predicates=include_all_predicates)
inpath=intermediary_path, out_dir=intermediary_outdir, out_filename=out_filename,
native_uri_stems=native_uri_stems, include_all_predicates=include_all_predicates)
else: # semsql
intermediary_path = owl_to_semsql(input_path, use_cached_intermediaries)
semsql_to_fhir(
inpath=intermediary_path,
out_dir=intermediary_outdir,
out_filename=out_filename,
include_all_predicates=include_all_predicates)
inpath=intermediary_path, out_dir=intermediary_outdir, out_filename=out_filename, include_all_predicates=include_all_predicates)
if convert_intermediaries_only:
return intermediary_path

Expand Down Expand Up @@ -354,6 +324,15 @@ def cli():
parser.add_argument(
'-p', '--include-all-predicates', action='store_true', required=False, default=False,
help='Include all predicates in CodeSystem.property and CodeSystem.concept.property, or just is_a/parent?')
parser.add_argument(
'-u', '--native-uri-stems', required=False, nargs='+',
help='A comma-separated list of URI stems that will be used to determine whether a concept is native to '
'the CodeSystem. For example, for OMIM, the following URI stems are native: '
'https://omim.org/entry/,https://omim.org/phenotypicSeries/PS"'
'As of 2023-01-15, there is still a bug in the Obographs spec and/or `robot` where certain nodes are not'
' being converted. This converter adds back the nodes, but to know which ones belong to the CodeSystem '
'itself and are not foreign concepts, this parameter is necessary. OAK also makes use of this parameter. '
'See also: https://github.com/geneontology/obographs/issues/90')
parser.add_argument(
'-t', '--intermediary-type', choices=INTERMEDIARY_TYPES, default='obographs', required=False,
help='Which type of intermediary to use? First, we convert OWL to that intermediary format, and then we '
Expand Down

0 comments on commit c9e84ae

Please sign in to comment.