From d8d279c848acb480569b49d5cc7c835c46db8778 Mon Sep 17 00:00:00 2001
From: joeflack4
Date: Sat, 15 Apr 2023 14:41:53 -0400
Subject: [PATCH] Initial stable release: 1.0.0

- Update: Upgraded oaklib (OAK). This upgrade was required; owl-on-fhir should
  now be functional.
- Bugfix: Removed some Obographs JSON patching that was previously necessary.
  It appears that OAK now handles the case the patching worked around, so the
  patching itself was causing an OAK error. See:
  https://github.com/ontodev/robot/issues/1082
- Bugfix: Fixed --out-dir handling when the path starts with ~ (tilde, the
  'home' shortcut character).
- Bugfix: If --retain-intermediaries is present, intermediary files are now
  saved to the --out-dir location rather than to cache/.
- Update: .gitignore: Added some missing entries.
---
 .gitignore              |   4 +-
 owl_on_fhir/__main__.py |  75 ++++++++++++------------
 requirements.txt        | 124 ++++++++++++++++++++++++++--------------
 setup.py                |   4 +-
 4 files changed, 121 insertions(+), 86 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7bc089e..e75ca98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,12 @@
 # Boilerplate
 /*.json
+/*.owl
 /env/*
 !/env/.env.example
 /.idea/
 /.run/
 /venv/
-*/cache/*
-!*/cache/.keep
+cache/*
 __pycache__/
 _archive/
 _dev/
diff --git a/owl_on_fhir/__main__.py b/owl_on_fhir/__main__.py
index dbb3a0d..932b147 100644
--- a/owl_on_fhir/__main__.py
+++ b/owl_on_fhir/__main__.py
@@ -1,6 +1,7 @@
 """Convert OWL to FHIR"""
 import json
 import os
+import shutil
 import subprocess
 from argparse import ArgumentParser
 from typing import Dict, List
@@ -61,7 +62,7 @@ def _preprocess_rxnorm(path: str) -> str:
     return outpath
 
 
-def download(url: str, path: str, download_if_cached=True):
+def download(url: str, path: str, save_to_cache=False, download_if_cached=True):
     """Download file at url to local path
 
     :param download_if_cached: If True and file at `path` already exists, download anyway."""
@@ -72,8 +73,12 @@ def download(url: str, path: str, download_if_cached=True):
         with open(path, 'wb') as f:
             response = requests.get(url, verify=False)
             f.write(response.content)
+    if save_to_cache:
+        cache_path = os.path.join(CACHE_DIR, os.path.basename(path))
+        shutil.copy(path, cache_path)
 
 
+# todo: owl_to_semsql: this may need similar updates to caching that were done for obographs on 2023/04/15
 def owl_to_semsql(inpath: str, use_cache=False) -> str:
     """Converts OWL (or RDF, I think) to a SemanticSQL sqlite DB.
     Docs: https://incatools.github.io/ontology-access-kit/intro/tutorial07.html?highlight=semsql
@@ -101,46 +106,28 @@ def owl_to_semsql(inpath: str, use_cache=False) -> str:
     return outpath
 
 
-def owl_to_obograph(inpath: str, native_uri_stems: List[str] = None, use_cache=False) -> str:
+def owl_to_obograph(inpath: str, out_dir: str, use_cache=False, cache_output=False) -> str:
     """Convert OWL to Obograph
-    # todo: TTL and RDF also supported? not just OWL?"""
+    todo: TTL and RDF also supported? not just OWL?"""
     # Vars
-    outpath = os.path.join(CACHE_DIR, inpath + '.obographs.json')
-    outdir = os.path.realpath(os.path.dirname(outpath))
+    infile = os.path.basename(inpath)
+    cache_path = os.path.join(CACHE_DIR, infile + '.obographs.json')
+    outpath = os.path.join(out_dir, infile + '.obographs.json')
     command = f'java -jar {ROBOT_PATH}.jar convert -i {inpath} -o {outpath} --format json'
 
     # Convert
-    if not os.path.exists(outdir):
-        os.makedirs(outdir)
-    if use_cache and os.path.exists(outpath):
-        return outpath
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+    if use_cache and os.path.exists(cache_path):
+        return cache_path
     # todo: Switch back to `bioontologies` when complete: https://github.com/biopragmatics/bioontologies/issues/9
     # from bioontologies import robot
     # parse_results: robot.ParseResults = robot.convert_to_obograph_local(inpath)
     # graph = parse_results.graph_document.graphs[0]
     _run_shell_command(command)
 
-    # Patch missing roots / etc issue (until resolved: https://github.com/ontodev/robot/issues/1082)
-    if native_uri_stems:
-        with open(outpath, 'r') as f:
-            data = json.load(f)
-        nodes = data['graphs'][0]['nodes']
-        node_ids = set([node['id'] for node in nodes])
-        edges = data['graphs'][0]['edges']
-        # edges = [x for x in edges if x['pred'] in missing_nodes_from_important_edge_preds]
-        edge_subs = set([edge['sub'] for edge in edges])
-        edge_objs = set([edge['obj'] for edge in edges])
-        edge_ids = edge_subs.union(edge_objs)
-        missing = set([x for x in edge_ids if x not in node_ids])  # all missing
-        missing = [x for x in missing if any([x.startswith(y) for y in native_uri_stems])]  # filter
-
-        if missing:
-            print(f'INFO: The following nodes were found in Obographs edges, but not nodes. Adding missing '
-                  f'declarations: {missing}')
-            for node_id in missing:
-                nodes.append({'id': node_id})
-        with open(outpath, 'w') as f:
-            json.dump(data, f)
+    if cache_output:
+        shutil.copy(outpath, cache_path)
 
     return outpath
 
@@ -151,7 +138,7 @@ def owl_to_obograph(inpath: str, native_uri_stems: List[str] = None, use_cache=F
 # - https://github.com/geneontology/obographs/issues/89
 def obograph_to_fhir(
     inpath: str, out_dir: str, out_filename: str = None, code_system_id: str = None, code_system_url: str = None,
-    include_all_predicates=False, native_uri_stems: List[str] = None, dev_oak_path: str = None,
+    include_all_predicates=True, native_uri_stems: List[str] = None, dev_oak_path: str = None,
     dev_oak_interpreter_path: str = None
 ) -> str:
     """Convert Obograph to FHIR"""
@@ -176,9 +163,15 @@ def obograph_to_fhir(
     else:
         converter = OboGraphToFHIRConverter()
         converter.curie_converter = curies.Converter.from_prefix_map(get_default_prefix_map())
-        gd: GraphDocument = json_loader.load(inpath, target_class=GraphDocument)
-        converter.dump(gd, out_path, include_all_predicates=include_all_predicates)
-        # todo: update w/ these params when released
+        gd: GraphDocument = json_loader.load(str(inpath), target_class=GraphDocument)
+        converter.dump(
+            gd,
+            out_path,
+            code_system_id=code_system_id,
+            code_system_url=code_system_url,
+            include_all_predicates=include_all_predicates,
+            native_uri_stems=native_uri_stems)
+        # TODO: add these params once supported: use_curies_native_concepts, use_curies_foreign_concepts
         # converter.dump(
         #     gd, out_path, code_system_id='', code_system_url='', include_all_predicates=include_all_predicates,
         #     native_uri_stems=native_uri_stems, use_curies_native_concepts=False, use_curies_foreign_concepts=True)
@@ -219,18 +212,21 @@ def owl_to_fhir(
     input_path = input_path_or_url
     url = None
     maybe_url = urlparse(input_path_or_url)
+    out_dir = out_dir if out_dir else os.getcwd()
+    if out_dir.startswith('~'):
+        out_dir = os.path.expanduser('~/Desktop')
     if maybe_url.scheme and maybe_url.netloc:
         url = input_path_or_url
     if url:
-        input_path = os.path.join(CACHE_DIR, out_filename.replace('.json', '.owl'))
-        download(url, input_path)
+        download_path = os.path.join(out_dir, out_filename.replace('.json', '.owl'))
+        input_path = download_path
+        download(url, download_path, use_cached_intermediaries)
     if not out_filename:
         if not code_system_id:
             code_system_id = '.'.join(os.path.basename(input_path).split('.')[0:-1])  # removes file extension
         out_filename = f'CodeSystem-{code_system_id}.json'
     if not code_system_id and out_filename and out_filename.startswith('CodeSystem-'):
         code_system_id = out_filename.split('-')[1].split('.')[0]
-    input_path = input_path if os.path.exists(input_path) else os.path.join(os.getcwd(), input_path)
     out_dir = os.path.realpath(out_dir if out_dir else os.path.dirname(input_path))
     intermediary_outdir = intermediary_outdir if intermediary_outdir else out_dir
 
@@ -240,13 +236,14 @@ def owl_to_fhir(
 
     # Convert
     if intermediary_type == 'obographs' or input_path.endswith('.ttl'):  # semsql only supports .owl
-        intermediary_path = owl_to_obograph(input_path, native_uri_stems, use_cached_intermediaries)
+        intermediary_path = owl_to_obograph(input_path, out_dir, use_cached_intermediaries, use_cached_intermediaries)
         obograph_to_fhir(
             inpath=intermediary_path, out_dir=intermediary_outdir, out_filename=out_filename,
             code_system_id=code_system_id, code_system_url=code_system_url, native_uri_stems=native_uri_stems,
             include_all_predicates=include_all_predicates, dev_oak_path=dev_oak_path,
             dev_oak_interpreter_path=dev_oak_interpreter_path)
     else:  # semsql
+        # todo: owl_to_semsql: this may need similar updates to caching that were done for obographs on 2023/04/15
         intermediary_path = owl_to_semsql(input_path, use_cached_intermediaries)
         semsql_to_fhir(
             inpath=intermediary_path, out_dir=intermediary_outdir, out_filename=out_filename,
@@ -304,7 +301,7 @@ def cli():
              'convert that to FHIR.')
     parser.add_argument(
         '-c', '--use-cached-intermediaries', action='store_true', required=False, default=False,
-        help='Use cached intermediaries if they exist?')
+        help='Use cached intermediaries if they exist? Also will save intermediaries to owl-on-fhir\'s cache/ dir.')
     parser.add_argument(
         '-r', '--retain-intermediaries', action='store_true', default=False, required=False,
         help='Retain intermediary files created during conversion process (e.g. Obograph JSON)?')
diff --git a/requirements.txt b/requirements.txt
index 3716289..75c2437 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,45 +1,57 @@
 aiohttp==3.8.3
 aiosignal==1.3.1
-alabaster==0.7.12
+airium==0.2.5
+alabaster==0.7.13
 antlr4-python3-runtime==4.9.3
 appdirs==1.4.4
 arrow==1.2.3
 async-timeout==4.0.2
-attrs==22.1.0
+attrs==22.2.0
 Babel==2.11.0
 bcp47==0.0.4
+beautifulsoup4==4.12.2
 bioontologies==0.2.1
-bioregistry==0.5.143
-certifi==2022.9.24
+bioregistry==0.6.99
+bleach==6.0.0
+certifi==2022.12.7
 CFGraph==0.2.1
-chardet==4.0.0
+chardet==5.1.0
 charset-normalizer==2.1.1
-class-resolver==0.3.10
+class-resolver==0.4.2
 click==8.1.3
-curies==0.4.0
+colorama==0.4.6
+contourpy==1.0.7
+curies==0.4.2
+cycler==0.11.0
 decorator==5.1.1
 Deprecated==1.2.13
 deprecation==2.1.0
 distlib==0.3.6
 docutils==0.17.1
+EditorConfig==0.12.3
 et-xmlfile==1.1.0
-exceptiongroup==1.0.4
+exceptiongroup==1.1.0
 fastobo==0.12.2
-filelock==3.8.2
+filelock==3.9.0
+fonttools==4.39.3
 fqdn==1.5.1
 frozenlist==1.3.3
-fsspec==2022.11.0
-funowl==0.1.12
+fsspec==2023.1.0
+funowl==0.1.13
+ghp-import==2.1.0
 graphviz==0.20.1
-greenlet==2.0.1
+greenlet==2.0.2
 hbreader==0.9.1
 idna==3.4
+ijson==3.2.0.post0
 imagesize==1.4.1
 importlib-metadata==4.13.0
-iniconfig==1.1.1
+iniconfig==2.0.0
 isodate==0.6.1
 isoduration==20.11.0
+jaraco.classes==3.2.3
 Jinja2==3.1.2
+jsbeautifier==1.14.7
 json-flattener==0.1.9
 jsonasobj==1.3.1
 jsonasobj2==1.0.4
@@ -47,94 +59,120 @@ jsonpatch==1.32
 jsonpath-ng==1.5.3
 jsonpointer==2.3
 jsonschema==4.17.3
+keyring==23.13.1
 kgcl-rdflib==0.3.0
-kgcl-schema==0.3.1
+kgcl-schema==0.3.6
+kiwisolver==1.4.4
 lark==1.1.5
-linkml==1.3.15
+linkml==1.4.3
 linkml-dataops==0.1.0
-linkml-runtime==1.3.7
-markdown-it-py==2.1.0
-MarkupSafe==2.1.1
+linkml-renderer==0.1.2
+linkml-runtime==1.4.3
+Markdown==3.3.7
+markdown-it-py==2.2.0
+MarkupSafe==2.1.2
+matplotlib==3.7.1
 mdit-py-plugins==0.3.3
 mdurl==0.1.2
+mergedeep==1.3.4
+mkdocs==1.4.2
+mkdocs-material==9.1.6
+mkdocs-material-extensions==1.1.1
+mkdocs-mermaid2-plugin==0.6.0
 more-click==0.1.2
-multidict==6.0.3
+more-itertools==9.1.0
+multidict==6.0.4
 myst-parser==0.18.1
+ndex2==3.5.1
 networkx==2.8.8
-numpy==1.23.5
+numpy==1.24.1
 nxontology==0.4.1
-oaklib==0.1.67
+oaklib==0.5.1
 ols-client==0.1.2
 ontoportal-client==0.0.3
 openpyxl==3.0.10
-packaging==21.3
-pandas==1.5.2
+packaging==23.0
+pandas==1.5.3
 pandasql==0.7.3
+pansql==0.0.1
 parse==1.19.0
-pbr==5.11.0
-platformdirs==2.5.4
+pbr==5.11.1
+Pillow==9.5.0
+pkginfo==1.9.6
+platformdirs==2.6.2
 pluggy==1.0.0
 ply==3.11
 prefixcommons==0.1.12
 prefixmaps==0.1.4
-pronto==2.5.1
+pronto==2.5.3
 py==1.11.0
-pydantic==1.10.2
-Pygments==2.13.0
+pydantic==1.10.4
+Pygments==2.14.0
 PyJSG==0.11.10
+pymdown-extensions==9.11
 pyparsing==3.0.9
-pyrsistent==0.19.2
+pyrsistent==0.19.3
 PyShEx==0.8.1
 PyShExC==0.9.1
-pystow==0.4.7
-pytest==7.2.0
+pystow==0.5.0
+pytest==7.2.1
 pytest-logging==2015.11.4
 python-dateutil==2.8.2
 PyTrie==0.4.0
-pytz==2022.6
+pytz==2022.7.1
 PyYAML==6.0
+pyyaml_env_tag==0.1
 ratelimit==2.2.1
 rdflib==6.2.0
 rdflib-jsonld==0.6.1
 rdflib-shim==1.0.3
-requests==2.28.1
+readme-renderer==37.3
+regex==2023.3.23
+requests==2.28.2
+requests-toolbelt==0.10.1
 rfc3339-validator==0.1.4
+rfc3986==2.0.0
 rfc3987==1.3.8
+rich==13.3.2
 ruamel.yaml==0.17.21
 ruamel.yaml.clib==0.2.7
-scipy==1.9.3
-semsql==0.2.5
+scipy==1.10.0
+semsql==0.3.2
 ShExJSG==0.8.2
 six==1.16.0
 snowballstemmer==2.2.0
 sortedcontainers==2.4.0
+soupsieve==2.4
 sparqlslurper==0.5.1
 SPARQLWrapper==2.0.0
 Sphinx==5.3.0
 sphinx-click==4.4.0
 sphinx-rtd-theme==1.1.1
-sphinxcontrib-applehelp==1.0.2
+sphinxcontrib-applehelp==1.0.4
 sphinxcontrib-devhelp==1.0.2
 sphinxcontrib-htmlhelp==2.0.0
 sphinxcontrib-jsmath==1.0.1
 sphinxcontrib-qthelp==1.0.3
 sphinxcontrib-serializinghtml==1.1.5
-SQLAlchemy==1.4.44
+SQLAlchemy==1.4.46
 SQLAlchemy-Utils==0.38.3
-sssom==0.3.17
-sssom-schema==0.9.4
+sssom==0.3.28
+sssom-schema==0.11.0
 stevedore==4.1.1
 tomli==2.0.1
-tox==3.27.1
+tox==3.28.0
 tqdm==4.64.1
+twine==4.0.2
 typing_extensions==4.4.0
+UpSetPlot==0.8.0
 uri-template==1.2.0
-urllib3==1.26.13
+urllib3==1.26.14
 validators==0.20.0
 virtualenv==20.17.1
 virtualenv-clone==0.5.7
-watchdog==2.2.0
+watchdog==2.2.1
 webcolors==1.12
+webencodings==0.5.1
 wrapt==1.14.1
 yarl==1.8.2
-zipp==3.11.0
+zipp==3.12.0
diff --git a/setup.py b/setup.py
index d888c03..dc6cd52 100644
--- a/setup.py
+++ b/setup.py
@@ -15,12 +15,12 @@
 EMAIL = 'jflack@jhu.edu'
 AUTHOR = 'Joe Flack'
 REQUIRES_PYTHON = '>=3.9.0'
-VERSION = '0.1.11'
+VERSION = '1.0.0'
 
 # Requirements
 REQUIRED = [
     'bioontologies',
-    'oaklib>=0.1.58',
+    'oaklib>=0.5.1',
     'requests',
 ]