diff --git a/.gitignore b/.gitignore index 7bc089e..e75ca98 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,12 @@ # Boilerplate /*.json +/*.owl /env/* !/env/.env.example /.idea/ /.run/ /venv/ -*/cache/* -!*/cache/.keep +cache/* __pycache__/ _archive/ _dev/ diff --git a/owl_on_fhir/__main__.py b/owl_on_fhir/__main__.py index dbb3a0d..932b147 100644 --- a/owl_on_fhir/__main__.py +++ b/owl_on_fhir/__main__.py @@ -1,6 +1,7 @@ """Convert OWL to FHIR""" import json import os +import shutil import subprocess from argparse import ArgumentParser from typing import Dict, List @@ -61,7 +62,7 @@ def _preprocess_rxnorm(path: str) -> str: return outpath -def download(url: str, path: str, download_if_cached=True): +def download(url: str, path: str, save_to_cache=False, download_if_cached=True): """Download file at url to local path :param download_if_cached: If True and file at `path` already exists, download anyway.""" @@ -72,8 +73,12 @@ def download(url: str, path: str, download_if_cached=True): with open(path, 'wb') as f: response = requests.get(url, verify=False) f.write(response.content) + if save_to_cache: + cache_path = os.path.join(CACHE_DIR, os.path.basename(path)) + shutil.copy(path, cache_path) +# todo: owl_to_semsql: this may need similar updates to caching that were done for obographs on 2023/04/15 def owl_to_semsql(inpath: str, use_cache=False) -> str: """Converts OWL (or RDF, I think) to a SemanticSQL sqlite DB. Docs: https://incatools.github.io/ontology-access-kit/intro/tutorial07.html?highlight=semsql @@ -101,46 +106,28 @@ def owl_to_semsql(inpath: str, use_cache=False) -> str: return outpath -def owl_to_obograph(inpath: str, native_uri_stems: List[str] = None, use_cache=False) -> str: +def owl_to_obograph(inpath: str, out_dir: str, use_cache=False, cache_output=False) -> str: """Convert OWL to Obograph - # todo: TTL and RDF also supported? not just OWL?""" + todo: TTL and RDF also supported? not just OWL?""" # Vars - outpath = os.path.join(CACHE_DIR, inpath + '.obographs.json') - outdir = os.path.realpath(os.path.dirname(outpath)) + infile = os.path.basename(inpath) + cache_path = os.path.join(CACHE_DIR, infile + '.obographs.json') + outpath = os.path.join(out_dir, infile + '.obographs.json') command = f'java -jar {ROBOT_PATH}.jar convert -i {inpath} -o {outpath} --format json' # Convert - if not os.path.exists(outdir): - os.makedirs(outdir) - if use_cache and os.path.exists(outpath): - return outpath + if not os.path.exists(out_dir): + os.makedirs(out_dir) + if use_cache and os.path.exists(cache_path): + return cache_path # todo: Switch back to `bioontologies` when complete: https://github.com/biopragmatics/bioontologies/issues/9 # from bioontologies import robot # parse_results: robot.ParseResults = robot.convert_to_obograph_local(inpath) # graph = parse_results.graph_document.graphs[0] _run_shell_command(command) - # Patch missing roots / etc issue (until resolved: https://github.com/ontodev/robot/issues/1082) - if native_uri_stems: - with open(outpath, 'r') as f: - data = json.load(f) - nodes = data['graphs'][0]['nodes'] - node_ids = set([node['id'] for node in nodes]) - edges = data['graphs'][0]['edges'] - # edges = [x for x in edges if x['pred'] in missing_nodes_from_important_edge_preds] - edge_subs = set([edge['sub'] for edge in edges]) - edge_objs = set([edge['obj'] for edge in edges]) - edge_ids = edge_subs.union(edge_objs) - missing = set([x for x in edge_ids if x not in node_ids]) # all missing - missing = [x for x in missing if any([x.startswith(y) for y in native_uri_stems])] # filter - - if missing: - print(f'INFO: The following nodes were found in Obographs edges, but not nodes. Adding missing ' - f'declarations: {missing}') - for node_id in missing: - nodes.append({'id': node_id}) - with open(outpath, 'w') as f: - json.dump(data, f) + if cache_output: + shutil.copy(outpath, cache_path) return outpath @@ -151,7 +138,7 @@ def owl_to_obograph(inpath: str, native_uri_stems: List[str] = None, use_cache=F # - https://github.com/geneontology/obographs/issues/89 def obograph_to_fhir( inpath: str, out_dir: str, out_filename: str = None, code_system_id: str = None, code_system_url: str = None, - include_all_predicates=False, native_uri_stems: List[str] = None, dev_oak_path: str = None, + include_all_predicates=True, native_uri_stems: List[str] = None, dev_oak_path: str = None, dev_oak_interpreter_path: str = None ) -> str: """Convert Obograph to FHIR""" @@ -176,9 +163,15 @@ def obograph_to_fhir( else: converter = OboGraphToFHIRConverter() converter.curie_converter = curies.Converter.from_prefix_map(get_default_prefix_map()) - gd: GraphDocument = json_loader.load(inpath, target_class=GraphDocument) - converter.dump(gd, out_path, include_all_predicates=include_all_predicates) - # todo: update w/ these params when released + gd: GraphDocument = json_loader.load(str(inpath), target_class=GraphDocument) + converter.dump( + gd, + out_path, + code_system_id=code_system_id, + code_system_url=code_system_url, + include_all_predicates=include_all_predicates, + native_uri_stems=native_uri_stems) + # TODO: add these params once supported: use_curies_native_concepts, use_curies_foreign_concepts # converter.dump( # gd, out_path, code_system_id='', code_system_url='', include_all_predicates=include_all_predicates, # native_uri_stems=native_uri_stems, use_curies_native_concepts=False, use_curies_foreign_concepts=True) @@ -219,18 +212,21 @@ def owl_to_fhir( input_path = input_path_or_url url = None maybe_url = urlparse(input_path_or_url) + out_dir = out_dir if out_dir else os.getcwd() + if out_dir.startswith('~'): + out_dir = os.path.expanduser('~/Desktop') if maybe_url.scheme and maybe_url.netloc: url = input_path_or_url if url: - input_path = os.path.join(CACHE_DIR, out_filename.replace('.json', '.owl')) - download(url, input_path) + download_path = os.path.join(out_dir, out_filename.replace('.json', '.owl')) + input_path = download_path + download(url, download_path, use_cached_intermediaries) if not out_filename: if not code_system_id: code_system_id = '.'.join(os.path.basename(input_path).split('.')[0:-1]) # removes file extension out_filename = f'CodeSystem-{code_system_id}.json' if not code_system_id and out_filename and out_filename.startswith('CodeSystem-'): code_system_id = out_filename.split('-')[1].split('.')[0] - input_path = input_path if os.path.exists(input_path) else os.path.join(os.getcwd(), input_path) out_dir = os.path.realpath(out_dir if out_dir else os.path.dirname(input_path)) intermediary_outdir = intermediary_outdir if intermediary_outdir else out_dir @@ -240,13 +236,14 @@ def owl_to_fhir( # Convert if intermediary_type == 'obographs' or input_path.endswith('.ttl'): # semsql only supports .owl - intermediary_path = owl_to_obograph(input_path, native_uri_stems, use_cached_intermediaries) + intermediary_path = owl_to_obograph(input_path, out_dir, use_cached_intermediaries, use_cached_intermediaries) obograph_to_fhir( inpath=intermediary_path, out_dir=intermediary_outdir, out_filename=out_filename, code_system_id=code_system_id, code_system_url=code_system_url, native_uri_stems=native_uri_stems, include_all_predicates=include_all_predicates, dev_oak_path=dev_oak_path, dev_oak_interpreter_path=dev_oak_interpreter_path) else: # semsql + # todo: owl_to_semsql: this may need similar updates to caching that were done for obographs on 2023/04/15 intermediary_path = owl_to_semsql(input_path, use_cached_intermediaries) semsql_to_fhir( inpath=intermediary_path, out_dir=intermediary_outdir, out_filename=out_filename, @@ -304,7 +301,7 @@ def cli(): 'convert that to FHIR.') parser.add_argument( '-c', '--use-cached-intermediaries', action='store_true', required=False, default=False, - help='Use cached intermediaries if they exist?') + help='Use cached intermediaries if they exist? Also will save intermediaries to owl-on-fhir\'s cache/ dir.') parser.add_argument( '-r', '--retain-intermediaries', action='store_true', default=False, required=False, help='Retain intermediary files created during conversion process (e.g. Obograph JSON)?') diff --git a/requirements.txt b/requirements.txt index 3716289..75c2437 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,45 +1,57 @@ aiohttp==3.8.3 aiosignal==1.3.1 -alabaster==0.7.12 +airium==0.2.5 +alabaster==0.7.13 antlr4-python3-runtime==4.9.3 appdirs==1.4.4 arrow==1.2.3 async-timeout==4.0.2 -attrs==22.1.0 +attrs==22.2.0 Babel==2.11.0 bcp47==0.0.4 +beautifulsoup4==4.12.2 bioontologies==0.2.1 -bioregistry==0.5.143 -certifi==2022.9.24 +bioregistry==0.6.99 +bleach==6.0.0 +certifi==2022.12.7 CFGraph==0.2.1 -chardet==4.0.0 +chardet==5.1.0 charset-normalizer==2.1.1 -class-resolver==0.3.10 +class-resolver==0.4.2 click==8.1.3 -curies==0.4.0 +colorama==0.4.6 +contourpy==1.0.7 +curies==0.4.2 +cycler==0.11.0 decorator==5.1.1 Deprecated==1.2.13 deprecation==2.1.0 distlib==0.3.6 docutils==0.17.1 +EditorConfig==0.12.3 et-xmlfile==1.1.0 -exceptiongroup==1.0.4 +exceptiongroup==1.1.0 fastobo==0.12.2 -filelock==3.8.2 +filelock==3.9.0 +fonttools==4.39.3 fqdn==1.5.1 frozenlist==1.3.3 -fsspec==2022.11.0 -funowl==0.1.12 +fsspec==2023.1.0 +funowl==0.1.13 +ghp-import==2.1.0 graphviz==0.20.1 -greenlet==2.0.1 +greenlet==2.0.2 hbreader==0.9.1 idna==3.4 +ijson==3.2.0.post0 imagesize==1.4.1 importlib-metadata==4.13.0 -iniconfig==1.1.1 +iniconfig==2.0.0 isodate==0.6.1 isoduration==20.11.0 +jaraco.classes==3.2.3 Jinja2==3.1.2 +jsbeautifier==1.14.7 json-flattener==0.1.9 jsonasobj==1.3.1 jsonasobj2==1.0.4 @@ -47,94 +59,120 @@ jsonpatch==1.32 jsonpath-ng==1.5.3 jsonpointer==2.3 jsonschema==4.17.3 +keyring==23.13.1 kgcl-rdflib==0.3.0 -kgcl-schema==0.3.1 +kgcl-schema==0.3.6 +kiwisolver==1.4.4 lark==1.1.5 -linkml==1.3.15 +linkml==1.4.3 linkml-dataops==0.1.0 -linkml-runtime==1.3.7 -markdown-it-py==2.1.0 -MarkupSafe==2.1.1 +linkml-renderer==0.1.2 +linkml-runtime==1.4.3 +Markdown==3.3.7 +markdown-it-py==2.2.0 +MarkupSafe==2.1.2 +matplotlib==3.7.1 mdit-py-plugins==0.3.3 mdurl==0.1.2 +mergedeep==1.3.4 +mkdocs==1.4.2 +mkdocs-material==9.1.6 +mkdocs-material-extensions==1.1.1 +mkdocs-mermaid2-plugin==0.6.0 more-click==0.1.2 -multidict==6.0.3 +more-itertools==9.1.0 +multidict==6.0.4 myst-parser==0.18.1 +ndex2==3.5.1 networkx==2.8.8 -numpy==1.23.5 +numpy==1.24.1 nxontology==0.4.1 -oaklib==0.1.67 +oaklib==0.5.1 ols-client==0.1.2 ontoportal-client==0.0.3 openpyxl==3.0.10 -packaging==21.3 -pandas==1.5.2 +packaging==23.0 +pandas==1.5.3 pandasql==0.7.3 +pansql==0.0.1 parse==1.19.0 -pbr==5.11.0 -platformdirs==2.5.4 +pbr==5.11.1 +Pillow==9.5.0 +pkginfo==1.9.6 +platformdirs==2.6.2 pluggy==1.0.0 ply==3.11 prefixcommons==0.1.12 prefixmaps==0.1.4 -pronto==2.5.1 +pronto==2.5.3 py==1.11.0 -pydantic==1.10.2 -Pygments==2.13.0 +pydantic==1.10.4 +Pygments==2.14.0 PyJSG==0.11.10 +pymdown-extensions==9.11 pyparsing==3.0.9 -pyrsistent==0.19.2 +pyrsistent==0.19.3 PyShEx==0.8.1 PyShExC==0.9.1 -pystow==0.4.7 -pytest==7.2.0 +pystow==0.5.0 +pytest==7.2.1 pytest-logging==2015.11.4 python-dateutil==2.8.2 PyTrie==0.4.0 -pytz==2022.6 +pytz==2022.7.1 PyYAML==6.0 +pyyaml_env_tag==0.1 ratelimit==2.2.1 rdflib==6.2.0 rdflib-jsonld==0.6.1 rdflib-shim==1.0.3 -requests==2.28.1 +readme-renderer==37.3 +regex==2023.3.23 +requests==2.28.2 +requests-toolbelt==0.10.1 rfc3339-validator==0.1.4 +rfc3986==2.0.0 rfc3987==1.3.8 +rich==13.3.2 ruamel.yaml==0.17.21 ruamel.yaml.clib==0.2.7 -scipy==1.9.3 -semsql==0.2.5 +scipy==1.10.0 +semsql==0.3.2 ShExJSG==0.8.2 six==1.16.0 snowballstemmer==2.2.0 sortedcontainers==2.4.0 +soupsieve==2.4 sparqlslurper==0.5.1 SPARQLWrapper==2.0.0 Sphinx==5.3.0 sphinx-click==4.4.0 sphinx-rtd-theme==1.1.1 -sphinxcontrib-applehelp==1.0.2 +sphinxcontrib-applehelp==1.0.4 sphinxcontrib-devhelp==1.0.2 sphinxcontrib-htmlhelp==2.0.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 -SQLAlchemy==1.4.44 +SQLAlchemy==1.4.46 SQLAlchemy-Utils==0.38.3 -sssom==0.3.17 -sssom-schema==0.9.4 +sssom==0.3.28 +sssom-schema==0.11.0 stevedore==4.1.1 tomli==2.0.1 -tox==3.27.1 +tox==3.28.0 tqdm==4.64.1 +twine==4.0.2 typing_extensions==4.4.0 +UpSetPlot==0.8.0 uri-template==1.2.0 -urllib3==1.26.13 +urllib3==1.26.14 validators==0.20.0 virtualenv==20.17.1 virtualenv-clone==0.5.7 -watchdog==2.2.0 +watchdog==2.2.1 webcolors==1.12 +webencodings==0.5.1 wrapt==1.14.1 yarl==1.8.2 -zipp==3.11.0 +zipp==3.12.0 diff --git a/setup.py b/setup.py index d888c03..dc6cd52 100644 --- a/setup.py +++ b/setup.py @@ -15,12 +15,12 @@ EMAIL = 'jflack@jhu.edu' AUTHOR = 'Joe Flack' REQUIRES_PYTHON = '>=3.9.0' -VERSION = '0.1.11' +VERSION = '1.0.0' # Requirements REQUIRED = [ 'bioontologies', - 'oaklib>=0.1.58', + 'oaklib>=0.5.1', 'requests', ]