From a984f588e5068081999c86af77073a07e2978806 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 15 Feb 2024 16:59:44 -0500 Subject: [PATCH 01/18] Fixed another typo in docs --- docs/developer/add-new-source.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/developer/add-new-source.md b/docs/developer/add-new-source.md index f31b9f90..a06c56fe 100644 --- a/docs/developer/add-new-source.md +++ b/docs/developer/add-new-source.md @@ -30,7 +30,7 @@ Add prefixes. Update the Website Table of Contents in [mkdocs.yaml](https://github.com/monarch-initiative/mondo-ingest/blob/main/mkdocs.yaml) ### 3.2. `docs/sources/*.md` -Run `sh run.sh make ../../docs/sources/*.md` from `src/ontology`. Then edit it manually to add any more informatoin. +Run `sh run.sh make ../../docs/sources/*.md` from `src/ontology`. Then edit it manually to add any more information. ### 3.3. `docs/sources.md` Add a link to your new `.md` file created in the last step. From 30b3f1c75a2da99a9601cedc3598832b9534743d Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Sat, 17 Feb 2024 18:03:58 -0500 Subject: [PATCH 02/18] Bugfix tmp/ not exist If run-command.sh runs on a fresh clone, it will fail because it tries to append to a debug log, but it can't do that because the directory for that file doesn't exist. 
--- .gitignore | 1 + src/ontology/tmp/.gitkeep | 0 2 files changed, 1 insertion(+) create mode 100644 src/ontology/tmp/.gitkeep diff --git a/.gitignore b/.gitignore index ee6c2728..f392a564 100644 --- a/.gitignore +++ b/.gitignore @@ -96,6 +96,7 @@ src/ontology/target/ # src/ontology/tmp/ src/ontology/tmp/* +!src/ontology/tmp/.gitkeep !src/ontology/tmp/README.md # src/patterns/ diff --git a/src/ontology/tmp/.gitkeep b/src/ontology/tmp/.gitkeep new file mode 100644 index 00000000..e69de29b From 2ed40db83f0a4ba8115649bb6f4a8ae4e4d12d79 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Wed, 21 Feb 2024 16:20:33 -0500 Subject: [PATCH 03/18] Resolved double echo - UX improvement / fix of logging from mondo-ingest.Makefile, where echo's would be printed twice because they did not start with @. --- src/ontology/mondo-ingest.Makefile | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 8a7d7293..1d8ef5cb 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -271,10 +271,10 @@ $(REPORTDIR)/%_excluded_terms_in_mondo_xrefs.tsv $(REPORTDIR)/%_excluded_terms_i # Exclusions: all artefacts for single ontology .PHONY: exclusions-% exclusions-%: reports/%_exclusion_reasons.ttl reports/%_excluded_terms_in_mondo_xrefs.tsv $(REPORTDIR)/%_term_exclusions.txt - echo "$@ completed" + @echo "$@ completed" exclusions-all: $(foreach n,$(ALL_COMPONENT_IDS), exclusions-$(n)) - echo "$@ completed" + @echo "$@ completed" # Exclusions: running for all ontologies # todo: change '> $(REPORTDIR)/excluded_terms.txt' to '> $@' like in goal '$(REPORTDIR)/excluded_terms_in_mondo_xrefs.tsv'? 
@@ -336,7 +336,7 @@ build-mondo-ingest: mapped-deprecated-terms mapping-progress-report \ recreate-unmapped-components sync documentation \ prepare_release - echo "Mondo Ingest has been fully completed" + @echo "Mondo Ingest has been fully completed" .PHONY: build-mondo-ingest-no-imports build-mondo-ingest-no-imports: @@ -344,7 +344,7 @@ build-mondo-ingest-no-imports: mapped-deprecated-terms mapping-progress-report \ recreate-unmapped-components sync documentation \ prepare_release - echo "Mondo Ingest (fast) has been fully completed" + @echo "Mondo Ingest (fast) has been fully completed" DEPLOY_ASSETS_MONDO_INGEST=$(OTHER_SRC) $(ALL_MAPPINGS) ../../mondo-ingest.owl ../../mondo-ingest.obo @@ -400,7 +400,7 @@ ALL_COMPONENT_SIGNTAURE_REPORTS=$(foreach n,$(ALL_COMPONENT_IDS), reports/mirror .PHONY: signature_reports signature_reports: $(ALL_MIRROR_SIGNTAURE_REPORTS) $(ALL_COMPONENT_SIGNTAURE_REPORTS) - echo "Finished running signature reports.." + @echo "Finished running signature reports." ############################# #### Lexical matching ####### @@ -488,11 +488,11 @@ slurp/%.tsv: $(COMPONENTSDIR)/%.owl $(TMPDIR)/mondo.sssom.tsv $(REPORTDIR)/%_map .PHONY: slurp-% slurp-%: slurp/%.tsv - echo "$@ completed". + @echo "$@ completed". .PHONY: slurp-no-updates-% slurp-no-updates-%: slurp/%.tsv - echo "$@ completed". + @echo "$@ completed". .PHONY: slurp-docs slurp-docs: @@ -500,11 +500,11 @@ slurp-docs: .PHONY: slurp-all-no-updates slurp-all-no-updates: $(foreach n,$(ALL_COMPONENT_IDS), slurp-no-updates-$(n)) - echo "$@ ($^) completed". + @echo "$@ ($^) completed". .PHONY: slurp-all slurp-all: $(foreach n,$(ALL_COMPONENT_IDS), slurp-$(n)) - echo "$@ ($^) completed". + @echo "$@ ($^) completed". ############################# @@ -519,7 +519,7 @@ sync-subclassof: $(REPORTDIR)/sync-subClassOf.direct-in-mondo-only.tsv # todo: drop this? This is really just an alias here for quality of life, but not used by anything. 
.PHONY: sync-subclassof-% sync-subclassof-%: $(REPORTDIR)/%.subclass.direct-in-mondo-only.tsv - echo "$@ completed" + @echo "$@ completed" # Side effects: Deletes SOURCE.subclass.direct-in-mondo-only.tsv's from which the combination is made. $(REPORTDIR)/sync-subClassOf.direct-in-mondo-only.tsv: $(foreach n,$(ALL_COMPONENT_IDS), sync-subclassof-$(n)) tmp/mondo.db From 5f6faa7893bb5e42fb44e19fe78095006bc94ce5 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Tue, 30 Jan 2024 19:51:32 -0500 Subject: [PATCH 04/18] ICD11 config & docs - Update: mondo-ingest-odk.yaml: New entry for ICD11 in 'components' - Updates from running 'make update_repo': - modified: docs/odk-workflows/ManageDocumentation.md - modified: docs/odk-workflows/RepositoryFileStructure.md - modified: src/ontology/Makefile - modified: src/ontology/run.sh - new file: src/scripts/run-command.sh - modified: src/scripts/update_repo.sh - Add: metadata/icd11.yml - Update: prefixes.csv, mondo.sssom.config.yml: Delete old prefix ICD11, add new ICD11_FOUNDATION --- docs/metrics.md | 1 + docs/metrics/icd11.md | 52 +++++++++++++++ docs/odk-workflows/RepositoryFileStructure.md | 1 + docs/sources.md | 1 + docs/sources/icd11.md | 19 ++++++ mkdocs.yaml | 2 + src/ontology/Makefile | 18 +++++- src/ontology/config/prefixes.csv | 1 + src/ontology/metadata/icd11-metrics.json | 64 +++++++++++++++++++ src/ontology/metadata/icd11.yml | 13 ++++ src/ontology/metadata/mondo.sssom.config.yml | 6 +- src/ontology/mondo-ingest-odk.yaml | 2 + 12 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 docs/metrics/icd11.md create mode 100644 docs/sources/icd11.md create mode 100644 src/ontology/metadata/icd11-metrics.json create mode 100644 src/ontology/metadata/icd11.yml diff --git a/docs/metrics.md b/docs/metrics.md index eaebfec4..6ce57b4e 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -6,6 +6,7 @@ You can find information about the source modules ingested below. 
Remember that - [GARD](metrics/gard.md) - [ICD10CM](metrics/icd10cm.md) - [ICD10WHO](metrics/icd10who.md) +- [ICD11](metrics/icd11.md) - [NCIT](metrics/ncit.md) - [OMIM](metrics/omim.md) - [ORDO](metrics/ordo.md) diff --git a/docs/metrics/icd11.md b/docs/metrics/icd11.md new file mode 100644 index 00000000..0ee59ee0 --- /dev/null +++ b/docs/metrics/icd11.md @@ -0,0 +1,52 @@ +# Metrics HTTP://PURL.OBOLIBRARY.ORG/OBO/MONDO-INGEST/COMPONENTS/ICD11 + +**IRI:** http://purl.obolibrary.org/obo/mondo-ingest/components/icd11.owl + +**Version IRI:** no.iri + +### Entities and axioms + +| Metric | Value | +| ------ | ----- | +| Annotation properties | 0 | +| Axioms | 0 | +| Logical axioms | 0 | +| Classes | 0 | +| Object properties | 0 | +| Data properties | 0 | +| Individuals | 0 | + + +### Expressivity + +| Metric | Value | +| ------ | ----- | +| Expressivity | | +| OWL2 | True | +| OWL2 DL | True | +| OWL2 EL | True | +| OWL2 QL | True | +| OWL2 RL | True | + +#### Axiom types + +| Metric | Value | +| ------ | ----- | + + +#### Entity namespaces: axiom counts by namespace + +| Metric | Value | +| ------ | ----- | + + +#### Class expressions used + +| Metric | Value | +| ------ | ----- | + + +More information about the source can be found [in the documentation](../sources.md). The raw data (ontology metrics) can be found [on GitHub](https://github.com/monarch-initiative/mondo-ingest/tree/main/src/ontology/metadata). + +You can make issues or ask questions about this source [here](https://github.com/monarch-initiative/mondo-ingest/issues). 
+ diff --git a/docs/odk-workflows/RepositoryFileStructure.md b/docs/odk-workflows/RepositoryFileStructure.md index c941f173..864e41c6 100644 --- a/docs/odk-workflows/RepositoryFileStructure.md +++ b/docs/odk-workflows/RepositoryFileStructure.md @@ -34,6 +34,7 @@ These are the components in MONDO-INGEST | gard.owl | https://github.com/monarch-initiative/gard/releases/latest/download/gard.owl | | icd10cm.owl | https://data.bioontology.org/ontologies/ICD10CM/submissions/23/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb | | icd10who.owl | https://github.com/monarch-initiative/icd10who/releases/latest/download/icd10who.ttl | +| icd11.owl | https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz | | ncit.owl | http://purl.obolibrary.org/obo/ncit.owl | | omim.owl | https://github.com/monarch-initiative/omim/releases/latest/download/omim.owl | | ordo.owl | http://www.orphadata.org/data/ORDO/ordo_orphanet.owl | diff --git a/docs/sources.md b/docs/sources.md index 93403623..a0b36af8 100644 --- a/docs/sources.md +++ b/docs/sources.md @@ -4,6 +4,7 @@ - [GARD](sources/gard.md) - [ICD10CM](sources/icd10cm.md) - [ICD10WHO](sources/icd10who.md) +- [ICD11](sources/icd11.md) - [NCIT](sources/ncit.md) - [OMIM](sources/omim.md) - [ORDO](sources/ordo.md) diff --git a/docs/sources/icd11.md b/docs/sources/icd11.md new file mode 100644 index 00000000..1229b716 --- /dev/null +++ b/docs/sources/icd11.md @@ -0,0 +1,19 @@ +# MONDO - ICD11 Alignment + +**Source name:** International Classification of Diseases 11th Revision + +**Source description:** The International Classification of Diseases (ICD) provides a common language that allows health professionals to share standardized information across the world. The eleventh revision contains around 17 000 unique codes, more than 120 000 codable terms and is now entirely digital.Feb 11, 2022 +This data source in particular is the ICD11 foundation, not one of its linearizations. 
+ + +**Homepage:** https://icd.who.int/ + +**Comments about this source:** None + + + + + +The data pipeline that generates the source is implemented in `make`, in this source file: [src/ontology/mondo-ingest.Makefile](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/ontology/mondo-ingest.Makefile). + +You can make issues or ask questions about this source [here](https://github.com/monarch-initiative/mondo-ingest/issues). diff --git a/mkdocs.yaml b/mkdocs.yaml index 5a7f3523..1e8d8231 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -28,6 +28,7 @@ nav: - GARD: sources/gard.md - ICD10CM: sources/icd10cm.md - ICD10WHO: sources/icd10who.md + - ICD11: sources/icd11.md - NCIT: sources/ncit.md - OMIM: sources/omim.md - ORDO: sources/ordo.md @@ -37,6 +38,7 @@ nav: - GARD: metrics/gard.md - ICD10CM: metrics/icd10cm.md - ICD10WHO: metrics/icd10who.md + - ICD11: metrics/icd11.md - NCIT: metrics/ncit.md - OMIM: metrics/omim.md - ORDO: metrics/ordo.md diff --git a/src/ontology/Makefile b/src/ontology/Makefile index 3e5cd8a6..e01f2bc9 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -10,7 +10,7 @@ # More information: https://github.com/INCATools/ontology-development-kit/ # Fingerprint of the configuration file when this Makefile was last generated -CONFIG_HASH= 7e46e2aae3d97f90d3901bf0f67d6b8673defa3e391aab5f4d26c3412861e875 +CONFIG_HASH= af087eeec39b0ba73668a942a0dc34ad1e989b35f240d6466b4098dcbc8945eb # ---------------------------------------- @@ -54,7 +54,7 @@ OBODATE ?= $(shell date +'%d:%m:%Y %H:%M') VERSION= $(TODAY) ANNOTATE_ONTOLOGY_VERSION = annotate -V $(ONTBASE)/releases/$(VERSION)/$@ --annotation owl:versionInfo $(VERSION) ANNOTATE_CONVERT_FILE = annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) convert -f ofn --output $@.tmp.owl && mv $@.tmp.owl $@ -OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/ncit.owl 
$(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl +OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl ONTOLOGYTERMS = $(TMPDIR)/ontologyterms.txt EDIT_PREPROCESSED = $(TMPDIR)/$(ONT)-preprocess.owl @@ -478,6 +478,20 @@ $(COMPONENTSDIR)/icd10who.owl: component-download-icd10who.owl .PRECIOUS: $(COMPONENTSDIR)/icd10who.owl +.PHONY: component-download-icd11.owl +component-download-icd11.owl: | $(TMPDIR) + if [ $(MIR) = true ] && [ $(COMP) = true ]; then $(ROBOT) merge -I https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz \ + annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $(TMPDIR)/$@.owl; fi + +$(COMPONENTSDIR)/icd11.owl: component-download-icd11.owl + if [ $(COMP) = true ]; then if cmp -s $(TMPDIR)/component-download-icd11.owl.owl $(TMPDIR)/component-download-icd11.owl.tmp.owl ; then echo "Component identical."; \ + else echo "Component is different, updating." 
&&\ + cp $(TMPDIR)/component-download-icd11.owl.owl $(TMPDIR)/component-download-icd11.owl.tmp.owl &&\ + $(ROBOT) annotate -i $(TMPDIR)/component-download-icd11.owl.owl --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@; fi; fi + +.PRECIOUS: $(COMPONENTSDIR)/icd11.owl + + .PHONY: component-download-ncit.owl component-download-ncit.owl: | $(TMPDIR) if [ $(MIR) = true ] && [ $(COMP) = true ]; then $(ROBOT) merge -I http://purl.obolibrary.org/obo/ncit.owl \ diff --git a/src/ontology/config/prefixes.csv b/src/ontology/config/prefixes.csv index 1322fd01..d0a833cf 100644 --- a/src/ontology/config/prefixes.csv +++ b/src/ontology/config/prefixes.csv @@ -227,6 +227,7 @@ ICD10CM,http://purl.bioontology.org/ontology/ICD10CM/ ICD10CM2,https://icd.codes/icd10cm/ ICD10WHO,https://icd.who.int/browse10/2019/en#/ ICD10WHO2010,http://apps.who.int/classifications/icd10/browse/2010/en#/ +ICD11_FOUNDATION,http://id.who.int/icd/entity/ OMIMPS,https://omim.org/phenotypicSeries/PS OMIM,https://omim.org/entry/ Orphanet,http://www.orpha.net/ORDO/Orphanet_ diff --git a/src/ontology/metadata/icd11-metrics.json b/src/ontology/metadata/icd11-metrics.json new file mode 100644 index 00000000..9f4d6b26 --- /dev/null +++ b/src/ontology/metadata/icd11-metrics.json @@ -0,0 +1,64 @@ +{ + "metrics": { + "abox_axiom_count": 0, + "abox_axiom_count_incl": 0, + "annotation_property_count": 0, + "annotation_property_count_incl": 0, + "axiom_count": 0, + "axiom_count_incl": 0, + "class_count": 0, + "class_count_incl": 0, + "dataproperty_count": 0, + "dataproperty_count_incl": 0, + "datatypes_count": 0, + "datatypes_count_incl": 0, + "dt_builtin_count": 0, + "dt_builtin_count_incl": 0, + "dt_notbuiltin_count": 0, + "dt_notbuiltin_count_incl": 0, + "expressivity": "", + "expressivity_incl": "", + "individual_count": 0, + "individual_count_incl": 0, + "logical_axiom_count": 0, + "logical_axiom_count_incl": 0, + "obj_property_count": 0, + "obj_property_count_incl": 0, + "ontology_anno_count": 0, + 
"ontology_iri": "http://purl.obolibrary.org/obo/mondo-ingest/components/icd11.owl", + "ontology_version_iri": "no.iri", + "owl2": true, + "owl2_dl": true, + "owl2_el": true, + "owl2_ql": true, + "owl2_rl": true, + "rbox_axiom_count": 0, + "rbox_axiom_count_incl": 0, + "rdfs": true, + "rule_count": 0, + "rule_count_incl": 0, + "signature_entity_count": 0, + "signature_entity_count_incl": 0, + "syntax": "RDF/XML Syntax", + "tbox_axiom_count": 0, + "tbox_axiom_count_incl": 0, + "tboxrbox_axiom_count": 0, + "tboxrbox_axiom_count_incl": 0, + "axiom_types": [], + "axiom_types_incl": [], + "constructs": [], + "constructs_incl": [], + "valid_imports": [], + "valid_imports_incl": [], + "axiom_type_count": {}, + "axiom_type_count_incl": {}, + "class_expression_count": {}, + "class_expression_count_incl": {}, + "curie_map": {}, + "namespace_axiom_count": {}, + "namespace_axiom_count_incl": {}, + "namespace_entity_count": {}, + "namespace_entity_count_incl": {}, + "owl2dl_profile_violation": {} + } +} \ No newline at end of file diff --git a/src/ontology/metadata/icd11.yml b/src/ontology/metadata/icd11.yml new file mode 100644 index 00000000..05b0df61 --- /dev/null +++ b/src/ontology/metadata/icd11.yml @@ -0,0 +1,13 @@ +id: ICD11 +label: International Classification of Diseases 11th Revision +prefix_map: + ICD11_FOUNDATION: http://id.who.int/icd/entity/ +description: > + The International Classification of Diseases (ICD) provides a common language that allows health professionals to + share standardized information across the world. The eleventh revision contains around 17 000 unique codes, more than + 120 000 codable terms and is now entirely digital.Feb 11, 2022 + + This data source in particular is the ICD11 foundation, not one of its linearizations. 
+homepage: https://icd.who.int/ +base_prefix_map: + ICD11_FOUNDATION: http://id.who.int/icd/entity/ diff --git a/src/ontology/metadata/mondo.sssom.config.yml b/src/ontology/metadata/mondo.sssom.config.yml index 173ac8fe..cb59cf6d 100644 --- a/src/ontology/metadata/mondo.sssom.config.yml +++ b/src/ontology/metadata/mondo.sssom.config.yml @@ -57,7 +57,7 @@ curie_map: IEDB: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/IEDB/" PMID: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/PMID/" KEGG: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/KEGG/" - ICD11: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/" + ICD11_FOUNDATION: "http://id.who.int/icd/entity/" DECIPHER: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/DECIPHER/" CSP: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/CSP/" Wikipedia: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/Wikipedia/" @@ -276,9 +276,9 @@ extended_prefix_map: prefix_synonyms: [] uri_prefix: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/KEGG/ uri_prefix_synonyms: [] - - prefix: ICD11 + - prefix: ICD11_FOUNDATION prefix_synonyms: [] - uri_prefix: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/ + uri_prefix: http://id.who.int/icd/entity/ uri_prefix_synonyms: [] - prefix: DECIPHER prefix_synonyms: [] diff --git a/src/ontology/mondo-ingest-odk.yaml b/src/ontology/mondo-ingest-odk.yaml index 9476ceed..90ebf531 100644 --- a/src/ontology/mondo-ingest-odk.yaml +++ b/src/ontology/mondo-ingest-odk.yaml @@ -33,6 +33,8 @@ components: source: https://data.bioontology.org/ontologies/ICD10CM/submissions/23/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb - filename: icd10who.owl source: https://github.com/monarch-initiative/icd10who/releases/latest/download/icd10who.ttl + - filename: icd11.owl + source: https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz - filename: ncit.owl source: 
http://purl.obolibrary.org/obo/ncit.owl - filename: omim.owl From 44e2e57d7b845d1228f66c892673d85bc99160a5 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 15 Feb 2024 17:33:54 -0500 Subject: [PATCH 05/18] ICD11 config & docs - Rename: ICD11 -> ICD11Foundation - Add prefixes: icd11.foundation, icd11.schema, icd11.z - Add: intensional exclusions TSV (currently empty) - Add: SPARQL query for selecting all diseases - Update: ICD11 docs: Explanation about problematic inferred equivalence axioms General - Update: Several SPARQL files with incorrect comments in regards to which ontology the file was for, or what was being selected. --- docs/metrics.md | 2 +- docs/metrics/{icd11.md => icd11foundation.md} | 36 +++-- docs/odk-workflows/RepositoryFileStructure.md | 2 +- docs/sources.md | 2 +- docs/sources/{icd11.md => icd11foundation.md} | 7 +- mkdocs.yaml | 4 +- src/ontology/Makefile | 20 +-- .../config/icd11foundation_exclusions.tsv | 1 + src/ontology/config/prefixes.csv | 5 +- src/ontology/metadata/icd11-metrics.json | 64 --------- .../metadata/icd11foundation-metrics.json | 132 ++++++++++++++++++ .../{icd11.yml => icd11foundation.yml} | 10 +- src/ontology/metadata/mondo.sssom.config.yml | 19 ++- src/ontology/mondo-ingest-odk.yaml | 4 +- src/sparql/icd10cm-relevant-diseases.sparql | 2 +- src/sparql/icd10cm-relevant-signature.sparql | 2 +- src/sparql/icd10who-relevant-diseases.sparql | 2 +- src/sparql/icd10who-relevant-signature.sparql | 2 +- .../icd11foundation-relevant-signature.sparql | 18 +++ src/sparql/medgen-relevant-diseases.sparql | 2 +- src/sparql/medgen-relevant-signature.sparql | 2 +- src/sparql/omim-relevant-diseases.sparql | 2 +- src/sparql/omim-relevant-signature.sparql | 3 +- 23 files changed, 234 insertions(+), 109 deletions(-) rename docs/metrics/{icd11.md => icd11foundation.md} (58%) rename docs/sources/{icd11.md => icd11foundation.md} (71%) create mode 100644 src/ontology/config/icd11foundation_exclusions.tsv delete mode 100644 
src/ontology/metadata/icd11-metrics.json create mode 100644 src/ontology/metadata/icd11foundation-metrics.json rename src/ontology/metadata/{icd11.yml => icd11foundation.yml} (65%) create mode 100644 src/sparql/icd11foundation-relevant-signature.sparql diff --git a/docs/metrics.md b/docs/metrics.md index 6ce57b4e..ecb4c3a1 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -6,7 +6,7 @@ You can find information about the source modules ingested below. Remember that - [GARD](metrics/gard.md) - [ICD10CM](metrics/icd10cm.md) - [ICD10WHO](metrics/icd10who.md) -- [ICD11](metrics/icd11.md) +- [ICD11Foundation](metrics/icd11foundation.md) - [NCIT](metrics/ncit.md) - [OMIM](metrics/omim.md) - [ORDO](metrics/ordo.md) diff --git a/docs/metrics/icd11.md b/docs/metrics/icd11foundation.md similarity index 58% rename from docs/metrics/icd11.md rename to docs/metrics/icd11foundation.md index 0ee59ee0..d92bca68 100644 --- a/docs/metrics/icd11.md +++ b/docs/metrics/icd11foundation.md @@ -1,18 +1,18 @@ -# Metrics HTTP://PURL.OBOLIBRARY.ORG/OBO/MONDO-INGEST/COMPONENTS/ICD11 +# Metrics HTTP://PURL.OBOLIBRARY.ORG/OBO/MONDO-INGEST/COMPONENTS/ICD11FOUNDATION -**IRI:** http://purl.obolibrary.org/obo/mondo-ingest/components/icd11.owl +**IRI:** http://purl.obolibrary.org/obo/mondo-ingest/components/icd11foundation.owl -**Version IRI:** no.iri +**Version IRI:** http://purl.obolibrary.org/obo/mondo-ingest/releases/2024-02-17/components/icd11foundation.owl ### Entities and axioms | Metric | Value | | ------ | ----- | -| Annotation properties | 0 | -| Axioms | 0 | -| Logical axioms | 0 | -| Classes | 0 | -| Object properties | 0 | +| Annotation properties | 21 | +| Axioms | 570662 | +| Logical axioms | 130473 | +| Classes | 100002 | +| Object properties | 70 | | Data properties | 0 | | Individuals | 0 | @@ -21,29 +21,43 @@ | Metric | Value | | ------ | ----- | -| Expressivity | | +| Expressivity | CINTEH | | OWL2 | True | | OWL2 DL | True | | OWL2 EL | True | -| OWL2 QL | True | -| OWL2 RL | 
True | +| OWL2 QL | False | +| OWL2 RL | False | #### Axiom types | Metric | Value | | ------ | ----- | +| AnnotationAssertion | 340100 | +| EquivalentClasses | 5075 | +| SubObjectPropertyOf | 51 | +| Declaration | 100089 | +| SubClassOf | 125347 | #### Entity namespaces: axiom counts by namespace | Metric | Value | | ------ | ----- | +| prefix_unknown | 100084 | +| owl | 3 | +| rdf | 1 | +| xsd | 1 | +| skos | 5 | +| rdfs | 1 | #### Class expressions used | Metric | Value | | ------ | ----- | +| Class | 392111 | +| ObjectSomeValuesFrom | 40919 | +| ObjectIntersectionOf | 19706 | More information about the source can be found [in the documentation](../sources.md). The raw data (ontology metrics) can be found [on GitHub](https://github.com/monarch-initiative/mondo-ingest/tree/main/src/ontology/metadata). diff --git a/docs/odk-workflows/RepositoryFileStructure.md b/docs/odk-workflows/RepositoryFileStructure.md index 864e41c6..1a836825 100644 --- a/docs/odk-workflows/RepositoryFileStructure.md +++ b/docs/odk-workflows/RepositoryFileStructure.md @@ -34,7 +34,7 @@ These are the components in MONDO-INGEST | gard.owl | https://github.com/monarch-initiative/gard/releases/latest/download/gard.owl | | icd10cm.owl | https://data.bioontology.org/ontologies/ICD10CM/submissions/23/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb | | icd10who.owl | https://github.com/monarch-initiative/icd10who/releases/latest/download/icd10who.ttl | -| icd11.owl | https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz | +| icd11foundation.owl | https://github.com/monarch-initiative/icd11/releases/latest/download/icd11foundation.owl | | ncit.owl | http://purl.obolibrary.org/obo/ncit.owl | | omim.owl | https://github.com/monarch-initiative/omim/releases/latest/download/omim.owl | | ordo.owl | http://www.orphadata.org/data/ORDO/ordo_orphanet.owl | diff --git a/docs/sources.md b/docs/sources.md index a0b36af8..6293e371 100644 --- a/docs/sources.md +++ b/docs/sources.md @@ -4,7 
+4,7 @@ - [GARD](sources/gard.md) - [ICD10CM](sources/icd10cm.md) - [ICD10WHO](sources/icd10who.md) -- [ICD11](sources/icd11.md) +- [ICD11Foundation](sources/icd11foundation.md) - [NCIT](sources/ncit.md) - [OMIM](sources/omim.md) - [ORDO](sources/ordo.md) diff --git a/docs/sources/icd11.md b/docs/sources/icd11foundation.md similarity index 71% rename from docs/sources/icd11.md rename to docs/sources/icd11foundation.md index 1229b716..8ab32c53 100644 --- a/docs/sources/icd11.md +++ b/docs/sources/icd11foundation.md @@ -1,4 +1,4 @@ -# MONDO - ICD11 Alignment +# MONDO - ICD11FOUNDATION Alignment **Source name:** International Classification of Diseases 11th Revision @@ -8,7 +8,10 @@ This data source in particular is the ICD11 foundation, not one of its lineariza **Homepage:** https://icd.who.int/ -**Comments about this source:** None +**Comments about this source:** +Because the existing logical equivalence class axioms led to equivalence cliques (groups of distinct disease identifiers +that were inferred to be semantically identical), we decided to strip out all equivalence class axioms from the foundation +prior to processing it in the ingest. 
diff --git a/mkdocs.yaml b/mkdocs.yaml index 1e8d8231..369f2b23 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -28,7 +28,7 @@ nav: - GARD: sources/gard.md - ICD10CM: sources/icd10cm.md - ICD10WHO: sources/icd10who.md - - ICD11: sources/icd11.md + - ICD11Foundation: sources/icd11foundation.md - NCIT: sources/ncit.md - OMIM: sources/omim.md - ORDO: sources/ordo.md @@ -38,7 +38,7 @@ nav: - GARD: metrics/gard.md - ICD10CM: metrics/icd10cm.md - ICD10WHO: metrics/icd10who.md - - ICD11: metrics/icd11.md + - ICD11Foundation: metrics/icd11foundation.md - NCIT: metrics/ncit.md - OMIM: metrics/omim.md - ORDO: metrics/ordo.md diff --git a/src/ontology/Makefile b/src/ontology/Makefile index e01f2bc9..53ed676b 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -10,7 +10,7 @@ # More information: https://github.com/INCATools/ontology-development-kit/ # Fingerprint of the configuration file when this Makefile was last generated -CONFIG_HASH= af087eeec39b0ba73668a942a0dc34ad1e989b35f240d6466b4098dcbc8945eb +CONFIG_HASH= 1f779a242dc046d5c98b39bde1a60c2488195d1ad6e0a21ee94bac1ae0f05b98 # ---------------------------------------- @@ -54,7 +54,7 @@ OBODATE ?= $(shell date +'%d:%m:%Y %H:%M') VERSION= $(TODAY) ANNOTATE_ONTOLOGY_VERSION = annotate -V $(ONTBASE)/releases/$(VERSION)/$@ --annotation owl:versionInfo $(VERSION) ANNOTATE_CONVERT_FILE = annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) convert -f ofn --output $@.tmp.owl && mv $@.tmp.owl $@ -OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl +OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11foundation.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl ONTOLOGYTERMS = $(TMPDIR)/ontologyterms.txt 
EDIT_PREPROCESSED = $(TMPDIR)/$(ONT)-preprocess.owl @@ -478,18 +478,18 @@ $(COMPONENTSDIR)/icd10who.owl: component-download-icd10who.owl .PRECIOUS: $(COMPONENTSDIR)/icd10who.owl -.PHONY: component-download-icd11.owl -component-download-icd11.owl: | $(TMPDIR) - if [ $(MIR) = true ] && [ $(COMP) = true ]; then $(ROBOT) merge -I https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz \ +.PHONY: component-download-icd11foundation.owl +component-download-icd11foundation.owl: | $(TMPDIR) + if [ $(MIR) = true ] && [ $(COMP) = true ]; then $(ROBOT) merge -I https://github.com/monarch-initiative/icd11/releases/latest/download/icd11foundation.owl \ annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $(TMPDIR)/$@.owl; fi -$(COMPONENTSDIR)/icd11.owl: component-download-icd11.owl - if [ $(COMP) = true ]; then if cmp -s $(TMPDIR)/component-download-icd11.owl.owl $(TMPDIR)/component-download-icd11.owl.tmp.owl ; then echo "Component identical."; \ +$(COMPONENTSDIR)/icd11foundation.owl: component-download-icd11foundation.owl + if [ $(COMP) = true ]; then if cmp -s $(TMPDIR)/component-download-icd11foundation.owl.owl $(TMPDIR)/component-download-icd11foundation.owl.tmp.owl ; then echo "Component identical."; \ else echo "Component is different, updating." 
&&\ - cp $(TMPDIR)/component-download-icd11.owl.owl $(TMPDIR)/component-download-icd11.owl.tmp.owl &&\ - $(ROBOT) annotate -i $(TMPDIR)/component-download-icd11.owl.owl --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@; fi; fi + cp $(TMPDIR)/component-download-icd11foundation.owl.owl $(TMPDIR)/component-download-icd11foundation.owl.tmp.owl &&\ + $(ROBOT) annotate -i $(TMPDIR)/component-download-icd11foundation.owl.owl --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@; fi; fi -.PRECIOUS: $(COMPONENTSDIR)/icd11.owl +.PRECIOUS: $(COMPONENTSDIR)/icd11foundation.owl .PHONY: component-download-ncit.owl diff --git a/src/ontology/config/icd11foundation_exclusions.tsv b/src/ontology/config/icd11foundation_exclusions.tsv new file mode 100644 index 00000000..bdde4626 --- /dev/null +++ b/src/ontology/config/icd11foundation_exclusions.tsv @@ -0,0 +1 @@ +term_id term_label exclusion_reason exclude_children diff --git a/src/ontology/config/prefixes.csv b/src/ontology/config/prefixes.csv index d0a833cf..2d6bbc19 100644 --- a/src/ontology/config/prefixes.csv +++ b/src/ontology/config/prefixes.csv @@ -227,7 +227,10 @@ ICD10CM,http://purl.bioontology.org/ontology/ICD10CM/ ICD10CM2,https://icd.codes/icd10cm/ ICD10WHO,https://icd.who.int/browse10/2019/en#/ ICD10WHO2010,http://apps.who.int/classifications/icd10/browse/2010/en#/ -ICD11_FOUNDATION,http://id.who.int/icd/entity/ +ICD11,http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/ +icd11.foundation,http://id.who.int/icd/entity/ +icd11.schema,http://id.who.int/icd/schema/ +icd11.z,http://who.int/icd#Z_ OMIMPS,https://omim.org/phenotypicSeries/PS OMIM,https://omim.org/entry/ Orphanet,http://www.orpha.net/ORDO/Orphanet_ diff --git a/src/ontology/metadata/icd11-metrics.json b/src/ontology/metadata/icd11-metrics.json deleted file mode 100644 index 9f4d6b26..00000000 --- a/src/ontology/metadata/icd11-metrics.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "metrics": { - "abox_axiom_count": 0, - 
"abox_axiom_count_incl": 0, - "annotation_property_count": 0, - "annotation_property_count_incl": 0, - "axiom_count": 0, - "axiom_count_incl": 0, - "class_count": 0, - "class_count_incl": 0, - "dataproperty_count": 0, - "dataproperty_count_incl": 0, - "datatypes_count": 0, - "datatypes_count_incl": 0, - "dt_builtin_count": 0, - "dt_builtin_count_incl": 0, - "dt_notbuiltin_count": 0, - "dt_notbuiltin_count_incl": 0, - "expressivity": "", - "expressivity_incl": "", - "individual_count": 0, - "individual_count_incl": 0, - "logical_axiom_count": 0, - "logical_axiom_count_incl": 0, - "obj_property_count": 0, - "obj_property_count_incl": 0, - "ontology_anno_count": 0, - "ontology_iri": "http://purl.obolibrary.org/obo/mondo-ingest/components/icd11.owl", - "ontology_version_iri": "no.iri", - "owl2": true, - "owl2_dl": true, - "owl2_el": true, - "owl2_ql": true, - "owl2_rl": true, - "rbox_axiom_count": 0, - "rbox_axiom_count_incl": 0, - "rdfs": true, - "rule_count": 0, - "rule_count_incl": 0, - "signature_entity_count": 0, - "signature_entity_count_incl": 0, - "syntax": "RDF/XML Syntax", - "tbox_axiom_count": 0, - "tbox_axiom_count_incl": 0, - "tboxrbox_axiom_count": 0, - "tboxrbox_axiom_count_incl": 0, - "axiom_types": [], - "axiom_types_incl": [], - "constructs": [], - "constructs_incl": [], - "valid_imports": [], - "valid_imports_incl": [], - "axiom_type_count": {}, - "axiom_type_count_incl": {}, - "class_expression_count": {}, - "class_expression_count_incl": {}, - "curie_map": {}, - "namespace_axiom_count": {}, - "namespace_axiom_count_incl": {}, - "namespace_entity_count": {}, - "namespace_entity_count_incl": {}, - "owl2dl_profile_violation": {} - } -} \ No newline at end of file diff --git a/src/ontology/metadata/icd11foundation-metrics.json b/src/ontology/metadata/icd11foundation-metrics.json new file mode 100644 index 00000000..bfb4b8e5 --- /dev/null +++ b/src/ontology/metadata/icd11foundation-metrics.json @@ -0,0 +1,132 @@ +{ + "metrics": { + "abox_axiom_count": 
0, + "abox_axiom_count_incl": 0, + "annotation_property_count": 21, + "annotation_property_count_incl": 21, + "axiom_count": 570662, + "axiom_count_incl": 570662, + "class_count": 100002, + "class_count_incl": 100002, + "dataproperty_count": 0, + "dataproperty_count_incl": 0, + "datatypes_count": 2, + "datatypes_count_incl": 2, + "dt_builtin_count": 2, + "dt_builtin_count_incl": 2, + "dt_notbuiltin_count": 0, + "dt_notbuiltin_count_incl": 0, + "expressivity": "CINTEH", + "expressivity_incl": "CINTEH", + "individual_count": 0, + "individual_count_incl": 0, + "logical_axiom_count": 130473, + "logical_axiom_count_incl": 130473, + "obj_property_count": 70, + "obj_property_count_incl": 70, + "ontology_anno_count": 5, + "ontology_iri": "http://purl.obolibrary.org/obo/mondo-ingest/components/icd11foundation.owl", + "ontology_version_iri": "http://purl.obolibrary.org/obo/mondo-ingest/releases/2024-02-17/components/icd11foundation.owl", + "owl2": true, + "owl2_dl": true, + "owl2_el": true, + "owl2_ql": false, + "owl2_rl": false, + "rbox_axiom_count": 51, + "rbox_axiom_count_incl": 51, + "rdfs": false, + "rule_count": 0, + "rule_count_incl": 0, + "signature_entity_count": 100095, + "signature_entity_count_incl": 100095, + "syntax": "RDF/XML Syntax", + "tbox_axiom_count": 130422, + "tbox_axiom_count_incl": 130422, + "tboxrbox_axiom_count": 130473, + "tboxrbox_axiom_count_incl": 130473, + "axiom_types": [ + "AnnotationAssertion", + "EquivalentClasses", + "SubObjectPropertyOf", + "Declaration", + "SubClassOf" + ], + "axiom_types_incl": [ + "AnnotationAssertion", + "EquivalentClasses", + "SubObjectPropertyOf", + "Declaration", + "SubClassOf" + ], + "constructs": [ + "ROLE_HIERARCHY", + "CONCEPT_INTERSECTION", + "FULL_EXISTENTIAL" + ], + "constructs_incl": [ + "ROLE_HIERARCHY", + "CONCEPT_INTERSECTION", + "FULL_EXISTENTIAL" + ], + "valid_imports": [], + "valid_imports_incl": [], + "axiom_type_count": { + "AnnotationAssertion": 340100, + "EquivalentClasses": 5075, + 
"SubObjectPropertyOf": 51, + "Declaration": 100089, + "SubClassOf": 125347 + }, + "axiom_type_count_incl": { + "AnnotationAssertion": 340100, + "EquivalentClasses": 5075, + "SubObjectPropertyOf": 51, + "Declaration": 100089, + "SubClassOf": 125347 + }, + "class_expression_count": { + "Class": 392111, + "ObjectSomeValuesFrom": 40919, + "ObjectIntersectionOf": 19706 + }, + "class_expression_count_incl": { + "Class": 392111, + "ObjectSomeValuesFrom": 40919, + "ObjectIntersectionOf": 19706 + }, + "curie_map": { + "owl": "http://www.w3.org/2002/07/owl#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#" + }, + "namespace_axiom_count": { + "prefix_unknown": 609878, + "owl": 6312, + "skos": 178135 + }, + "namespace_axiom_count_incl": { + "prefix_unknown": 609878, + "owl": 6312, + "skos": 178135 + }, + "namespace_entity_count": { + "prefix_unknown": 100084, + "owl": 3, + "rdf": 1, + "xsd": 1, + "skos": 5, + "rdfs": 1 + }, + "namespace_entity_count_incl": { + "prefix_unknown": 100084, + "owl": 3, + "rdf": 1, + "xsd": 1, + "skos": 5, + "rdfs": 1 + }, + "owl2dl_profile_violation": {} + } +} \ No newline at end of file diff --git a/src/ontology/metadata/icd11.yml b/src/ontology/metadata/icd11foundation.yml similarity index 65% rename from src/ontology/metadata/icd11.yml rename to src/ontology/metadata/icd11foundation.yml index 05b0df61..e3417a71 100644 --- a/src/ontology/metadata/icd11.yml +++ b/src/ontology/metadata/icd11foundation.yml @@ -1,7 +1,9 @@ -id: ICD11 +id: ICD11Foundation label: International Classification of Diseases 11th Revision prefix_map: - ICD11_FOUNDATION: http://id.who.int/icd/entity/ + icd11.foundation: http://id.who.int/icd/entity/ + icd11.schema: http://id.who.int/icd/schema/ + icd11.z: http://who.int/icd#Z_ description: > The International Classification of Diseases (ICD) provides a common language that 
allows health professionals to share standardized information across the world. The eleventh revision contains around 17 000 unique codes, more than @@ -10,4 +12,6 @@ description: > This data source in particular is the ICD11 foundation, not one of its linearizations. homepage: https://icd.who.int/ base_prefix_map: - ICD11_FOUNDATION: http://id.who.int/icd/entity/ + icd11.foundation: http://id.who.int/icd/entity/ + icd11.schema: http://id.who.int/icd/schema/ + icd11.z: http://who.int/icd#Z_ diff --git a/src/ontology/metadata/mondo.sssom.config.yml b/src/ontology/metadata/mondo.sssom.config.yml index cb59cf6d..2aefad03 100644 --- a/src/ontology/metadata/mondo.sssom.config.yml +++ b/src/ontology/metadata/mondo.sssom.config.yml @@ -57,7 +57,10 @@ curie_map: IEDB: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/IEDB/" PMID: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/PMID/" KEGG: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/KEGG/" - ICD11_FOUNDATION: "http://id.who.int/icd/entity/" + ICD11: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/" + icd11.foundation: http://id.who.int/icd/entity/ + icd11.schema: http://id.who.int/icd/schema/ + icd11.z: http://who.int/icd#Z_ DECIPHER: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/DECIPHER/" CSP: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/CSP/" Wikipedia: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/Wikipedia/" @@ -276,10 +279,22 @@ extended_prefix_map: prefix_synonyms: [] uri_prefix: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/KEGG/ uri_prefix_synonyms: [] - - prefix: ICD11_FOUNDATION + - prefix: ICD11 + prefix_synonyms: [] + uri_prefix: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/ + uri_prefix_synonyms: [] + - prefix: icd11.foundation prefix_synonyms: [] uri_prefix: http://id.who.int/icd/entity/ uri_prefix_synonyms: [] + - prefix: icd11.schema + prefix_synonyms: [] + 
uri_prefix: http://id.who.int/icd/schema/ + uri_prefix_synonyms: [] + - prefix: icd11.z + prefix_synonyms: [] + uri_prefix: http://who.int/icd#Z_ + uri_prefix_synonyms: [] - prefix: DECIPHER prefix_synonyms: [] uri_prefix: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/DECIPHER/ diff --git a/src/ontology/mondo-ingest-odk.yaml b/src/ontology/mondo-ingest-odk.yaml index 90ebf531..1320b2d2 100644 --- a/src/ontology/mondo-ingest-odk.yaml +++ b/src/ontology/mondo-ingest-odk.yaml @@ -33,8 +33,8 @@ components: source: https://data.bioontology.org/ontologies/ICD10CM/submissions/23/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb - filename: icd10who.owl source: https://github.com/monarch-initiative/icd10who/releases/latest/download/icd10who.ttl - - filename: icd11.owl - source: https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz + - filename: icd11foundation.owl + source: https://github.com/monarch-initiative/icd11/releases/latest/download/icd11foundation.owl - filename: ncit.owl source: http://purl.obolibrary.org/obo/ncit.owl - filename: omim.owl diff --git a/src/sparql/icd10cm-relevant-diseases.sparql b/src/sparql/icd10cm-relevant-diseases.sparql index 8a7734ad..4ead7aa1 100644 --- a/src/sparql/icd10cm-relevant-diseases.sparql +++ b/src/sparql/icd10cm-relevant-diseases.sparql @@ -9,7 +9,7 @@ PREFIX dbpedia: PREFIX foaf: PREFIX skos: -### For Disease Ontology, we are interested in all terms +### All terms SELECT DISTINCT ?term ?label ?deprecated WHERE { { diff --git a/src/sparql/icd10cm-relevant-signature.sparql b/src/sparql/icd10cm-relevant-signature.sparql index f86bb4f3..daba9b6c 100644 --- a/src/sparql/icd10cm-relevant-signature.sparql +++ b/src/sparql/icd10cm-relevant-signature.sparql @@ -1,4 +1,4 @@ -### For Disease Ontology, we are interested in all terms +### All terms SELECT DISTINCT ?term WHERE { { diff --git a/src/sparql/icd10who-relevant-diseases.sparql b/src/sparql/icd10who-relevant-diseases.sparql index ea8a9470..c723fe79 
100644 --- a/src/sparql/icd10who-relevant-diseases.sparql +++ b/src/sparql/icd10who-relevant-diseases.sparql @@ -9,7 +9,7 @@ PREFIX dbpedia: PREFIX foaf: PREFIX skos: -### For Disease Ontology, we are interested in all terms +### All terms SELECT DISTINCT ?term ?label ?deprecated WHERE { { diff --git a/src/sparql/icd10who-relevant-signature.sparql b/src/sparql/icd10who-relevant-signature.sparql index d50701f7..21474cb4 100644 --- a/src/sparql/icd10who-relevant-signature.sparql +++ b/src/sparql/icd10who-relevant-signature.sparql @@ -1,4 +1,4 @@ -### For Disease Ontology, we are interested in all terms +### All terms SELECT DISTINCT ?term WHERE { { diff --git a/src/sparql/icd11foundation-relevant-signature.sparql b/src/sparql/icd11foundation-relevant-signature.sparql new file mode 100644 index 00000000..f149d948 --- /dev/null +++ b/src/sparql/icd11foundation-relevant-signature.sparql @@ -0,0 +1,18 @@ +prefix rdfs: + +### All diseases +SELECT DISTINCT ?term +WHERE { + { + { + ?s1 ?p1 ?term . + ?term rdfs:subClassOf* . + } + UNION + { + ?term ?p2 ?o2 . + ?term rdfs:subClassOf* . 
+ } + } + FILTER(isIRI(?term)) +} diff --git a/src/sparql/medgen-relevant-diseases.sparql b/src/sparql/medgen-relevant-diseases.sparql index cac5065c..622a2d93 100644 --- a/src/sparql/medgen-relevant-diseases.sparql +++ b/src/sparql/medgen-relevant-diseases.sparql @@ -9,7 +9,7 @@ PREFIX dbpedia: PREFIX foaf: PREFIX skos: -# For SNOMED, we are only interested in the Disease or Disorder branch +# For MedGen, we are only interested in the Disease or Disorder branch SELECT DISTINCT ?term ?label ?deprecated WHERE { { diff --git a/src/sparql/medgen-relevant-signature.sparql b/src/sparql/medgen-relevant-signature.sparql index be0c9156..90efe462 100644 --- a/src/sparql/medgen-relevant-signature.sparql +++ b/src/sparql/medgen-relevant-signature.sparql @@ -10,7 +10,7 @@ PREFIX foaf: PREFIX skos: PREFIX rdfs: -# For SNOMED, we are only interested in the Disease or Disorder branch +# For MedGen, we are only interested in the Disease or Disorder branch SELECT DISTINCT ?term WHERE { { diff --git a/src/sparql/omim-relevant-diseases.sparql b/src/sparql/omim-relevant-diseases.sparql index f56a69a2..6e437823 100644 --- a/src/sparql/omim-relevant-diseases.sparql +++ b/src/sparql/omim-relevant-diseases.sparql @@ -9,7 +9,7 @@ PREFIX dbpedia: PREFIX foaf: PREFIX skos: -### For Disease Ontology, we are interested in all terms +### For OMIM, we are interested in all terms that are not genes. SELECT DISTINCT ?term ?label ?deprecated WHERE { { diff --git a/src/sparql/omim-relevant-signature.sparql b/src/sparql/omim-relevant-signature.sparql index 830065f8..3e543fc8 100644 --- a/src/sparql/omim-relevant-signature.sparql +++ b/src/sparql/omim-relevant-signature.sparql @@ -1,7 +1,6 @@ -### For OMIM, we are interested in all terms that are not genes. - PREFIX rdfs: +### For OMIM, we are interested in all terms that are not genes. 
SELECT DISTINCT ?term WHERE { { From 275453773a9d77906a42eefb4f1678512d98f00b Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Wed, 21 Feb 2024 16:56:00 -0500 Subject: [PATCH 06/18] Docs updates: Prereqs, etc Updates to README.md - Add: "Prerequisites" section, including Python and Docker. Added note about dev dependencies. - Add: "Running" section, and moved "Workflows" as a subsection into it. - Delete: Verbiage for "Workflows" section, leaving just the link. - Update: Fixed a typo: umapped -> unmapped - Update: Codestyle: Line length limit - Add: Extra verbiage to "Reports" section to outline further reports available. - Add: Some italics around a reference --- README.md | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 70b4d79a..b7b1549a 100644 --- a/README.md +++ b/README.md @@ -3,18 +3,37 @@ This repo is dedicated to the integration of various clinical terminologies and ontologies into Mondo. For more details see the [documentation](https://monarch-initiative.github.io/mondo-ingest/). -Work on the Mondo Source Ingest is funded by the NHGRI Phenomics First Grant 1RM1HG010860-01. +Work on the Mondo Source Ingest is funded by the _NHGRI Phenomics First Grant 1RM1HG010860-01_. -## Workflows -A variety of workflows are available to run the ingest. See the [workflows documentation](./docs/developer/workflows.md) for more details. +## Prerequisites +Python is a dev dependency. It's not needed to run the docker containers, but needed for local development situations +/ debugging. +1. Python 3.9+ +2. Docker +3. Docker images + One or both of the following, depending on if you want to run the stable build `latest` or `dev`: + - a. `docker pull obolibrary/odkfull:latest` + - b. 
`docker pull obolibrary/odkfull:dev` + +## Running +### Full build +`sh run.sh make build-mondo-ingest` + +### [Workflows](./docs/developer/workflows.md) ## Reports +A variety of reports are committed as static files in `src/ontology/reports/`, but some additional reports get rendered +into markdown pages as noted below. + ### Mapping progress report -The [mapping progress report](./docs/reports/unmapped.md) consists lists of all umapped terms fo each ontology, as well +The [mapping progress report](./docs/reports/unmapped.md) contains lists of all unmapped terms for each ontology, as well as a table of statistics showing total number of terms, excluded terms, deprecated terms, and unmapped terms. ### Mapped deprecated terms -The [_mapped deprecated terms_ page](./docs/reports/mapped_deprecated.md) contains a table of statistics showing total number of deprecated terms that have existing xrefs in Mondo, for each ontology. There is also a link to a page for each ontology which shows the term IDs and their corresponding mapped Mondo ID(s). +The [_mapped deprecated terms_ page](./docs/reports/mapped_deprecated.md) contains a table of statistics showing total number of deprecated terms that +have existing xrefs in Mondo, for each ontology. There is also a link to a page for each ontology which shows the term +IDs and their corresponding mapped Mondo ID(s). ### Migratable terms -The [_migrate_ page](./docs/reports/migrate.md) contains a table of statistics showing of terms ready for migration / integration into Mondo. +The [_migrate_ page](./docs/reports/migrate.md) contains a table of statistics on terms ready for migration / +integration into Mondo. From 02561a8e65f4739da3a1aff22b96eed3e14911e2 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Sat, 2 Mar 2024 18:37:18 -0500 Subject: [PATCH 07/18] Bugfix: unmapped/ outdir Fixed a bug in which the location where the output .owl files of unmapped terms were written did not match the goal's target location. 
--- src/ontology/mondo-ingest.Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 1d8ef5cb..bcd1ed97 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -236,7 +236,7 @@ unmapped/: unmapped/%-unmapped.owl: $(COMPONENTSDIR)/%.owl reports/%_unmapped_terms.tsv | unmapped/ cut -f 1 reports/$*_unmapped_terms.tsv | tail -n +2 > reports/$*_unmapped_terms.txt - $(ROBOT) filter -i components/$*.owl -T reports/$*_unmapped_terms.txt -o components/$@ + $(ROBOT) filter -i components/$*.owl -T reports/$*_unmapped_terms.txt -o $@ rm reports/$*_unmapped_terms.txt .PHONY: recreate-unmapped-components From 481d20dce9ae5d2f0806073fb8f8e620eccebd33 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Sun, 3 Mar 2024 17:16:26 -0500 Subject: [PATCH 08/18] Unneeded HGNC SPARQL - Update: In makefile, in components goals, removed unnecessary robot query updates involving HGNC mappings. This query only applies to OMIM. 
--- src/ontology/mondo-ingest.Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 1d8ef5cb..521ae3d7 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -113,7 +113,6 @@ $(COMPONENTSDIR)/doid.owl: $(TMPDIR)/doid_relevant_signature.txt | component-dow remove -T $(TMPDIR)/doid_relevant_signature.txt --select complement --select "classes individuals" --trim false \ query \ --update ../sparql/fix_omimps.ru \ - --update ../sparql/fix_hgnc_mappings.ru \ --update ../sparql/fix-labels-with-brackets.ru \ --update ../sparql/fix_complex_reification.ru \ --update ../sparql/rm_xref_by_prefix.ru \ @@ -143,7 +142,6 @@ $(COMPONENTSDIR)/icd10cm.owl: $(TMPDIR)/icd10cm_relevant_signature.txt | compone query \ --update ../sparql/fix_omimps.ru \ --update ../sparql/fix-labels-with-brackets.ru \ - --update ../sparql/fix_hgnc_mappings.ru \ --update ../sparql/fix_complex_reification.ru \ remove -T config/properties.txt --select complement --select properties --trim true \ annotate --ontology-iri $(URIBASE)/mondo/sources/icd10cm.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd10cm.owl -o $@; fi @@ -158,7 +156,6 @@ $(COMPONENTSDIR)/icd10who.owl: $(TMPDIR)/icd10who_relevant_signature.txt | compo query \ --update ../sparql/fix_omimps.ru \ --update ../sparql/fix-labels-with-brackets.ru \ - --update ../sparql/fix_hgnc_mappings.ru \ --update ../sparql/fix_complex_reification.ru \ remove -T config/properties.txt --select complement --select properties --trim true \ annotate --ontology-iri $(URIBASE)/mondo/sources/icd10who.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd10who.owl -o $@; fi From 87e74a88fcc11ba22a7411a4a9036529ed77d8b8 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Sun, 3 Mar 2024 17:37:23 -0500 Subject: [PATCH 09/18] Redundant GARD goal - Update: mondo-ingest.Makefile, removal of unneeded component-download goal for GARD, exactly mirroring 
what is in Makefile. --- src/ontology/mondo-ingest.Makefile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 1d8ef5cb..aa91aa7d 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -163,11 +163,6 @@ $(COMPONENTSDIR)/icd10who.owl: $(TMPDIR)/icd10who_relevant_signature.txt | compo remove -T config/properties.txt --select complement --select properties --trim true \ annotate --ontology-iri $(URIBASE)/mondo/sources/icd10who.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd10who.owl -o $@; fi -.PHONY: component-download-gard.owl -component-download-gard.owl: | $(TMPDIR) - if [ $(MIR) = true ] && [ $(COMP) = true ]; then $(ROBOT) merge -I https://github.com/monarch-initiative/gard/releases/latest/download/gard.owl \ - annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $(TMPDIR)/$@.owl; fi - $(COMPONENTSDIR)/gard.owl: $(TMPDIR)/gard_relevant_signature.txt | component-download-gard.owl if [ $(COMP) = true ]; then $(ROBOT) remove -i $(TMPDIR)/component-download-gard.owl.owl --select imports \ remove -T $(TMPDIR)/gard_relevant_signature.txt --select complement --select "classes individuals" --trim false \ From d189132866c9d140795d6c8f0cd49f2f4fe46ae5 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Sun, 10 Mar 2024 17:16:32 -0400 Subject: [PATCH 10/18] Unneeded HGNC SPARQL - Delete: fix_hgnc_mappings.ru and references. It is no longer needed. 
--- src/ontology/mondo-ingest.Makefile | 1 - src/sparql/fix_hgnc_mappings.ru | 40 ------------------------------ 2 files changed, 41 deletions(-) delete mode 100644 src/sparql/fix_hgnc_mappings.ru diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 521ae3d7..6533f5ca 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -70,7 +70,6 @@ $(COMPONENTSDIR)/omim.owl: $(TMPDIR)/omim_relevant_signature.txt | component-dow query \ --update ../sparql/fix_omimps.ru \ --update ../sparql/fix-labels-with-brackets.ru \ - --update ../sparql/fix_hgnc_mappings.ru \ --update ../sparql/fix_complex_reification.ru \ --update ../sparql/fix_illegal_punning_omim.ru \ annotate --ontology-iri $(URIBASE)/mondo/sources/omim.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/omim.owl -o $@; fi diff --git a/src/sparql/fix_hgnc_mappings.ru b/src/sparql/fix_hgnc_mappings.ru deleted file mode 100644 index d7dc0888..00000000 --- a/src/sparql/fix_hgnc_mappings.ru +++ /dev/null @@ -1,40 +0,0 @@ -prefix oio: -prefix owl: -prefix oboInOwl: -prefix xref: - -#### https://github.com/monarch-initiative/omim/issues/45 - -DELETE { - ?entity owl:equivalentClass ?value . -} -WHERE -{ - VALUES ?entity { - - - - - - - - - - - - - - - - - - - - - - - - } - ?entity owl:equivalentClass ?value . - FILTER (isIRI(?value) && STRSTARTS(STR(?value),"https://www.ncbi.nlm.nih.gov/gene/")) -} From dcaefd78c5ca9fa32c515644a21b18490f58620d Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Tue, 12 Mar 2024 18:20:39 -0400 Subject: [PATCH 11/18] Docs: context.json - Update: add-new-source.md with instructions on updating this file. 
--- docs/developer/add-new-source.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/developer/add-new-source.md b/docs/developer/add-new-source.md index a06c56fe..b00c6faa 100644 --- a/docs/developer/add-new-source.md +++ b/docs/developer/add-new-source.md @@ -25,6 +25,9 @@ Prefixes need to be entered in the following places in the yml: ### 2.3. `config/prefixes.csv` Add prefixes. +### 2.4. `config/context.json` +Add prefixes. + ## 3. Docs ### 3.1. `mkdocs.yaml` Update the Website Table of Contents in [mkdocs.yaml](https://github.com/monarch-initiative/mondo-ingest/blob/main/mkdocs.yaml) From c7fbb35cb235e8ec9ddd421c237e335cca3fd74d Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 21 Mar 2024 18:57:29 -0400 Subject: [PATCH 12/18] Update/fix slurp comment --- src/scripts/migrate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scripts/migrate.py b/src/scripts/migrate.py index 994ac5a8..02899839 100644 --- a/src/scripts/migrate.py +++ b/src/scripts/migrate.py @@ -111,7 +111,8 @@ def slurp( # Determine slurpable / migratable terms # To be migratable, the term (i) must not already be mapped, (ii) must not be excluded (e.g. not in # `reports/%_term_exclusions.txt`), and (iii) must not be deprecated / obsolete. Then, unless - # `parent_conditions_off`, will also (iv) `_check_parent_conditions()`. + # `parent_conditions_off`, will also (iv) check parent conditions. For information about parent conditions, see the + # help text for `--parent-conditions-off`. terms_to_slurp: List[Dict[str, str]] = [] slurp_candidates = [t for t in slurp_candidates if _valid_parent_conditions( t.direct_owned_parent_curies, mapped, excluded, obsolete)] if not parent_conditions_off else slurp_candidates From ac9c70ddba67f85f6029f11c1929f54ce7602e19 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 21 Mar 2024 19:25:08 -0400 Subject: [PATCH 13/18] Bugfix: OMIMPS namespace Updated OMIMPS namespace to remove www, following change from source. 
--- src/ontology/config/context.json | 2 +- src/ontology/config/prefixes.csv | 2 +- src/ontology/metadata/doid.metadata.sssom.yml | 2 +- src/ontology/metadata/omim.yml | 4 ++-- src/sparql/fix_make_omim_exact.ru | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ontology/config/context.json b/src/ontology/config/context.json index 85abe98b..89834463 100644 --- a/src/ontology/config/context.json +++ b/src/ontology/config/context.json @@ -31,7 +31,7 @@ "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_", "ICD10CM": "http://purl.bioontology.org/ontology/ICD10CM/", "ICD10WHO": "http://apps.who.int/classifications/icd10/browse/2010/en#/", - "OMIMPS": "https://www.omim.org/phenotypicSeries/PS", + "OMIMPS": "https://omim.org/phenotypicSeries/PS", "MONDOREL": "http://purl.obolibrary.org/obo/mondo#" } } \ No newline at end of file diff --git a/src/ontology/config/prefixes.csv b/src/ontology/config/prefixes.csv index 2d6bbc19..75fb892f 100644 --- a/src/ontology/config/prefixes.csv +++ b/src/ontology/config/prefixes.csv @@ -250,7 +250,7 @@ semapv,https://w3id.org/semapv/vocab/ HGNC_SYMBOL,https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/ HGNC,https://identifiers.org/hgnc/ ncbi.gene,https://www.ncbi.nlm.nih.gov/gene/ -OMIMPS,https://www.omim.org/phenotypicSeries/PS +OMIMPS,https://omim.org/phenotypicSeries/PS STY,http://purl.bioontology.org/ontology/STY/ sssom,https://w3id.org/sssom/ biolink,https://w3id.org/biolink/vocab/ diff --git a/src/ontology/metadata/doid.metadata.sssom.yml b/src/ontology/metadata/doid.metadata.sssom.yml index 4397d026..46c1af35 100644 --- a/src/ontology/metadata/doid.metadata.sssom.yml +++ b/src/ontology/metadata/doid.metadata.sssom.yml @@ -9,7 +9,7 @@ curie_map: # MedDRA: https://identifiers.org/meddra/ MESH: https://meshb.nlm.nih.gov/record/ui?ui= OMIM: https://omim.org/entry/ - OMIMPS: https://www.omim.org/phenotypicSeries/PS + OMIMPS: https://omim.org/phenotypicSeries/PS # Orphanet: 
http://www.orpha.net/ORDO/Orphanet_ UMLS: http://linkedlifedata.com/resource/umls/id/ DOID: http://purl.obolibrary.org/obo/DOID_ diff --git a/src/ontology/metadata/omim.yml b/src/ontology/metadata/omim.yml index f87273d2..2b7dba88 100644 --- a/src/ontology/metadata/omim.yml +++ b/src/ontology/metadata/omim.yml @@ -2,7 +2,7 @@ id: OMIM label: Online Mendelian Inheritance in Man prefix_map: OMIM: https://omim.org/entry/ - OMIMPS: https://www.omim.org/phenotypicSeries/PS + OMIMPS: https://omim.org/phenotypicSeries/PS CHR: http://purl.obolibrary.org/obo/CHR_ CL: http://purl.obolibrary.org/obo/CL_ HGNC: "https://identifiers.org/hgnc:" @@ -28,4 +28,4 @@ description: > homepage: https://www.omim.org/ base_prefix_map: OMIM: https://omim.org/entry/ - OMIMPS: https://www.omim.org/phenotypicSeries/PS \ No newline at end of file + OMIMPS: https://omim.org/phenotypicSeries/PS \ No newline at end of file diff --git a/src/sparql/fix_make_omim_exact.ru b/src/sparql/fix_make_omim_exact.ru index d6d09ae8..47b0ba1d 100644 --- a/src/sparql/fix_make_omim_exact.ru +++ b/src/sparql/fix_make_omim_exact.ru @@ -29,6 +29,6 @@ WHERE FILTER( STRSTARTS(str(?value), "OMIM")) FILTER( !isBlank(?cls) && STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/DOID_")) - BIND(IRI(REPLACE(REPLACE(STR(?value), "OMIMPS:", "https://www.omim.org/phenotypicSeries/PS"), "OMIM:", "https://omim.org/entry/")) as ?iri) + BIND(IRI(REPLACE(REPLACE(STR(?value), "OMIMPS:", "https://omim.org/phenotypicSeries/PS"), "OMIM:", "https://omim.org/entry/")) as ?iri) } From 86b4abff6687c96765762d7644531a6df17263cc Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 21 Mar 2024 19:26:10 -0400 Subject: [PATCH 14/18] Update slurp/omim.tsv Updated file directly without running any prerequisites. Proof of bugfix on slurp following previous commit. 
--- src/ontology/slurp/omim.tsv | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/ontology/slurp/omim.tsv b/src/ontology/slurp/omim.tsv index ef77d1aa..999a975f 100644 --- a/src/ontology/slurp/omim.tsv +++ b/src/ontology/slurp/omim.tsv @@ -2,25 +2,13 @@ mondo_id mondo_label xref xref_source original_label definition parents ID LABEL A oboInOwl:hasDbXref >A oboInOwl:source SPLIT=| A IAO:0000115 SC % MONDO:0958222 maple syrup urine disease, iia 1b OMIM:620698 MONDO:equivalentTo maple syrup urine disease, iia 1b MONDO:0958223 maple syrup urine disease, iia 2 OMIM:620699 MONDO:equivalentTo maple syrup urine disease, iia 2 -MONDO:0958224 encephalopathy, porphyria-related OMIM:620704 MONDO:equivalentTo encephalopathy, porphyria-related -MONDO:0958225 epidermolytic hyperkeratosis 2b, autosomal recessive OMIM:620707 MONDO:equivalentTo epidermolytic hyperkeratosis 2b, autosomal recessive -MONDO:0958226 leukoencephalopathy, porphyria-related OMIM:620711 MONDO:equivalentTo leukoencephalopathy, porphyria-related +MONDO:0958225 epidermolytic hyperkeratosis 2b, autosomal recessive OMIM:620707 MONDO:equivalentTo epidermolytic hyperkeratosis 2b, autosomal recessive MONDO:0957316 MONDO:0958227 polydactyly-macrocephaly syndrome OMIM:620712 MONDO:equivalentTo polydactyly-macrocephaly syndrome -MONDO:0958228 deafness, autosomal recessive 122 OMIM:620714 MONDO:equivalentTo deafness, autosomal recessive 122 +MONDO:0958228 deafness, autosomal recessive 122 OMIM:620714 MONDO:equivalentTo deafness, autosomal recessive 122 MONDO:0019588 MONDO:0958229 bleeding disorder, vascular-type OMIM:620715 MONDO:equivalentTo bleeding disorder, vascular-type -MONDO:0958230 orofaciodigital syndrome 20 OMIM:620718 MONDO:equivalentTo orofaciodigital syndrome 20 -MONDO:0958231 neurodevelopmental disorder with motor abnormalities, seizures, and facial dysmorphism OMIM:620719 MONDO:equivalentTo neurodevelopmental disorder with motor abnormalities, seizures, 
and facial dysmorphism -MONDO:0958232 deafness, autosomal dominant 90 OMIM:620722 MONDO:equivalentTo deafness, autosomal dominant 90 -MONDO:0958233 bethlem myopathy 1b OMIM:620725 MONDO:equivalentTo bethlem myopathy 1b -MONDO:0958234 bethlem myopathy 1c OMIM:620726 MONDO:equivalentTo bethlem myopathy 1c -MONDO:0958235 ullrich congenital muscular dystrophy 1b OMIM:620727 MONDO:equivalentTo ullrich congenital muscular dystrophy 1b -MONDO:0958236 ullrich congenital muscular dystrophy 1c OMIM:620728 MONDO:equivalentTo ullrich congenital muscular dystrophy 1c -MONDO:0958237 hyperferritinemia OMIM:620729 MONDO:equivalentTo hyperferritinemia -MONDO:0958238 hyperemesis gravidarum, susceptibility to OMIM:620730 MONDO:equivalentTo hyperemesis gravidarum, susceptibility to -MONDO:0958239 microphthalmia/coloboma 11 OMIM:620731 MONDO:equivalentTo microphthalmia/coloboma 11 -MONDO:0958240 neurodevelopmental disorder with hyperkinetic movements, seizures, and structural brain abnormalities OMIM:620732 MONDO:equivalentTo neurodevelopmental disorder with hyperkinetic movements, seizures, and structural brain abnormalities -MONDO:0958241 cardiomyopathy, familial hypertrophic, 30, atrial OMIM:620734 MONDO:equivalentTo cardiomyopathy, familial hypertrophic, 30, atrial -MONDO:0958242 spermatogenic failure 90 OMIM:620744 MONDO:equivalentTo spermatogenic failure 90 -MONDO:0958277 deafness, autosomal recessive 123 OMIM:620745 MONDO:equivalentTo deafness, autosomal recessive 123 -MONDO:0958278 neurodevelopmental disorder with hypotonia and characteristic brain abnormalities OMIM:620746 MONDO:equivalentTo neurodevelopmental disorder with hypotonia and characteristic brain abnormalities -MONDO:0958279 megalencephaly-polydactyly syndrome OMIM:620748 MONDO:equivalentTo megalencephaly-polydactyly syndrome +MONDO:0958230 orofaciodigital syndrome 20 OMIM:620718 MONDO:equivalentTo orofaciodigital syndrome 20 MONDO:0015375 +MONDO:0958232 deafness, autosomal dominant 90 OMIM:620722 
MONDO:equivalentTo deafness, autosomal dominant 90 MONDO:0019587 +MONDO:0958281 branchiootorenal syndrome OMIMPS:113650 MONDO:equivalentTo Branchiootorenal syndrome +MONDO:0958282 palmoplantar keratoderma, epidermolytic OMIMPS:144200 MONDO:equivalentTo Palmoplantar keratoderma, epidermolytic +MONDO:0958283 li-fraumeni syndrome OMIMPS:151623 MONDO:equivalentTo Li-Fraumeni syndrome +MONDO:0958284 branchiootic syndrome OMIMPS:602588 MONDO:equivalentTo Branchiootic syndrome From be67ecc3bfc54766828414826def239dc44b6f72 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Fri, 22 Mar 2024 18:13:56 -0400 Subject: [PATCH 15/18] ORDO sparql rename / update - Delete: Usages in mondo-ingest.Makefile that were not necessary - Rename: of/to src/sparql/fix_complex_reification_ordo.ru. Renamed the file and its references. --- docs/sources/ordo.md | 2 +- src/ontology/metadata/ordo.yml | 4 ++-- src/ontology/mondo-ingest.Makefile | 6 +----- ...mplex_reification.ru => fix_complex_reification_ordo.ru} | 0 4 files changed, 4 insertions(+), 8 deletions(-) rename src/sparql/{fix_complex_reification.ru => fix_complex_reification_ordo.ru} (100%) diff --git a/docs/sources/ordo.md b/docs/sources/ordo.md index 0928a9e0..5477e10b 100644 --- a/docs/sources/ordo.md +++ b/docs/sources/ordo.md @@ -32,7 +32,7 @@ * **EntityRemoval**: Removing information that are on obsolete Mondo terms (MONDO:ObsoleteEquivalent). 
* **Update**: Updating the source with various SPARQL preprocessing steps * [MONDO_INGEST_QUERY:fix_deprecated.ru](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/fix_deprecated.ru) - * [MONDO_INGEST_QUERY:fix_complex_reification.ru](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/fix_complex_reification.ru) + * [MONDO_INGEST_QUERY:fix_complex_reification_ordo.ru](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/fix_complex_reification_ordo.ru) * [MONDO_INGEST_QUERY:fix_xref_prefixes.ru](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/fix_xref_prefixes.ru) * [MONDO_INGEST_QUERY:ordo-construct-subclass-from-part-of.ru](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/ordo-construct-subclass-from-part-of.ru) * [MONDO_INGEST_QUERY:ordo-construct-subsets.ru](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/ordo-construct-subsets.ru) diff --git a/src/ontology/metadata/ordo.yml b/src/ontology/metadata/ordo.yml index 19e7bd08..88b94056 100644 --- a/src/ontology/metadata/ordo.yml +++ b/src/ontology/metadata/ordo.yml @@ -50,8 +50,8 @@ preprocessing: queries: - id: MONDO_INGEST_QUERY:fix_deprecated.ru see_also: https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/fix_deprecated.ru - - id: MONDO_INGEST_QUERY:fix_complex_reification.ru - see_also: https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/fix_complex_reification.ru + - id: MONDO_INGEST_QUERY:fix_complex_reification_ordo.ru + see_also: https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/fix_complex_reification_ordo.ru - id: MONDO_INGEST_QUERY:fix_xref_prefixes.ru see_also: https://github.com/monarch-initiative/mondo-ingest/blob/main/src/sparql/fix_xref_prefixes.ru - id: MONDO_INGEST_QUERY:ordo-construct-subclass-from-part-of.ru diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 
099dc9e0..1559f1dd 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -70,7 +70,6 @@ $(COMPONENTSDIR)/omim.owl: $(TMPDIR)/omim_relevant_signature.txt | component-dow query \ --update ../sparql/fix_omimps.ru \ --update ../sparql/fix-labels-with-brackets.ru \ - --update ../sparql/fix_complex_reification.ru \ --update ../sparql/fix_illegal_punning_omim.ru \ annotate --ontology-iri $(URIBASE)/mondo/sources/omim.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/omim.owl -o $@; fi @@ -83,7 +82,7 @@ $(COMPONENTSDIR)/ordo.owl: $(TMPDIR)/ordo_relevant_signature.txt config/properti query \ --update ../sparql/fix_partof.ru \ --update ../sparql/fix_deprecated.ru \ - --update ../sparql/fix_complex_reification.ru \ + --update ../sparql/fix_complex_reification_ordo.ru \ --update ../sparql/fix_xref_prefixes.ru \ --update ../sparql/fix-labels-with-brackets.ru \ --update ../sparql/ordo-construct-subclass-from-part-of.ru \ @@ -113,7 +112,6 @@ $(COMPONENTSDIR)/doid.owl: $(TMPDIR)/doid_relevant_signature.txt | component-dow query \ --update ../sparql/fix_omimps.ru \ --update ../sparql/fix-labels-with-brackets.ru \ - --update ../sparql/fix_complex_reification.ru \ --update ../sparql/rm_xref_by_prefix.ru \ --update ../sparql/fix_make_omim_exact.ru \ remove -T config/properties.txt --select complement --select properties --trim true \ @@ -141,7 +139,6 @@ $(COMPONENTSDIR)/icd10cm.owl: $(TMPDIR)/icd10cm_relevant_signature.txt | compone query \ --update ../sparql/fix_omimps.ru \ --update ../sparql/fix-labels-with-brackets.ru \ - --update ../sparql/fix_complex_reification.ru \ remove -T config/properties.txt --select complement --select properties --trim true \ annotate --ontology-iri $(URIBASE)/mondo/sources/icd10cm.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd10cm.owl -o $@; fi @@ -155,7 +152,6 @@ $(COMPONENTSDIR)/icd10who.owl: $(TMPDIR)/icd10who_relevant_signature.txt | compo query \ --update ../sparql/fix_omimps.ru \ --update 
../sparql/fix-labels-with-brackets.ru \ - --update ../sparql/fix_complex_reification.ru \ remove -T config/properties.txt --select complement --select properties --trim true \ annotate --ontology-iri $(URIBASE)/mondo/sources/icd10who.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd10who.owl -o $@; fi diff --git a/src/sparql/fix_complex_reification.ru b/src/sparql/fix_complex_reification_ordo.ru similarity index 100% rename from src/sparql/fix_complex_reification.ru rename to src/sparql/fix_complex_reification_ordo.ru From cfffd95e7fb6b4f8e250062c9675d17a0de4717f Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 15 Feb 2024 17:33:54 -0500 Subject: [PATCH 16/18] ICD11 config & docs - Rename: ICD11 -> ICD11Foundation - Rename prefix: icd11 -> icd11.foundation - Add prefixes: icd11.schema, icd11.z - Add: intensional exclusions TSV (currently empty) - Add: SPARQL query for selecting all diseases --- .../icd11foundation-relevant-diseases.sparql | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 src/sparql/icd11foundation-relevant-diseases.sparql diff --git a/src/sparql/icd11foundation-relevant-diseases.sparql b/src/sparql/icd11foundation-relevant-diseases.sparql new file mode 100644 index 00000000..e767f914 --- /dev/null +++ b/src/sparql/icd11foundation-relevant-diseases.sparql @@ -0,0 +1,32 @@ +PREFIX rdfs: +PREFIX owl: + + +### All diseases +SELECT DISTINCT ?term ?label ?deprecated +WHERE { + { + { + ?s1 ?p1 ?term . + ?term rdfs:subClassOf* . + OPTIONAL { + ?term rdfs:label ?label + } + OPTIONAL { + ?term owl:deprecated ?deprecated + } + } + UNION + { + ?term ?p2 ?o2 . + ?term rdfs:subClassOf* . 
+ OPTIONAL { + ?term rdfs:label ?label + } + OPTIONAL { + ?term owl:deprecated ?deprecated + } + } + } + FILTER(isIRI(?term)) +} From 81a2d47da010c018d0a6cba6daff501961907bc3 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Tue, 30 Jan 2024 19:51:32 -0500 Subject: [PATCH 17/18] ICD11 config & docs - Update: mondo-ingest-odk.yaml: New entry for ICD11 in 'components' - Updates from running 'make update_repo': - modified: docs/odk-workflows/ManageDocumentation.md - modified: docs/odk-workflows/RepositoryFileStructure.md - modified: src/ontology/Makefile - modified: src/ontology/run.sh - new file: src/scripts/run-command.sh - modified: src/scripts/update_repo.sh - Add: metadata/icd11.yml - Update: prefixes.csv --- src/ontology/Makefile | 2 +- src/ontology/metadata/icd11.yml | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 src/ontology/metadata/icd11.yml diff --git a/src/ontology/Makefile b/src/ontology/Makefile index baa9ada9..ed84b4ff 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -54,7 +54,7 @@ OBODATE ?= $(shell date +'%d:%m:%Y %H:%M') VERSION= $(TODAY) ANNOTATE_ONTOLOGY_VERSION = annotate -V $(ONTBASE)/releases/$(VERSION)/$@ --annotation owl:versionInfo $(VERSION) ANNOTATE_CONVERT_FILE = annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) convert -f ofn --output $@.tmp.owl && mv $@.tmp.owl $@ -OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11foundation.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl +OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11foundation.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl ONTOLOGYTERMS = $(TMPDIR)/ontologyterms.txt EDIT_PREPROCESSED = $(TMPDIR)/$(ONT)-preprocess.owl diff --git 
a/src/ontology/metadata/icd11.yml b/src/ontology/metadata/icd11.yml new file mode 100644 index 00000000..2d2143c7 --- /dev/null +++ b/src/ontology/metadata/icd11.yml @@ -0,0 +1,13 @@ +id: ICD11 +label: International Classification of Diseases 11th Revision +prefix_map: + ICD11: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/ +description: > + The International Classification of Diseases (ICD) provides a common language that allows health professionals to + share standardized information across the world. The eleventh revision contains around 17 000 unique codes, more than + 120 000 codable terms and is now entirely digital.Feb 11, 2022 + + This data source in particular is the ICD11 foundation, not one of its linearizations. +homepage: https://icd.who.int/ +base_prefix_map: + ICD11: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/ From 738ab75d009b10d937ed2853e1de64b5863f210d Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 8 Feb 2024 18:24:53 -0500 Subject: [PATCH 18/18] ICD11 Ingest - Update: mondo-ingest.Makefile - Add: $(COMPONENTSDIR)/icd11.owl - Add: config/icd11foundation-property-map.sssom.tsv - Update: ICD11 docs - Update: config/properties.txt - Update: config/context.json - Update: metadata/mondo.sssom.config.yml: added icd11.foundation to subject_prefixes - Update: lexmatch-sssom-compare.py: entry for icd11 - Update: add-new-source.md: Instructions for additional configuration necessities. General - Bugfix: Slurp files were sometimes getting removed because they were considered intermediates and not .PRECIOUS. - Bugfix: reports/*_exclusion_reasons.robot.template.tsv files were getting removed by the build for same reason as above. 
--- docs/developer/add-new-source.md | 6 ++++ docs/sources/icd11foundation.md | 23 +++++++++---- src/ontology/Makefile | 2 +- src/ontology/config/context.json | 2 ++ .../icd11foundation-property-map.sssom.tsv | 5 +++ src/ontology/config/properties.txt | 2 ++ src/ontology/metadata/icd11.yml | 13 -------- src/ontology/metadata/icd11foundation.yml | 14 ++++++++ src/ontology/metadata/mondo.sssom.config.yml | 1 + src/ontology/mondo-ingest.Makefile | 13 ++++++++ src/scripts/lexmatch-sssom-compare.py | 5 ++- .../icd11foundation-relevant-diseases.sparql | 32 ------------------- 12 files changed, 64 insertions(+), 54 deletions(-) create mode 100644 src/ontology/config/icd11foundation-property-map.sssom.tsv delete mode 100644 src/ontology/metadata/icd11.yml delete mode 100644 src/sparql/icd11foundation-relevant-diseases.sparql diff --git a/docs/developer/add-new-source.md b/docs/developer/add-new-source.md index b00c6faa..1b663ad2 100644 --- a/docs/developer/add-new-source.md +++ b/docs/developer/add-new-source.md @@ -21,6 +21,7 @@ Add a new metadata file to [src/ontology/metadata](https://github.com/monarch-in Prefixes need to be entered in the following places in the yml: - `curie_map` - `extended_prefix_map` +- `subject_prefixes` ### 2.3. `config/prefixes.csv` Add prefixes. @@ -28,6 +29,11 @@ Add prefixes. ### 2.4. `config/context.json` Add prefixes. +### 2.5. `lexmatch-sssom-compare.py` +There is a section of branching logic with a comment "Map ontology filenames to prefixes". Add an entry there if either +(a) there is 1 prefix you care about, and it is spelled differently than the component filename (e.g. the prefix is +`myontology`, but the filename is `components/my-ontology.owl`), or (b) there is more than 1 prefix. + ## 3. Docs ### 3.1. 
`mkdocs.yaml` Update the Website Table of Contents in [mkdocs.yaml](https://github.com/monarch-initiative/mondo-ingest/blob/main/mkdocs.yaml) diff --git a/docs/sources/icd11foundation.md b/docs/sources/icd11foundation.md index 8ab32c53..289b6f50 100644 --- a/docs/sources/icd11foundation.md +++ b/docs/sources/icd11foundation.md @@ -2,20 +2,29 @@ **Source name:** International Classification of Diseases 11th Revision -**Source description:** The International Classification of Diseases (ICD) provides a common language that allows health professionals to share standardized information across the world. The eleventh revision contains around 17 000 unique codes, more than 120 000 codable terms and is now entirely digital.Feb 11, 2022 +**Source description:** The International Classification of Diseases (ICD) provides a common language that allows health +professionals to share standardized information across the world. The eleventh revision contains around 17 000 unique +codes, more than 120 000 codable terms and is now entirely digital.Feb 11, 2022 This data source in particular is the ICD11 foundation, not one of its linearizations. - **Homepage:** https://icd.who.int/ -**Comments about this source:** -Because the existing logical equivalence class axioms led to equivalence cliques (groups of distinct disease identifiers -that inferred to he semantically identical) we decided to strip out all equivalence class axiom from the foundation -prior to processing it in the ingest. - +**Comments about this source:** +_Data source_ +_Original source URL_: https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz +_Preprocessing_ +In the [monarch-initiative/icd11](https://github.com/monarch-initiative/icd11) repo, We remove unicode characters and +then remove equivalent class statements as discussed below. +_Equivalent classes_ +We remove all equivalent class statements as they are not unique and result in unintended node merges. 
For example +`icd11.foundation:2000662282` (_Occupant of pick-up truck or van injured in collision with car, pick-up truck or van: +person on outside of vehicle injured in traffic accident_) has the same exact equivalent concept expression as +`icd11.foundation:1279712844` (_Occupant of pick-up truck or van injured in collision with two- or three- wheeled motor +vehicle: person on outside of vehicle injured in traffic accident_). +--- The data pipeline that generates the source is implemented in `make`, in this source file: [src/ontology/mondo-ingest.Makefile](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/ontology/mondo-ingest.Makefile). diff --git a/src/ontology/Makefile b/src/ontology/Makefile index ed84b4ff..baa9ada9 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -54,7 +54,7 @@ OBODATE ?= $(shell date +'%d:%m:%Y %H:%M') VERSION= $(TODAY) ANNOTATE_ONTOLOGY_VERSION = annotate -V $(ONTBASE)/releases/$(VERSION)/$@ --annotation owl:versionInfo $(VERSION) ANNOTATE_CONVERT_FILE = annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) convert -f ofn --output $@.tmp.owl && mv $@.tmp.owl $@ -OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11foundation.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl +OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11foundation.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl ONTOLOGYTERMS = $(TMPDIR)/ontologyterms.txt EDIT_PREPROCESSED = $(TMPDIR)/$(ONT)-preprocess.owl diff --git a/src/ontology/config/context.json b/src/ontology/config/context.json index 89834463..793dff38 100644 --- a/src/ontology/config/context.json +++ b/src/ontology/config/context.json @@ -31,6 +31,8 @@ "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_", 
"ICD10CM": "http://purl.bioontology.org/ontology/ICD10CM/", "ICD10WHO": "http://apps.who.int/classifications/icd10/browse/2010/en#/", + "icd11.foundation": "http://id.who.int/icd/entity/", + "icd11.z": "http://who.int/icd#Z_", "OMIMPS": "https://omim.org/phenotypicSeries/PS", "MONDOREL": "http://purl.obolibrary.org/obo/mondo#" } diff --git a/src/ontology/config/icd11foundation-property-map.sssom.tsv b/src/ontology/config/icd11foundation-property-map.sssom.tsv new file mode 100644 index 00000000..2cfd32be --- /dev/null +++ b/src/ontology/config/icd11foundation-property-map.sssom.tsv @@ -0,0 +1,5 @@ +subject_id object_id +http://id.who.int/icd/schema/isObsolote owl:deprecated +http://id.who.int/icd/schema/longDefinition http://purl.org/dc/terms/description +http://id.who.int/icd/schema/note rdfs:comment +skos:definition IAO:0000115 diff --git a/src/ontology/config/properties.txt b/src/ontology/config/properties.txt index 33de4e8c..79b56639 100644 --- a/src/ontology/config/properties.txt +++ b/src/ontology/config/properties.txt @@ -20,6 +20,8 @@ http://www.w3.org/2004/02/skos/core#narrowMatch http://www.w3.org/2004/02/skos/core#relatedMatch http://www.w3.org/2004/02/skos/core#exactMatch http://www.w3.org/2004/02/skos/core#closeMatch +rdfs:comment rdfs:label rdfs:seeAlso owl:deprecated +http://purl.org/dc/terms/description diff --git a/src/ontology/metadata/icd11.yml b/src/ontology/metadata/icd11.yml deleted file mode 100644 index 2d2143c7..00000000 --- a/src/ontology/metadata/icd11.yml +++ /dev/null @@ -1,13 +0,0 @@ -id: ICD11 -label: International Classification of Diseases 11th Revision -prefix_map: - ICD11: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/ -description: > - The International Classification of Diseases (ICD) provides a common language that allows health professionals to - share standardized information across the world. 
The eleventh revision contains around 17 000 unique codes, more than - 120 000 codable terms and is now entirely digital.Feb 11, 2022 - - This data source in particular is the ICD11 foundation, not one of its linearizations. -homepage: https://icd.who.int/ -base_prefix_map: - ICD11: http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/ICD11/ diff --git a/src/ontology/metadata/icd11foundation.yml b/src/ontology/metadata/icd11foundation.yml index e3417a71..6e39f924 100644 --- a/src/ontology/metadata/icd11foundation.yml +++ b/src/ontology/metadata/icd11foundation.yml @@ -10,6 +10,20 @@ description: > 120 000 codable terms and is now entirely digital.Feb 11, 2022 This data source in particular is the ICD11 foundation, not one of its linearizations. +comments_about_this_source: > + _Data source_ + _Original source URL_: https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz + + _Preprocessing_ + In the [monarch-initiative/icd11](https://github.com/monarch-initiative/icd11) repo, We remove unicode characters and + then remove equivalent class statements as discussed below. + + _Equivalent classes_ + We remove all equivalent class statements as they are not unique and result in unintended node merges. For example + `icd11.foundation:2000662282` (_Occupant of pick-up truck or van injured in collision with car, pick-up truck or van: + person on outside of vehicle injured in traffic accident_) has the same exact equivalent concept expression as + `icd11.foundation:1279712844` (_Occupant of pick-up truck or van injured in collision with two- or three- wheeled motor + vehicle: person on outside of vehicle injured in traffic accident_). 
homepage: https://icd.who.int/ base_prefix_map: icd11.foundation: http://id.who.int/icd/entity/ diff --git a/src/ontology/metadata/mondo.sssom.config.yml b/src/ontology/metadata/mondo.sssom.config.yml index 2aefad03..0d95ed78 100644 --- a/src/ontology/metadata/mondo.sssom.config.yml +++ b/src/ontology/metadata/mondo.sssom.config.yml @@ -367,6 +367,7 @@ subject_prefixes: - EFO - ICD10CM - ICD10WHO + - icd11.foundation - OMIMPS - NCIT - DOID diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 099dc9e0..0b84a20e 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -159,6 +159,17 @@ $(COMPONENTSDIR)/icd10who.owl: $(TMPDIR)/icd10who_relevant_signature.txt | compo remove -T config/properties.txt --select complement --select properties --trim true \ annotate --ontology-iri $(URIBASE)/mondo/sources/icd10who.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd10who.owl -o $@; fi +$(COMPONENTSDIR)/icd11foundation.owl: $(TMPDIR)/icd11foundation_relevant_signature.txt | component-download-icd11foundation.owl + if [ $(COMP) = true ] ; then $(ROBOT) remove -i $(TMPDIR)/component-download-icd11foundation.owl.owl --select imports \ + rename --mappings config/property-map-1.sssom.tsv --allow-missing-entities true \ + rename --mappings config/icd11foundation-property-map.sssom.tsv \ + remove -T $(TMPDIR)/icd11foundation_relevant_signature.txt --select complement --select "classes individuals" --trim false \ + remove -T $(TMPDIR)/icd11foundation_relevant_signature.txt --select individuals \ + query \ + --update ../sparql/fix-labels-with-brackets.ru \ + remove -T config/properties.txt --select complement --select properties --trim true \ + annotate --ontology-iri $(URIBASE)/mondo/sources/icd11foundation.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd11foundation.owl -o $@; fi + $(COMPONENTSDIR)/gard.owl: $(TMPDIR)/gard_relevant_signature.txt | component-download-gard.owl if [ $(COMP) = true ]; 
then $(ROBOT) remove -i $(TMPDIR)/component-download-gard.owl.owl --select imports \ remove -T $(TMPDIR)/gard_relevant_signature.txt --select complement --select "classes individuals" --trim false \ @@ -246,6 +257,7 @@ $(REPORTDIR)/%_term_exclusions.txt $(REPORTDIR)/%_exclusion_reasons.robot.templa --config-path metadata/$*.yml \ --outpath-txt $(REPORTDIR)/$*_term_exclusions.txt \ --outpath-robot-template-tsv $(REPORTDIR)/$*_exclusion_reasons.robot.template.tsv +.PRECIOUS: $(REPORTDIR)/%_exclusion_reasons.robot.template.tsv $(REPORTDIR)/%_exclusion_reasons.ttl: component-download-%.owl $(REPORTDIR)/%_exclusion_reasons.robot.template.tsv $(ROBOT) template --input $(TMPDIR)/component-download-$*.owl.owl --add-prefixes config/context.json --template $(REPORTDIR)/$*_exclusion_reasons.robot.template.tsv --output $(REPORTDIR)/$*_exclusion_reasons.ttl @@ -476,6 +488,7 @@ slurp/%.tsv: $(COMPONENTSDIR)/%.owl $(TMPDIR)/mondo.sssom.tsv $(REPORTDIR)/%_map --mondo-terms-path $(REPORTDIR)/mirror_signature-mondo.tsv \ --slurp-dir-path slurp/ \ --outpath $@ +.PRECIOUS: slurp/%.tsv .PHONY: slurp-% slurp-%: slurp/%.tsv diff --git a/src/scripts/lexmatch-sssom-compare.py b/src/scripts/lexmatch-sssom-compare.py index a5914b31..e6b0da03 100644 --- a/src/scripts/lexmatch-sssom-compare.py +++ b/src/scripts/lexmatch-sssom-compare.py @@ -173,11 +173,14 @@ def extract_unmapped_matches(input: str, matches: TextIO, output_dir: str, summa ont_df_list = [] for _, ont in enumerate(input): + # Map ontology filenames to prefixes ont2 = ont.upper() if ont == "omim": ont2 = "|".join((["OMIM", "OMIMPS"])) elif ont == "ordo": ont2 = "|".join((["ORDO", "Orphanet"])) + elif ont == "icd11foundation": + ont2 = 'icd11.foundation' mondo_ont_df = msdf_mondo.df[condition_mondo_sssom_subj & msdf_mondo.df['object_id'].str.contains(ont2)] mondo_ont_lex_df = lex_df[(condition_lex_df_mondo_subj & lex_df['object_id'].str.contains(ont2))] @@ -201,7 +204,7 @@ def extract_unmapped_matches(input: str, matches: TextIO, 
output_dir: str, summa ont_df_list.append(unmapped_ont_df) - combined_df = pd.concat(ont_df_list) + combined_df = pd.concat(ont_df_list) if ont_df_list else pd.DataFrame() combined_msdf = MappingSetDataFrame( df=combined_df, converter=msdf_lex.converter, metadata=msdf_lex.metadata diff --git a/src/sparql/icd11foundation-relevant-diseases.sparql b/src/sparql/icd11foundation-relevant-diseases.sparql deleted file mode 100644 index e767f914..00000000 --- a/src/sparql/icd11foundation-relevant-diseases.sparql +++ /dev/null @@ -1,32 +0,0 @@ -PREFIX rdfs: -PREFIX owl: - - -### All diseases -SELECT DISTINCT ?term ?label ?deprecated -WHERE { - { - { - ?s1 ?p1 ?term . - ?term rdfs:subClassOf* . - OPTIONAL { - ?term rdfs:label ?label - } - OPTIONAL { - ?term owl:deprecated ?deprecated - } - } - UNION - { - ?term ?p2 ?o2 . - ?term rdfs:subClassOf* . - OPTIONAL { - ?term rdfs:label ?label - } - OPTIONAL { - ?term owl:deprecated ?deprecated - } - } - } - FILTER(isIRI(?term)) -}