Skip to content

Commit

Permalink
Address review comments and add sentence in the SOP
Browse files Browse the repository at this point in the history
  • Loading branch information
tcezard committed Jan 27, 2025
1 parent 6cf4f40 commit b73dd8b
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 13 deletions.
29 changes: 16 additions & 13 deletions bin/trait_mapping/get_children_with_keywords.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,54 +1,57 @@
#!/usr/bin/env python3

import argparse
from collections import defaultdict

from cmat.clinvar_xml_io.ontology_uri import OntologyUri
from cmat.trait_mapping.ols import build_ols_query
from cmat.trait_mapping.utils import json_request


def append_embedded(results, json_response):
    """Merge the '_embedded' section of a JSON response into ``results``.

    :param results: dict (or defaultdict) mapping each embedded key to a list
        of entries; it is updated in place.
    :param json_response: decoded JSON response. It is ignored when falsy or
        when it has no '_embedded' key.
    """
    if not json_response or '_embedded' not in json_response:
        return
    for key, entries in json_response['_embedded'].items():
        # setdefault creates the list on first sight of a key, so this works
        # whether the caller passes a plain dict or a defaultdict(list).
        results.setdefault(key, []).extend(entries)


def query_and_depaginate(url):
    """Request ``url`` and every following page, merging their embedded results.

    Pages are followed through the ``_links -> next -> href`` entry of each
    response until a response carries no 'next' link.

    :param url: URL of the first page to request.
    :return: defaultdict(list) mapping each '_embedded' key to the entries
        accumulated over all pages; a key that never appeared yields an
        empty list.
    """
    results = defaultdict(list)
    json_response = json_request(url)
    append_embedded(results, json_response)
    # append_embedded tolerates an empty response, so the pagination test must
    # too: stop paging instead of failing when the response or its '_links'
    # section is missing.
    while json_response and '_links' in json_response and 'next' in json_response['_links']:
        json_response = json_request(json_response['_links']['next']['href'])
        append_embedded(results, json_response)
    return results


def search_in(keywords, text):
    """Return the set of keywords that occur as a substring of ``text``.

    :param keywords: iterable of keyword strings to look for.
    :param text: a string, a list/tuple of strings, or None. For a list, a
        keyword matches when it is a substring of any element; None (and
        non-string elements) never match.
    :return: set of the keywords that were found.
    """
    if text is None:
        return set()
    if isinstance(text, str):
        candidates = [text]
    else:
        # NOTE(review): OLS appears to return some fields (e.g. description)
        # as a list of strings; search each element rather than testing list
        # membership, which would only match a keyword equal to a whole entry.
        candidates = [item for item in text if isinstance(item, str)]
    return {keyword for keyword in keywords if any(keyword in item for item in candidates)}


def _iter_term_texts(term):
    """Yield every searchable string of a term: its label, description(s) and synonyms."""
    for field in ('label', 'description', 'synonyms'):
        value = term.get(field)
        if not value:
            # Missing or empty fields have nothing to search.
            continue
        if isinstance(value, str):
            yield value
        else:
            # NOTE(review): description/synonyms are presumably lists of
            # strings in the OLS response - confirm against the API.
            yield from (text for text in value if isinstance(text, str))


def main():
    """Print the children of a parent term whose texts contain all the keywords.

    The parent term is looked up from ``--parent_curie`` in ``--ontology``;
    each of its children belonging to the same ontology is reported (IRI and
    label) when every keyword is found in its label, description or synonyms.
    """
    # The summary must be passed as ``description``: the first positional
    # argument of ArgumentParser is ``prog``, which would replace the program
    # name in the usage line.
    parser = argparse.ArgumentParser(
        description='Search OLS for children of a term that match certain keywords in their label, description or synonyms'
    )
    parser.add_argument('--ontology', type=str, default='MONDO', help='Name of the Ontology to find the parent and children')
    parser.add_argument('--parent_curie', type=str, help='Curie of the parent term', required=True)
    # Required: without keywords there is nothing to search for, and
    # set(None) would fail with an unhelpful TypeError.
    parser.add_argument('--keywords', type=str, nargs='+', required=True,
                        help="Words that must be present in the child's ontology label, description or synonyms to be reported")
    args = parser.parse_args()

    keywords = set(args.keywords)
    db = args.ontology
    url = build_ols_query(OntologyUri(args.parent_curie, db).uri)
    results = query_and_depaginate(url)
    for term in results.get('terms', []):
        if term['ontology_prefix'] != db:
            continue
        children_link = term.get('_links', {}).get('children')
        if not children_link:
            # No 'children' link on this term: nothing to report.
            continue
        children_results = query_and_depaginate(children_link['href'])
        for child_term in children_results.get('terms', []):
            if child_term['ontology_prefix'] != db:
                continue
            keyword_found = set()
            for text in _iter_term_texts(child_term):
                keyword_found.update(search_in(keywords, text))
            # Report the child only when every keyword was found somewhere.
            if keyword_found == keywords:
                print(child_term['iri'], child_term['label'])


if __name__ == '__main__':
Expand Down
8 changes: 8 additions & 0 deletions docs/manual-curation/step2-manual-curation.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,11 @@ Terms for import do not require any additional manual intervention, but new term
* **MedGen, OMIM** - Links to the specified resource, useful references if any of the above cannot be found. These are often present in the "Suggested exact mapping" column.

Any additional comments can be left in the final column, they will be passed on to EFO.

Note: New terms often need to be inserted between a general term and more specific ones, so that the new term groups a subset of the specific terms but not all of them.
To help with this, a script was developed: given a parent CURIE, it searches for all the children of that term that match specific keywords in their label, description or synonyms.
This is useful, for example, when looking for all the terms that are specifically labeled as "dominant" in a long list of child terms.

```bash
${PYTHON_BIN} ${CODE_ROOT}/bin/trait_mapping/get_children_with_keywords.py --ontology MONDO --parent_curie MONDO:0100062 --keywords dominant
```

0 comments on commit b73dd8b

Please sign in to comment.