Skip to content

Commit

Permalink
feat(*): new feature to exclude branches from trimming
Browse files Browse the repository at this point in the history
  • Loading branch information
valearna committed Apr 5, 2021
1 parent c5fbc3e commit 22904ba
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 2 deletions.
3 changes: 3 additions & 0 deletions genedescriptions/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class ConfigModuleProperty(Enum):
SLIM_URL = 15
SLIM_BONUS_PERC = 16
REMAP_TERMS = 17
DO_NOT_TRIM_BRANCH_AT = 18


class GenedescConfigParser(object):
Expand Down Expand Up @@ -104,6 +105,8 @@ def _get_module_property_name(prop: ConfigModuleProperty):
property_name = "slim_bonus_perc"
elif prop == ConfigModuleProperty.REMAP_TERMS:
property_name = "remap_terms"
elif prop == ConfigModuleProperty.DO_NOT_TRIM_BRANCH_AT:
property_name = "do_not_trim_branch_at"
return property_name

def get_prepostfix_sentence_map(self, module: Module, special_cases_only: bool = False, humans: bool = False):
Expand Down
23 changes: 21 additions & 2 deletions genedescriptions/descriptions_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,21 @@ def get_module_sentences(self, aspect: str, qualifier: str = '',
put_anatomy_male_at_end=True if aspect == 'A' else False)
return ModuleSentences(sentences)

def separate_do_not_trim_from_trim_terms(self, term_ids: List[str]):
    """
    Partition term IDs into those exempt from trimming and those that may be trimmed.

    The branch roots are read from the DO_NOT_TRIM_BRANCH_AT property of the current module. A term is
    exempt when node_is_in_branch reports that it belongs to one of those branches.

    Args:
        term_ids: the term IDs to partition

    Returns:
        a tuple (do_not_trim_terms, trim_terms). When no branch roots are configured, the first element is
        an empty list and the second is the very same term_ids object that was passed in.
    """
    protected_roots = self.config.get_module_property(
        module=self.module, prop=ConfigModuleProperty.DO_NOT_TRIM_BRANCH_AT)
    # Nothing is protected: hand the input back untouched as the trimmable group.
    if not protected_roots:
        return [], term_ids
    kept, trimmable = [], []
    for current_id in term_ids:
        in_protected_branch = node_is_in_branch(ontology=self.ontology, node_id=current_id,
                                                branch_root_ids=protected_roots)
        bucket = kept if in_protected_branch else trimmable
        bucket.append(current_id)
    return kept, trimmable

def reduce_num_terms(self, terms: Set[str], min_distance_from_root: int = 0) -> TrimmingResult:
"""
Reduce the initial set of terms by resolving parent child relationships, deleting overlap with previous
Expand All @@ -177,11 +192,15 @@ def reduce_num_terms(self, terms: Set[str], min_distance_from_root: int = 0) ->
max_terms = self.config.get_module_property(module=self.module,
prop=ConfigModuleProperty.MAX_NUM_TERMS_IN_SENTENCE)
if 0 < max_terms < len(terms):
trimming_result = self.trimmer.trim(terms, max_terms, min_distance_from_root)
do_not_trim_terms, trim_terms = self.separate_do_not_trim_from_trim_terms(term_ids=terms)
trimming_result = self.trimmer.trim(trim_terms, max_terms, min_distance_from_root)
if do_not_trim_terms:
trimming_result.final_terms.extend(do_not_trim_terms)
trimming_result.covered_nodes.update(do_not_trim_terms)
else:
trimming_result.final_terms = terms
trimming_result.covered_nodes = terms
self.terms_already_covered.update(terms)
self.terms_already_covered.update(terms)
if self.config.get_module_property(module=self.module, prop=ConfigModuleProperty.DEL_CHILDREN_IF_PARENT):
trimming_result.final_terms = self.remove_children_if_parents_present(
terms=trimming_result.final_terms, ontology=self.ontology,
Expand Down
5 changes: 5 additions & 0 deletions genedescriptions/ontology_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,9 @@ def _set_information_content_in_subgraph(ontology: Ontology, root_id: str, maxle
relations=relations)


def node_is_in_branch(ontology: "Ontology", node_id: str, branch_root_ids: List[str]) -> bool:
    """
    Tell whether a node belongs to any of the branches rooted at the given IDs.

    A node belongs to a branch when the branch root is the node itself or one of its ancestors
    (ancestors are requested with reflexive=True, so the node is included in the lookup).

    Args:
        ontology: object exposing ancestors(node=..., reflexive=...) returning an iterable of node IDs
        node_id: the ID of the node to test
        branch_root_ids: IDs of the branch roots; may be empty or None

    Returns:
        True if the node or any of its ancestors is one of the branch roots, False otherwise
        (including when no branch roots are provided)
    """
    # No roots means no branch can contain the node; skip the ancestor traversal entirely.
    if not branch_root_ids:
        return False
    roots = set(branch_root_ids)
    # isdisjoint stops at the first shared element instead of materializing a list of booleans.
    return not roots.isdisjoint(ontology.ancestors(node=node_id, reflexive=True))



6 changes: 6 additions & 0 deletions wormbase/config_wb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,9 @@ do_exp_sentences_options:
- "DOID:0080015"
- "DOID:0050117"
- "DOID:0080014"
do_not_trim_branch_at:
- "DOID:10652"
- "DOID:9884"
evidence_codes:
IMP:
group: EXPERIMENTAL
Expand Down Expand Up @@ -636,6 +639,9 @@ do_via_orth_sentences_options:
- "DOID:0080015"
- "DOID:0050117"
- "DOID:0080014"
do_not_trim_branch_at:
- "DOID:10652"
- "DOID:9884"
evidence_codes:
ISS:
group: ORTHOLOGY_BASED
Expand Down

0 comments on commit 22904ba

Please sign in to comment.