From 94688ccc00c1c5a7afbabf368d70a7a978313ad8 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Thu, 5 Oct 2023 19:36:02 -0400 Subject: [PATCH 01/39] Expose OntologyTermCollector module --- text2term/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/text2term/__init__.py b/text2term/__init__.py index 33b75b5..ad9f676 100644 --- a/text2term/__init__.py +++ b/text2term/__init__.py @@ -6,4 +6,5 @@ from .mapper import Mapper from .preprocess import preprocess_terms from .preprocess import preprocess_tagged_terms -from .tagged_terms import TaggedTerm \ No newline at end of file +from .tagged_term import TaggedTerm +from .term_collector import OntologyTermCollector From 3bbdefceae7c8a6fdcbf279dca9020ff22c20968 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Thu, 5 Oct 2023 20:03:17 -0400 Subject: [PATCH 02/39] Modify OntologyTerm to have a dictionary of complex OWL restrictions Add OntologyTermType enumeration , some missing docs and minor refactoring --- text2term/term.py | 93 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 13 deletions(-) diff --git a/text2term/term.py b/text2term/term.py index 4698982..334e218 100644 --- a/text2term/term.py +++ b/text2term/term.py @@ -1,9 +1,18 @@ -"""Provides OntologyTerm class""" +"""Provides OntologyTerm and OntologyTermType classes""" + +from enum import Enum + + +class OntologyTermType(str, Enum): + CLASS = "class" + PROPERTY = "property" + ANY = "any" class OntologyTerm: - def __init__(self, iri, labels, definitions=(), synonyms=(), parents=(), children=(), instances=(), deprecated=False, termtype='class'): + def __init__(self, iri, labels, definitions=(), synonyms=(), parents=(), children=(), instances=(), restrictions=(), + deprecated=False, term_type=OntologyTermType.CLASS): """ Constructor for a succinct representation of an ontology term :param iri: IRI of the ontology term @@ -13,6 +22,9 @@ def __init__(self, iri, labels, definitions=(), synonyms=(), parents=(), childre :param parents: Dictionary containing the IRIs of parent terms (superclasses) and their label(s) :param children: Dictionary containing the IRIs of child terms (subclasses) and their label(s) :param instances: Dictionary containing the IRIs of instances of the term (rdf:type) and their label(s) + :param restrictions: Dictionary containing complex class restrictions (such as located_in.Hand) on this term + :param deprecated: true if term is stated to be owl:deprecated, false otherwise + :param term_type: Type of term: class or property """ self._iri = iri self._labels = labels @@ -21,49 +33,103 @@ def __init__(self, iri, labels, definitions=(), synonyms=(), parents=(), childre self._parents = parents self._children = children self._instances = instances + self._restrictions = restrictions self._deprecated = deprecated - self._termtype = termtype + self._term_type = term_type @property def iri(self): + """ + Returns the IRI of this term + :return: str + """ return self._iri @property def labels(self): + """ + Returns the set of human-readable labels for the term specified using rdfs:label or skos:prefLabel properties + :return: set + """ return self._labels - @property - def synonyms(self): - return self._synonyms - @property def definitions(self): + """ + Returns the set of textual definitions of the term specified using either the skos:definition or the + IAO:0000115 ('definition') annotation properties + :return: set + """ return self._definitions + @property + def synonyms(self): + """ + Returns the set of 
synonyms of the term specified using obo:hasExactSynonym or ncit:P90 properties
+        :return: set
+        """
+        return self._synonyms
+
     @property
     def parents(self):
+        """
+        Returns a dictionary containing the IRIs of parent terms as keys, and their respective labels as values
+        :return: dict
+        """
         return self._parents
 
     @property
     def children(self):
+        """
+        Returns a dictionary containing the IRIs of child terms as keys, and their respective labels as values
+        :return: dict
+        """
         return self._children
 
     @property
     def instances(self):
+        """
+        Returns a dictionary containing the IRIs of instance terms as keys, and their respective labels as values
+        :return: dict
+        """
         return self._instances
 
+    @property
+    def restrictions(self):
+        """
+        Returns a dictionary containing the IRIs of properties as keys, and the respective fillers as values
+        For example, for a restriction such as ':has_disease_location :pancreas', the dictionary would have:
+         {':has_disease_location': ':pancreas'}
+        For nested expressions such as ':has_disease_location (:pancreas or :liver)', the dictionary would have a string
+        representation of that expression (using owlready2's to_str):
+         {':has_disease_location': ':pancreas | :liver'}
+        :return: dict
+        """
+        return self._restrictions
+
     @property
     def label(self):
-        """Return a single label for this term"""
+        """
+        Returns a single label for this term
+        :return: str
+        """
         return next(iter(self.labels))
 
     @property
     def deprecated(self):
+        """
+        Returns true if this term is stated to be 'owl:deprecated True', false otherwise
+        :return: bool
+        """
         return self._deprecated
 
     @property
-    def termtype(self):
-        return self._termtype
+    def term_type(self):
+        """
+        Returns the ontology term type specified using OntologyTermType enum
+        :return: OntologyTermType
+        """
+        return self._term_type
 
     def __eq__(self, other):
         if isinstance(other, OntologyTerm):
@@ -74,6 +140,7 @@ def __hash__(self):
         return hash(str(self._iri))
 
     def __str__(self):
-        return "Ontology Term: " + self.iri + ", Labels: " + str(self.labels) + ", Synonyms: " + \
-               str(self.synonyms) + ", Definitions: " + str(self.definitions) + ", Parents: " + str(self.parents) + \
-               ", Children: " + str(self.children) + ", Instances: " + str(self.instances)
+        return "Ontology Term: " + self.iri + ", Type: " + self.term_type + ", Labels: " + str(self.labels) + \
+               ", Synonyms: " + str(self.synonyms) + ", Definitions: " + str(self.definitions) + \
+               ", Parents: " + str(self.parents) + ", Children: " + str(self.children) + \
+               ", Instances: " + str(self.instances) + ", Restrictions: " + str(self.restrictions)

From c60bc2c0cc755397b74f12a4362eebb8e6b50979 Mon Sep 17 00:00:00 2001
From: Rafael Goncalves
Date: Thu, 5 Oct 2023 20:08:57 -0400
Subject: [PATCH 03/39] Modify OntologyTermCollector to include complex OWL restrictions

---
 text2term/term_collector.py | 89 ++++++++++++++++++++++++-------------
 1 file changed, 58 insertions(+), 31 deletions(-)

diff --git a/text2term/term_collector.py b/text2term/term_collector.py
index 4074728..3ee622f 100644
--- a/text2term/term_collector.py
+++ b/text2term/term_collector.py
@@ -2,7 +2,7 @@
 
 from owlready2 import *
 from text2term import onto_utils
-from text2term.term import OntologyTerm
+from text2term.term import OntologyTerm, OntologyTermType
 import logging
 import bioregistry
 
@@ -12,13 +12,15 @@ class OntologyTermCollector:
     def __init__(self, log_level=logging.INFO):
         self.logger = onto_utils.get_logger(__name__, level=log_level)
 
-    def get_ontology_terms(self, ontology_iri, base_iris=(), use_reasoning=False,
exclude_deprecated=False, term_type="classes"): + def get_ontology_terms(self, ontology_iri, base_iris=(), use_reasoning=False, exclude_deprecated=False, + term_type=OntologyTermType.ANY): """ Collect the terms described in the ontology at the specified IRI :param ontology_iri: IRI of the ontology (e.g., path of ontology document in the local file system, URL) :param base_iris: Limit ontology term collection to terms whose IRIs start with any IRI given in this tuple :param use_reasoning: Use a reasoner to compute inferred class hierarchy :param exclude_deprecated: Exclude ontology terms stated as deprecated using owl:deprecated 'true' + :param term_type: Type of term--can be 'class' or 'property' or 'any' (individuals may be added in the future) :return: Dictionary of ontology term IRIs and their respective details in the specified ontology """ ontology = self._load_ontology(ontology_iri) @@ -48,7 +50,7 @@ def get_ontology_terms(self, ontology_iri, base_iris=(), use_reasoning=False, ex self.logger.debug("Unable to destroy ontology: ", err) return ontology_terms - def filter_terms(self, onto_terms, iris=(), excl_deprecated=False, term_type='classes'): + def filter_terms(self, onto_terms, iris=(), excl_deprecated=False, term_type=OntologyTermType.ANY): filtered_onto_terms = {} for base_iri, term in onto_terms.items(): if type(iris) == str: @@ -61,10 +63,10 @@ def filter_terms(self, onto_terms, iris=(), excl_deprecated=False, term_type='cl filtered_onto_terms.update({base_iri: term}) return filtered_onto_terms - def _get_ontology_signature(self, ontology, term_type='classes'): + def _get_ontology_signature(self, ontology): signature = list(ontology.classes()) signature.extend(list(ontology.properties())) - # ontology.classes() does not include classes in imported ontologies; we need to explicitly add them to our list + # owlready2::ontology.classes() does not include classes in imported ontologies; we need to explicitly add them for imported_ontology in ontology.imported_ontologies: signature.extend(list(imported_ontology.classes())) signature.extend(list(imported_ontology.properties())) @@ -80,55 +82,79 @@ def _get_ontology_terms(self, term_list, ontology, exclude_deprecated, term_type iri = ontology_term.iri labels = self._get_labels(ontology_term) synonyms = self._get_synonyms(ontology_term) - parents = self._get_parents(ontology_term) + named_parents, complex_parents = self._get_parents(ontology_term) children = self._get_children(ontology_term, ontology) instances = self._get_instances(ontology_term, ontology) definitions = self._get_definitions(ontology_term) is_deprecated = deprecated[ontology_term] == [True] - if self._filter_term_type(ontology_term, "classes", False): - termtype = 'class' - elif self._filter_term_type(ontology_term, "properties", False): - termtype = 'property' - else: - termtype = None + if self._filter_term_type(ontology_term, OntologyTermType.CLASS, False): + term_type = OntologyTermType.CLASS + elif self._filter_term_type(ontology_term, OntologyTermType.PROPERTY, False): + term_type = OntologyTermType.PROPERTY term_details = OntologyTerm(iri, labels, definitions=definitions, synonyms=synonyms, - parents=parents, children=children, instances=instances, - deprecated=is_deprecated, termtype=termtype) + parents=named_parents, children=children, instances=instances, + restrictions=complex_parents, deprecated=is_deprecated, term_type=term_type) ontology_terms[iri] = term_details else: self.logger.debug("Excluding deprecated ontology term: %s", ontology_term.iri) return 
ontology_terms def _filter_term_type(self, ontology_term, term_type, cached): - if term_type == 'classes': + if term_type == OntologyTermType.CLASS: if cached: - return ontology_term.termtype == 'class' + return ontology_term.term_type == OntologyTermType.CLASS else: - return not isinstance(ontology_term, PropertyClass) - elif term_type == 'properties': + return isinstance(ontology_term, ThingClass) + elif term_type == OntologyTermType.PROPERTY: if cached: - return ontology_term.termtype == 'property' + return ontology_term.term_type == OntologyTermType.PROPERTY else: return isinstance(ontology_term, PropertyClass) - elif term_type == 'both': + elif term_type == OntologyTermType.ANY: return True else: - raise ValueError("Option to include Properties or Classes is not valid") + raise ValueError("Invalid term-type option. Acceptable term types are: 'class' or 'property' or 'any'") def _get_parents(self, ontology_term): parents = dict() # named/atomic superclasses except owl:Thing + restrictions = dict() # restrictions are class expressions such as 'pancreatitis disease_has_location pancreas' try: all_parents = ontology_term.is_a # obtain direct parents of this entity for parent in all_parents: - # exclude OWL restrictions and owl:Thing and Self - if isinstance(parent, ThingClass) and parent is not Thing and parent is not ontology_term: - if len(parent.label) > 0: - parents.update({parent.iri: parent.label[0]}) - else: - parents.update({parent.iri: onto_utils.label_from_iri(parent.iri)}) + # exclude owl:Thing and Self + if parent is not Thing and parent is not ontology_term: + if isinstance(parent, ThingClass): # get named parents (i.e. classes with IRIs) + self._add_named_parent(parent, parents) + elif isinstance(parent, And): # get conjuncts and add them to the respective structures + for conjunct in parent.Classes: + if isinstance(conjunct, ThingClass): # if conjunct is a named class, add it to parents dict + self._add_named_parent(conjunct, parents) + else: + self._add_complex_parent(conjunct, restrictions) + elif isinstance(parent, Restriction): # get complex parents, i.e. 
restrictions or class expressions + self._add_complex_parent(parent, restrictions) except (AttributeError, ValueError) as err: self.logger.debug(err) - return parents + return parents, restrictions + + def _add_named_parent(self, parent, parents): + if len(parent.label) > 0: + parents.update({parent.iri: parent.label[0]}) + else: + parents.update({parent.iri: onto_utils.label_from_iri(parent.iri)}) + + def _add_complex_parent(self, parent, restrictions): + property_iri = parent.property.iri + if isinstance(parent.value, ThingClass): # the filler is a named term (i.e., it has an IRI) + value = parent.value.iri + else: # the filler is another complex class expression + value = parent.value + if property_iri in restrictions.keys(): + current_restrictions = restrictions[property_iri] + current_restrictions.add(value) + restrictions.update({property_iri: current_restrictions}) + else: + restrictions.update({property_iri: str(value)}) def _get_children(self, ontology_term, ontology): children = dict() @@ -175,7 +201,7 @@ def _get_labels(self, ontology_term): self.logger.debug("...collected %i labels and synonyms for %s", len(labels), ontology_term) return labels - def _get_synonyms(self, ontology_term, include_broad_synonyms=False): + def _get_synonyms(self, ontology_term, include_related_synonyms=False, include_broad_synonyms=False): """ Collect the synonyms of the given ontology term :param ontology_term: Ontology term @@ -185,12 +211,13 @@ def _get_synonyms(self, ontology_term, include_broad_synonyms=False): synonyms = set() for synonym in self._get_obo_exact_synonyms(ontology_term): synonyms.add(synonym) - for synonym in self._get_obo_related_synonyms(ontology_term): - synonyms.add(synonym) for nci_synonym in self._get_nci_synonyms(ontology_term): synonyms.add(nci_synonym) for efo_alt_term in self._get_efo_alt_terms(ontology_term): synonyms.add(efo_alt_term) + if include_related_synonyms: + for synonym in self._get_obo_related_synonyms(ontology_term): + synonyms.add(synonym) if include_broad_synonyms: for synonym in self._get_obo_broad_synonyms(ontology_term): synonyms.add(synonym) From fbfe58322da338b9db377c40534dd006be73f86b Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Thu, 5 Oct 2023 20:11:15 -0400 Subject: [PATCH 04/39] Rename tagged_terms -> tagged_term and fix some references --- text2term/{tagged_terms.py => tagged_term.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename text2term/{tagged_terms.py => tagged_term.py} (96%) diff --git a/text2term/tagged_terms.py b/text2term/tagged_term.py similarity index 96% rename from text2term/tagged_terms.py rename to text2term/tagged_term.py index 53d3441..f0204e1 100644 --- a/text2term/tagged_terms.py +++ b/text2term/tagged_term.py @@ -34,5 +34,5 @@ def get_source_term_id(self): return self.source_term_id def to_dict(self): - return {term : tags} + return {self.term: self.tags} \ No newline at end of file From f2eab117a1093162608fb5413c04f79b2e66782f Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Thu, 5 Oct 2023 20:17:14 -0400 Subject: [PATCH 05/39] Fix imports to deal with file rename --- text2term/preprocess.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/text2term/preprocess.py b/text2term/preprocess.py index 44e4f0f..2ef2838 100644 --- a/text2term/preprocess.py +++ b/text2term/preprocess.py @@ -1,13 +1,12 @@ import re -import os -from enum import Enum -from .tagged_terms import TaggedTerm +from .tagged_term import TaggedTerm + ## Tags should be stored with their terms in 
the same line, delineated by ";:;" ## ex: Age when diagnosed with (.*) ;:; age,diagnosis ## "Age when diagnosed with cancer" becomes: {"cancer", ["age", "diagnosis"]} -def preprocess_tagged_terms(file_path, template_path="", blocklist_path="", \ - blocklist_char='', rem_duplicates=False, separator=";:;"): +def preprocess_tagged_terms(file_path, template_path="", blocklist_path="", + blocklist_char='', rem_duplicates=False, separator=";:;"): # Seperate tags from the terms, put in TaggedTerm and add to list raw_terms = _get_values(file_path) terms = [] @@ -58,10 +57,10 @@ def preprocess_tagged_terms(file_path, template_path="", blocklist_path="", \ return processed_terms -def preprocess_terms(terms, template_path, output_file="", blocklist_path="", \ - blocklist_char='', rem_duplicates=False): + +def preprocess_terms(terms, template_path, output_file="", blocklist_path="", blocklist_char='', rem_duplicates=False): if isinstance(terms, str): - terms = _get_values(file_path) + terms = _get_values(file_path) # TODO: Unresolved reference 'file_path' # Form the templates as regular expressions template_strings = [] if template_path != "": @@ -96,6 +95,7 @@ def preprocess_terms(terms, template_path, output_file="", blocklist_path="", \ fp.write('\n'.join(processed_terms.values())) return processed_terms + ## Note: Because Python Dictionaries and Lists are passed by reference (sort of), updating the ## dictionary/list here will update the dictionary in the caller def _blocklist_term(processed_terms, term, blocklist, blocklist_char, tagged=False): @@ -110,20 +110,24 @@ def _blocklist_term(processed_terms, term, blocklist, blocklist_char, tagged=Fal return True return False + def _update_tagged_term(processed_terms, term, new_term, tags=[]): term.update_term(new_term) term.add_tags(tags) processed_terms.append(term) + def _get_values(path): return open(path).read().splitlines() + def _make_regex_list(strings): regexes = [] for string in strings: regexes.append(re.compile(string)) return regexes + def _remove_duplicates(terms): if type(terms) is dict: temp = {val : key for key, val in terms.items()} From eeb7f7e9a95613f0aff3c5f94cefa735ea0b6359 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Thu, 5 Oct 2023 20:20:29 -0400 Subject: [PATCH 06/39] Fix imports in t2t, minor fixes & shift docs to inside function --- text2term/t2t.py | 135 +++++++++++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 58 deletions(-) diff --git a/text2term/t2t.py b/text2term/t2t.py index 66f1233..14a7342 100644 --- a/text2term/t2t.py +++ b/text2term/t2t.py @@ -2,13 +2,12 @@ import sys import json import pickle -import time import datetime -import owlready2 import pandas as pd from text2term import onto_utils -from text2term.mapper import Mapper from text2term import onto_cache +from text2term.mapper import Mapper +from text2term.term import OntologyTermType from text2term.term_collector import OntologyTermCollector from text2term.term_graph_generator import TermGraphGenerator from text2term.bioportal_mapper import BioPortalAnnotatorMapper @@ -16,57 +15,60 @@ from text2term.tfidf_mapper import TFIDFMapper from text2term.zooma_mapper import ZoomaMapper from text2term.config import VERSION -from text2term.tagged_terms import TaggedTerm +from text2term.tagged_term import TaggedTerm from text2term.term_mapping import TermMapping IGNORE_TAGS = ["ignore", "Ignore", "ignore ", "Ignore "] UNMAPPED_TAG = "unmapped" -""" -Maps the terms in the given list to the specified target ontology. 
- -Parameters ----------- -source_terms : list - List of 'source' terms to map to ontology terms -target_ontology : str - Path or URL of 'target' ontology to map the source terms to. When the chosen mapper is BioPortal or Zooma, - provide a comma-separated list of ontology acronyms (eg 'EFO,HPO') or write 'all' to search all ontologies -base_iris : tuple - Map only to ontology terms whose IRIs start with one of the strings given in this tuple, for example: - ('http://www.ebi.ac.uk/efo','http://purl.obolibrary.org/obo/HP') -source_terms_ids : tuple - Collection of identifiers for the given source terms -excl_deprecated : bool - Exclude ontology terms stated as deprecated via `owl:deprecated true` -mapper : mapper.Mapper - Method used to compare source terms with ontology terms. One of: levenshtein, jaro, jarowinkler, jaccard, - fuzzy, tfidf, zooma, bioportal -max_mappings : int - Maximum number of top-ranked mappings returned per source term -min_score : float - Minimum similarity score [0,1] for the mappings (1=exact match) -output_file : str - Path to desired output file for the mappings -save_graphs : bool - Save vis.js graphs representing the neighborhood of each ontology term -save_mappings : bool - Save the generated mappings to a file (specified by `output_file`) - -Returns ----------- -df - Data frame containing the generated ontology mappings -""" + +# TODO missing parameters in docs def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_deprecated=False, max_mappings=3, - min_score=0.3, mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False, source_terms_ids=(), - separator=',', use_cache=False, term_type='classes', incl_unmapped=False): + min_score=0.3, mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False, + source_terms_ids=(), separator=',', use_cache=False, term_type=OntologyTermType.CLASS, + incl_unmapped=False): + """ + Maps the terms in the given list to the specified target ontology. + + Parameters + ---------- + source_terms : list + List of 'source' terms to map to ontology terms + target_ontology : str + Path or URL of 'target' ontology to map the source terms to. When the chosen mapper is BioPortal or Zooma, + provide a comma-separated list of ontology acronyms (eg 'EFO,HPO') or write 'all' to search all ontologies + base_iris : tuple + Map only to ontology terms whose IRIs start with one of the strings given in this tuple, for example: + ('http://www.ebi.ac.uk/efo','http://purl.obolibrary.org/obo/HP') + source_terms_ids : tuple + Collection of identifiers for the given source terms + excl_deprecated : bool + Exclude ontology terms stated as deprecated via `owl:deprecated true` + mapper : mapper.Mapper + Method used to compare source terms with ontology terms. 
One of: levenshtein, jaro, jarowinkler, jaccard, + fuzzy, tfidf, zooma, bioportal + max_mappings : int + Maximum number of top-ranked mappings returned per source term + min_score : float + Minimum similarity score [0,1] for the mappings (1=exact match) + output_file : str + Path to desired output file for the mappings + save_graphs : bool + Save vis.js graphs representing the neighborhood of each ontology term + save_mappings : bool + Save the generated mappings to a file (specified by `output_file`) + + Returns + ---------- + df + Data frame containing the generated ontology mappings + """ # Parse the possible source terms options and tags source_terms, source_term_ids, tags = _parse_source_terms(source_terms, source_terms_ids, csv_columns, separator) - # Create Source Term Ids if they are not provided + # Create source term IDs if they are not provided if len(source_terms_ids) != len(source_terms): if len(source_terms_ids) > 0: - sys.stderr.write("Warning: Source Term Ids are non-zero, but will not be used.") + sys.stderr.write("Warning: Source Term IDs are non-zero, but will not be used.") source_terms_ids = onto_utils.generate_iris(len(source_terms)) # Create the output file if output_file == '': @@ -81,17 +83,18 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_ mappings_df = _do_mapping(source_terms, source_terms_ids, target_terms, mapper, max_mappings, min_score, tags, incl_unmapped) mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3) if save_mappings: - _save_mappings(mappings_df, output_file, min_score, mapper, target_ontology, base_iris, \ - excl_deprecated, max_mappings, term_type, source_terms, incl_unmapped) + _save_mappings(mappings_df, output_file, min_score, mapper, target_ontology, base_iris, + excl_deprecated, max_mappings, term_type, source_terms, incl_unmapped) if save_graphs: _save_graphs(target_terms, output_file) return mappings_df + # Caches a single ontology def cache_ontology(ontology_url, ontology_acronym="", base_iris=()): if ontology_acronym == "": ontology_acronym = ontology_url - ontology_terms = _load_ontology(ontology_url, base_iris, exclude_deprecated=False, term_type='both') + ontology_terms = _load_ontology(ontology_url, base_iris, exclude_deprecated=False, term_type=OntologyTermType.ANY) cache_dir = "cache/" + ontology_acronym + "/" if not os.path.exists(cache_dir): os.makedirs(cache_dir) @@ -101,9 +104,12 @@ def cache_ontology(ontology_url, ontology_acronym="", base_iris=()): ontology_terms.clear() return onto_cache.OntologyCache(ontology_acronym) + """ PRIVATE/HELPER FUNCTIONS """ + + # Parses the source terms and returns what is to be mapped, the term ids, and the tags def _parse_source_terms(source_terms, source_terms_ids=(), csv_columns=(), separator=','): # If source_terms is a string, we assume it is a file location @@ -120,7 +126,7 @@ def _parse_source_terms(source_terms, source_terms_ids=(), csv_columns=(), separ source_terms_id_list = [] for tagged_term in source_terms: terms.append(tagged_term.get_term()) - if tagged_term.get_source_term_id() != None: + if tagged_term.get_source_term_id() is None: source_terms_id_list.append(tagged_term.get_source_term_id()) source_terms_ids = source_terms_id_list tags = source_terms @@ -129,11 +135,11 @@ def _parse_source_terms(source_terms, source_terms_ids=(), csv_columns=(), separ tags = dict.fromkeys(terms) return terms, source_terms_ids, tags + def _serialize_ontology(ontology_terms, ontology_acronym, cache_dir): - start = 
time.time() with open(cache_dir + ontology_acronym + "-term-details.pickle", 'wb+') as out_file: pickle.dump(ontology_terms, out_file) - end = time.time() + def _load_data(input_file_path, csv_column_names, separator): if len(csv_column_names) >= 1: @@ -148,19 +154,22 @@ def _load_data(input_file_path, csv_column_names, separator): term_ids = onto_utils.generate_iris(len(terms)) return terms, term_ids + def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_type='classes'): term_collector = OntologyTermCollector() if use_cache: - pickle_file = "cache/" + ontology + "/" + ontology + "-term-details.pickle" + pickle_file = os.path.join("cache", ontology, ontology + "-term-details.pickle") onto_terms_unfiltered = pickle.load(open(pickle_file, "rb")) onto_terms = term_collector.filter_terms(onto_terms_unfiltered, iris, exclude_deprecated, term_type) else: - onto_terms = term_collector.get_ontology_terms(ontology, base_iris=iris, exclude_deprecated=exclude_deprecated, term_type=term_type) + onto_terms = term_collector.get_ontology_terms(ontology, base_iris=iris, exclude_deprecated=exclude_deprecated, + term_type=term_type) if len(onto_terms) == 0: raise RuntimeError("Could not find any terms in the given ontology.") return onto_terms + def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score, tags, incl_unmapped): to_map, tags = _process_tags(source_terms, tags) if mapper == Mapper.TFIDF: @@ -185,6 +194,7 @@ def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappi df = _add_tags_to_df(mappings_df, tags) return df + # Takes in the tags and source terms and processes them accordingly def _process_tags(source_terms, tags): to_map = [] @@ -197,14 +207,17 @@ def _process_tags(source_terms, tags): if tag.get_term() == term: term_tags = tag.get_tags() break + # TODO: Local variable 'term_tags' might be referenced before assignmen if isinstance(term_tags, list): if not any(tag in IGNORE_TAGS for tag in term_tags): to_map.append(term) + # TODO: Local variable 'term_tags' might be referenced before assignmen else: if term_tags not in IGNORE_TAGS: to_map.append(term) return to_map, tags + def _add_tags_to_df(df, tags): if isinstance(tags, dict): for key, value in tags.items(): @@ -213,12 +226,13 @@ def _add_tags_to_df(df, tags): else: to_store = str(value) df.loc[df['Source Term'] == key, "Tags"] = to_store - else: + else: for term in tags: to_store = ','.join(term.get_tags()) df.loc[df['Source Term'] == term.get_term(), "Tags"] = to_store return df + def _filter_mappings(mappings_df, min_score): new_df = pd.DataFrame(columns=mappings_df.columns) for index, row in mappings_df.iterrows(): @@ -226,6 +240,7 @@ def _filter_mappings(mappings_df, min_score): new_df.loc[len(new_df.index)] = row return new_df + def _add_unmapped_terms(mappings_df, tags, source_terms, source_terms_ids): mapped = pd.unique(mappings_df["Source Term"]) for (term, term_id) in zip(source_terms, source_terms_ids): @@ -235,6 +250,7 @@ def _add_unmapped_terms(mappings_df, tags, source_terms, source_terms_ids): mappings_df.loc[len(mappings_df.index)] = non_mapping.to_dict() return mappings_df + def _add_tag(tags, term, to_add, ignore=False): if isinstance(tags, dict): new_tags = tags.get(term, []) @@ -254,14 +270,15 @@ def _add_tag(tags, term, to_add, ignore=False): if tagged_term.get_term() == term and check_ignore: tagged_term.add_tags([to_add]) -def _save_mappings(mappings, output_file, min_score, mapper, target_ontology, base_iris, \ - excl_deprecated, 
max_mappings, term_type, source_terms, incl_unmapped): + +def _save_mappings(mappings, output_file, min_score, mapper, target_ontology, base_iris, + excl_deprecated, max_mappings, term_type, source_terms, incl_unmapped): if os.path.dirname(output_file): # create output directories if needed os.makedirs(os.path.dirname(output_file), exist_ok=True) with open(output_file, "a") as f: - f.write("# Date and time run: %s\n" % datetime.datetime.now()) + f.write("# Timestamp: %s\n" % datetime.datetime.now()) f.write("# Target Ontology: %s\n" % target_ontology) - f.write("# Text2term version: %s\n" % VERSION) + f.write("# text2term version: %s\n" % VERSION) f.write("# Minimum Score: %.2f\n" % min_score) f.write("# Mapper: %s\n" % mapper.value) f.write("# Base IRIs: %s\n" % (base_iris,)) @@ -272,10 +289,12 @@ def _save_mappings(mappings, output_file, min_score, mapper, target_ontology, ba f.write("# Unmapped Terms ") f.write("Excluded\n" if not incl_unmapped else "Included\n") writestring = "# Of " + str(len(source_terms)) + " entries, " + str(len(pd.unique(mappings["Source Term ID"]))) - writestring += " were successfully mapped to " + str(len(pd.unique(mappings["Mapped Term IRI"]))) + " unique terms\n" + writestring += " were mapped to " + str( + len(pd.unique(mappings["Mapped Term IRI"]))) + " unique terms\n" f.write(writestring) mappings.to_csv(output_file, index=False, mode='a') + def _save_graphs(terms, output_file): term_graphs = TermGraphGenerator(terms).graphs_dicts() with open(output_file + "-term-graphs.json", 'w') as json_file: From 9a3f375780f548ab351a16053a0c943af3683268 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Thu, 5 Oct 2023 20:21:07 -0400 Subject: [PATCH 07/39] Bump version --- text2term/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text2term/config.py b/text2term/config.py index a2ded2f..189c03b 100644 --- a/text2term/config.py +++ b/text2term/config.py @@ -1 +1 @@ -VERSION = "3.0.2" +VERSION = "4.0.0" From ee5a0cc1753c8cd85b469eb049019b703354bbb7 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Fri, 6 Oct 2023 15:55:43 -0400 Subject: [PATCH 08/39] Minor changes to adhere to PEP 8 code style --- text2term/__main__.py | 2 +- text2term/onto_utils.py | 6 ++---- text2term/tagged_term.py | 2 +- text2term/term.py | 2 +- text2term/term_collector.py | 8 +++++--- text2term/term_graph_generator.py | 3 +++ text2term/tfidf_mapper.py | 3 ++- 7 files changed, 15 insertions(+), 11 deletions(-) diff --git a/text2term/__main__.py b/text2term/__main__.py index 39fa830..193f16d 100644 --- a/text2term/__main__.py +++ b/text2term/__main__.py @@ -9,7 +9,7 @@ parser = argparse.ArgumentParser(description='A tool for mapping free-text descriptions of (biomedical) ' 'entities to controlled terms in an ontology') parser.add_argument("-s", "--source", required=True, type=str, - help="Input file containing 'source' terms to map to ontology terms (list of terms or CSV file)") + help="Input file containing 'source' terms to map to ontology terms: list of terms or CSV file") parser.add_argument("-t", "--target", required=True, type=str, help="Path or URL of 'target' ontology to map source terms to. 
When the chosen mapper is " "BioPortal or Zooma, provide a comma-separated list of acronyms (eg 'EFO,HPO') or write " diff --git a/text2term/onto_utils.py b/text2term/onto_utils.py index 222f6c2..9cbd9ac 100644 --- a/text2term/onto_utils.py +++ b/text2term/onto_utils.py @@ -1,6 +1,4 @@ import logging -import re -import sys import pandas as pd import bioregistry import shortuuid @@ -21,8 +19,8 @@ 'later', 'trimester'} QUANTITY_WORDS = {'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'frequently', 'per', 'hour', 'day', 'week', 'month', - 'year', 'years', 'total', 'quantity', 'amount', 'level', 'levels', 'volume', 'count', 'counts', 'percentage', - 'abundance', 'proportion', 'content', 'average', 'prevalence', 'mean', 'ratio'} + 'year', 'years', 'total', 'quantity', 'amount', 'level', 'levels', 'volume', 'count', 'counts', + 'percentage', 'abundance', 'proportion', 'content', 'average', 'prevalence', 'mean', 'ratio'} def normalize_list(token_list): diff --git a/text2term/tagged_term.py b/text2term/tagged_term.py index f0204e1..20d6468 100644 --- a/text2term/tagged_term.py +++ b/text2term/tagged_term.py @@ -1,6 +1,6 @@ class TaggedTerm: - def __init__(self, term=None, tags=[], original_term=None, source_term_id=None): + def __init__(self, term=None, tags=(), original_term=None, source_term_id=None): self.term = term self.tags = tags self.original_term = original_term diff --git a/text2term/term.py b/text2term/term.py index 334e218..618bda8 100644 --- a/text2term/term.py +++ b/text2term/term.py @@ -1,4 +1,4 @@ -"""Provides OntologyTerm and OntologyTermType classes""" +"""Provides OntologyTerm class and OntologyTermType string enumeration""" from enum import Enum diff --git a/text2term/term_collector.py b/text2term/term_collector.py index 3ee622f..f96548b 100644 --- a/text2term/term_collector.py +++ b/text2term/term_collector.py @@ -35,7 +35,8 @@ def get_ontology_terms(self, ontology_iri, base_iris=(), use_reasoning=False, ex query = iri + "*" self.logger.info("...collecting terms with IRIs starting in: " + iri) iris = list(default_world.search(iri=query)) - ontology_terms = ontology_terms | self._get_ontology_terms(iris, ontology, exclude_deprecated, term_type) + ontology_terms = ontology_terms | self._get_ontology_terms(iris, ontology, exclude_deprecated, + term_type) else: ontology_signature = self._get_ontology_signature(ontology) ontology_terms = self._get_ontology_terms(ontology_signature, ontology, exclude_deprecated, term_type) @@ -93,7 +94,8 @@ def _get_ontology_terms(self, term_list, ontology, exclude_deprecated, term_type term_type = OntologyTermType.PROPERTY term_details = OntologyTerm(iri, labels, definitions=definitions, synonyms=synonyms, parents=named_parents, children=children, instances=instances, - restrictions=complex_parents, deprecated=is_deprecated, term_type=term_type) + restrictions=complex_parents, deprecated=is_deprecated, + term_type=term_type) ontology_terms[iri] = term_details else: self.logger.debug("Excluding deprecated ontology term: %s", ontology_term.iri) @@ -366,7 +368,7 @@ def _load_ontology(self, ontology_iri): self.logger.info("Loading ontology %s...", ontology_iri) start = time.time() owl_link = bioregistry.get_owl_download(ontology_iri) - if owl_link != None: + if owl_link is not None: ontology_iri = owl_link ontology = get_ontology(ontology_iri).load() end = time.time() diff --git a/text2term/term_graph_generator.py b/text2term/term_graph_generator.py index 231c602..c2a061b 100644 --- a/text2term/term_graph_generator.py +++ 
b/text2term/term_graph_generator.py @@ -1,6 +1,9 @@ +"""Provides TermGraphGenerator class""" + from text2term import onto_utils from text2term.term_graph import TermGraph, Node, Edge + class TermGraphGenerator: def __init__(self, terms): diff --git a/text2term/tfidf_mapper.py b/text2term/tfidf_mapper.py index 098c04f..2e5566f 100644 --- a/text2term/tfidf_mapper.py +++ b/text2term/tfidf_mapper.py @@ -28,7 +28,8 @@ def map(self, source_terms, source_terms_ids, max_mappings=3, min_score=0.3): Default set to 0, so consider all candidates """ self.logger.info("Mapping %i source terms...", len(source_terms)) - self.logger.info("...against %i ontology terms (%i labels/synonyms)", len(self.target_ontology_terms), len(self.target_labels)) + self.logger.info("...against %i ontology terms (%i labels/synonyms)", len(self.target_ontology_terms), + len(self.target_labels)) start = time.time() source_terms_norm = onto_utils.normalize_list(source_terms) vectorizer = self._tokenize(source_terms_norm, self.target_labels) From a7332ed6592f80bff3f6cb08be2014fe89109ae6 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Fri, 6 Oct 2023 16:06:58 -0400 Subject: [PATCH 09/39] Add documentation about parameters in map_terms Specify cache directory using os.path.join --- text2term/t2t.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/text2term/t2t.py b/text2term/t2t.py index 14a7342..45946bd 100644 --- a/text2term/t2t.py +++ b/text2term/t2t.py @@ -22,7 +22,6 @@ UNMAPPED_TAG = "unmapped" -# TODO missing parameters in docs def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_deprecated=False, max_mappings=3, min_score=0.3, mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False, source_terms_ids=(), separator=',', use_cache=False, term_type=OntologyTermType.CLASS, @@ -35,11 +34,15 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_ source_terms : list List of 'source' terms to map to ontology terms target_ontology : str - Path or URL of 'target' ontology to map the source terms to. When the chosen mapper is BioPortal or Zooma, - provide a comma-separated list of ontology acronyms (eg 'EFO,HPO') or write 'all' to search all ontologies + Filepath or URL of 'target' ontology to map the source terms to. When the chosen mapper is BioPortal or Zooma, + provide a comma-separated list of ontology acronyms (eg 'EFO,HPO') or write 'all' to search all ontologies. 
+ When the target ontology has been previously cached, provide the ontology name as used when it was cached base_iris : tuple Map only to ontology terms whose IRIs start with one of the strings given in this tuple, for example: ('http://www.ebi.ac.uk/efo','http://purl.obolibrary.org/obo/HP') + csv_columns : tuple + Name of column containing the terms to map, optionally followed by another column name containing the term IDs, + for example: ('disease', 'disease_identifier') source_terms_ids : tuple Collection of identifiers for the given source terms excl_deprecated : bool @@ -57,6 +60,14 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_ Save vis.js graphs representing the neighborhood of each ontology term save_mappings : bool Save the generated mappings to a file (specified by `output_file`) + separator : str + Symbol used to separate columns in the input table (eg ',' or '\t' for csv or tsv, respectively) + use_cache : bool + Use a previously cached ontology + term_type : OntologyTermType + The type(s) of ontology terms to map to, which can be 'class' or 'property' or 'any' + incl_unmapped : bool + Include unmapped terms in the output data frame Returns ---------- @@ -80,7 +91,8 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_ else: target_terms = _load_ontology(target_ontology, base_iris, excl_deprecated, use_cache, term_type) # Run the mapper - mappings_df = _do_mapping(source_terms, source_terms_ids, target_terms, mapper, max_mappings, min_score, tags, incl_unmapped) + mappings_df = _do_mapping(source_terms, source_terms_ids, target_terms, mapper, max_mappings, min_score, tags, + incl_unmapped) mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3) if save_mappings: _save_mappings(mappings_df, output_file, min_score, mapper, target_ontology, base_iris, @@ -95,7 +107,7 @@ def cache_ontology(ontology_url, ontology_acronym="", base_iris=()): if ontology_acronym == "": ontology_acronym = ontology_url ontology_terms = _load_ontology(ontology_url, base_iris, exclude_deprecated=False, term_type=OntologyTermType.ANY) - cache_dir = "cache/" + ontology_acronym + "/" + cache_dir = os.path.join("cache", ontology_acronym) if not os.path.exists(cache_dir): os.makedirs(cache_dir) @@ -207,11 +219,9 @@ def _process_tags(source_terms, tags): if tag.get_term() == term: term_tags = tag.get_tags() break - # TODO: Local variable 'term_tags' might be referenced before assignmen if isinstance(term_tags, list): if not any(tag in IGNORE_TAGS for tag in term_tags): to_map.append(term) - # TODO: Local variable 'term_tags' might be referenced before assignmen else: if term_tags not in IGNORE_TAGS: to_map.append(term) From 48cfbfb745b3c7ab1899aa00b3ad58cd103864f4 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Fri, 6 Oct 2023 16:10:06 -0400 Subject: [PATCH 10/39] Fix variable reference and typo --- text2term/preprocess.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/text2term/preprocess.py b/text2term/preprocess.py index 2ef2838..06cd199 100644 --- a/text2term/preprocess.py +++ b/text2term/preprocess.py @@ -7,14 +7,14 @@ ## "Age when diagnosed with cancer" becomes: {"cancer", ["age", "diagnosis"]} def preprocess_tagged_terms(file_path, template_path="", blocklist_path="", blocklist_char='', rem_duplicates=False, separator=";:;"): - # Seperate tags from the terms, put in TaggedTerm and add to list + # Separate tags from the terms, put in TaggedTerm and add to list 
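     # e.g. with the default separator ';:;', the input line
     #   'Age when diagnosed with cancer ;:; age,diagnosis'
     # is split into the term 'Age when diagnosed with cancer' and the tags ['age', 'diagnosis']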
raw_terms = _get_values(file_path) terms = [] for raw_term in raw_terms: - seperated = raw_term.split(separator) + separated = raw_term.split(separator) try: - tags = seperated[1].split(",") - term = TaggedTerm(original_term=seperated[0], tags=tags) + tags = separated[1].split(",") + term = TaggedTerm(original_term=separated[0], tags=tags) except IndexError: term = TaggedTerm(original_term=raw_term) terms.append(term) @@ -24,10 +24,10 @@ def preprocess_tagged_terms(file_path, template_path="", blocklist_path="", if template_path != "": raw_templates = _get_values(template_path) for raw_template in raw_templates: - seperated = raw_template.split(separator) + separated = raw_template.split(separator) try: - tags = seperated[1].split(",") - regex_term = re.compile(seperated[0]) + tags = separated[1].split(",") + regex_term = re.compile(separated[0]) templates[regex_term] = tags except IndexError: regex_term = re.compile(raw_template) @@ -60,7 +60,7 @@ def preprocess_tagged_terms(file_path, template_path="", blocklist_path="", def preprocess_terms(terms, template_path, output_file="", blocklist_path="", blocklist_char='', rem_duplicates=False): if isinstance(terms, str): - terms = _get_values(file_path) # TODO: Unresolved reference 'file_path' + terms = _get_values(terms) # if 'terms' is a string, we assume it is a filepath # Form the templates as regular expressions template_strings = [] if template_path != "": @@ -111,7 +111,7 @@ def _blocklist_term(processed_terms, term, blocklist, blocklist_char, tagged=Fal return False -def _update_tagged_term(processed_terms, term, new_term, tags=[]): +def _update_tagged_term(processed_terms, term, new_term, tags=()): term.update_term(new_term) term.add_tags(tags) processed_terms.append(term) From ccd9119a5029baebe4e366bdfb426060c9466301 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Fri, 6 Oct 2023 16:12:54 -0400 Subject: [PATCH 11/39] Remove no longer needed mapping functions Specify paths using os.path.join. Fix typo in function name. 
PEP8 style changes

---
 text2term/onto_cache.py | 65 +++++++++++++++++------------------------
 1 file changed, 27 insertions(+), 38 deletions(-)

diff --git a/text2term/onto_cache.py b/text2term/onto_cache.py
index 7af3e40..a2ccc61 100644
--- a/text2term/onto_cache.py
+++ b/text2term/onto_cache.py
@@ -1,14 +1,19 @@
-import text2term
-from .mapper import Mapper
 import os
-from shutil import rmtree
 import sys
-import pandas as pd
+import text2term
 import owlready2
+import pandas as pd
+from .term import OntologyTermType
+from .mapper import Mapper
+from shutil import rmtree
+
+CACHE_FOLDER = "cache"
 
 """
 CACHING FUNCTIONS -- Public
 """
+
+
 # Caches many ontologies from a csv
 def cache_ontology_set(ontology_registry_path):
     registry = pd.read_csv(ontology_registry_path)
@@ -16,62 +21,46 @@ def cache_ontology_set(ontology_registry_path):
     for index, row in registry.iterrows():
         try:
             cache = text2term.cache_ontology(row.url, row.acronym)
-            cache_set.update({row.acronym : cache})
+            cache_set.update({row.acronym: cache})
         except Exception as err:
             err_message = "Could not cache ontology " + row.acronym + " due to error: " + str(err)
             sys.stderr.write(err_message)
     owlready2.default_world.ontologies.clear()
     return cache_set
 
+
 # Will check if an acronym exists in the cache
 def cache_exists(ontology_acronym=''):
-    return os.path.exists("cache/" + ontology_acronym)
+    return os.path.exists(os.path.join(CACHE_FOLDER, ontology_acronym))
+
 
 # Clears the cache
 def clear_cache(ontology_acronym=''):
-    cache_dir = "cache/"
+    cache_dir = CACHE_FOLDER
     if ontology_acronym != '':
-        cache_dir = os.path.join(cache_dir, ontology_acronym)
+        cache_dir = os.path.join(CACHE_FOLDER, ontology_acronym)
     # Is equivalent to: rm -r cache_dir
     try:
         rmtree(cache_dir)
         sys.stderr.write("Cache has been cleared successfully\n")
     except OSError as error:
         sys.stderr.write("Cache cannot be removed:")
-        sys.stderr.write(error)
+        sys.stderr.write(str(error))
+
 
-## Class that is returned to run
+# Class that is returned to run
class OntologyCache:
     def __init__(self, ontology_acronym):
         self.acronym = ontology_acronym
-        self.ontology = "cache/" + ontology_acronym + "/"
+        self.ontology = os.path.join(CACHE_FOLDER, ontology_acronym)
 
     def map_terms(self, source_terms, base_iris=(), excl_deprecated=False, max_mappings=3, min_score=0.3,
-            mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False, source_terms_ids=(),
-            term_type='classes'):
-        return text2term.map_terms(source_terms, self.acronym, base_iris=base_iris, \
-            excl_deprecated=excl_deprecated, max_mappings=max_mappings, min_score=min_score, \
-            mapper=mapper, output_file=output_file, save_graphs=save_graphs, \
-            save_mappings=save_mappings, source_terms_ids=source_terms_ids, use_cache=True, \
-            term_type=term_type)
-
-    def map_tagged_terms(self, tagged_terms_dict, base_iris=(), excl_deprecated=False, max_mappings=3, min_score=0.3,
-            mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False, source_terms_ids=(),
-            term_type='classes'):
-        return text2term.map_tagged_terms(tagged_terms_dict, self.acronym, base_iris=base_iris, \
-            excl_deprecated=excl_deprecated, max_mappings=max_mappings, min_score=min_score, \
-            mapper=mapper, output_file=output_file, save_graphs=save_graphs, \
-            save_mappings=save_mappings, source_terms_ids=source_terms_ids, use_cache=True, \
-            term_type=term_type)
-
-    def map_file(self, input_file, base_iris=(), csv_columns=(), excl_deprecated=False, max_mappings=3,
-                 mapper=Mapper.TFIDF, min_score=0.3, output_file='', save_graphs=False,
save_mappings=False, - separator=',', term_type='classes'): - return text2term.map_file(source_terms, self.acronym, base_iris=base_iris, csv_columns=csv_columns, \ - excl_deprecated=excl_deprecated, max_mappings=max_mappings, min_score=min_score, \ - mapper=mapper, output_file=output_file, save_graphs=save_graphs, separator=separator, \ - save_mappings=save_mappings, source_terms_ids=source_terms_ids, use_cache=True, \ - term_type=term_type) + mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False, source_terms_ids=(), + term_type=OntologyTermType.CLASS): + return text2term.map_terms(source_terms, self.acronym, base_iris=base_iris, + excl_deprecated=excl_deprecated, max_mappings=max_mappings, min_score=min_score, + mapper=mapper, output_file=output_file, save_graphs=save_graphs, + save_mappings=save_mappings, source_terms_ids=source_terms_ids, use_cache=True, + term_type=term_type) def clear_cache(self): clear_cache(self.acronym) @@ -79,5 +68,5 @@ def clear_cache(self): def cache_exists(self): return cache_exists(self.acronym) - def acroynm(self): + def acronym(self): return self.acronym From 1ea32a90e99cbc1e11cb9bdab3be3a054936c8ac Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Fri, 6 Oct 2023 17:42:59 -0400 Subject: [PATCH 12/39] Update docs of source_terms parameter in map_terms() Use os.path.join to specify file paths --- text2term/t2t.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/text2term/t2t.py b/text2term/t2t.py index 45946bd..764c83d 100644 --- a/text2term/t2t.py +++ b/text2term/t2t.py @@ -31,8 +31,9 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_ Parameters ---------- - source_terms : list - List of 'source' terms to map to ontology terms + source_terms : str or list or dict + Path to file containing the terms to map to. Or list of terms to map to an ontology. Or dictionary containing + tagged terms, where the keys are the source terms and the values are tags attached to those terms target_ontology : str Filepath or URL of 'target' ontology to map the source terms to. When the chosen mapper is BioPortal or Zooma, provide a comma-separated list of ontology acronyms (eg 'EFO,HPO') or write 'all' to search all ontologies. 
@@ -112,7 +113,7 @@ def cache_ontology(ontology_url, ontology_acronym="", base_iris=()): os.makedirs(cache_dir) _serialize_ontology(ontology_terms, ontology_acronym, cache_dir) - _save_graphs(ontology_terms, output_file=cache_dir + ontology_acronym) + _save_graphs(ontology_terms, output_file=os.path.join(cache_dir, ontology_acronym)) ontology_terms.clear() return onto_cache.OntologyCache(ontology_acronym) @@ -149,7 +150,7 @@ def _parse_source_terms(source_terms, source_terms_ids=(), csv_columns=(), separ def _serialize_ontology(ontology_terms, ontology_acronym, cache_dir): - with open(cache_dir + ontology_acronym + "-term-details.pickle", 'wb+') as out_file: + with open(os.path.join(cache_dir, ontology_acronym + "-term-details.pickle"), 'wb+') as out_file: pickle.dump(ontology_terms, out_file) @@ -167,14 +168,13 @@ def _load_data(input_file_path, csv_column_names, separator): return terms, term_ids -def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_type='classes'): +def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_type=OntologyTermType.CLASS): term_collector = OntologyTermCollector() if use_cache: pickle_file = os.path.join("cache", ontology, ontology + "-term-details.pickle") onto_terms_unfiltered = pickle.load(open(pickle_file, "rb")) onto_terms = term_collector.filter_terms(onto_terms_unfiltered, iris, exclude_deprecated, term_type) else: - onto_terms = term_collector.get_ontology_terms(ontology, base_iris=iris, exclude_deprecated=exclude_deprecated, term_type=term_type) if len(onto_terms) == 0: From ae45a81389c5dfa62e381a46aea2060eb12be476 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Fri, 6 Oct 2023 19:06:52 -0400 Subject: [PATCH 13/39] Update README.md --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 08a52ee..f936def 100644 --- a/README.md +++ b/README.md @@ -65,22 +65,22 @@ text2term.map_terms(source_terms, save_mappings=False, separator=',', use_cache=False, - term_type='classes', + term_type=OntologyTermType.CLASS, incl_unmapped=False) ``` NOTE: As of 3.0.0, the former three functions (`map_file`, `map_terms`, `map_tagged_terms`) have been condensed into one function. Users can now change the name of any function in old code to `map_terms` and it reads the input context to maintain the functionality of each one. ### Arguments -For `map_terms`, the first argument can be any of the following: 1) a string that specifies a path to a file containing the terms to be mapped, 2) a list of the terms to be mapped, or 3)dictionary of terms to a list of tags, or a list of TaggedTerm objects (see below). +For `map_terms`, the first argument can be any of the following: 1) a string that specifies a path to a file containing the terms to be mapped, 2) a list of the terms to be mapped, or 3) a dictionary where the keys are the terms to be mapped, and values can be a list of tags or a list of TaggedTerm objects (see below). Currently, the tags do not affect the mapping in any way, but they are added to the output dataframe at the end of the process. The exception is the Ignore tag, which causes the term to not be mapped at all, but still be outputted in the results if the incl_unmapped argument is True (see below). All other arguments are the same, and have the same functionality: `target_ontology` : str - Path or URL of 'target' ontology to map the source terms to. 
When the chosen mapper is BioPortal or Zooma, - provide a comma-separated list of ontology acronyms (eg 'EFO,HPO') or write 'all' to search all ontologies - As of version 2.3.0, passing a recognized acronym to `target_ontology` will generate the download link automatically. This is done using the `bioregistry` python package. + Path or URL or acronym of 'target' ontology to map the source terms to. When the chosen mapper is BioPortal or Zooma, + provide a comma-separated list of ontology acronyms (eg 'EFO,HPO') or write 'all' to search all ontologies. When the target ontology has been previously cached, provide the ontology name that was used to cache it. + As of version 2.3.0, it is possible to specify ontology acronyms as the `target_ontology` (eg "EFO" or "CL"), which is achieved using [bioregistry](https://bioregistry.io) to retrieve URLs for those acronyms. `base_iris` : tuple Map only to ontology terms whose IRIs start with one of the strings given in this tuple, for example: @@ -116,16 +116,16 @@ All other arguments are the same, and have the same functionality: Save the generated mappings to a file (specified by `output_file`) `seperator` : str - Character that seperates the source term values if a file input is given. Ignored if the input is not a file path. + Character that separates the source term values if a file input is given. Ignored if the input is not a file path. `use_cache` : bool Use the cache for the ontology. More details are below. -`term_type` : str - Determines whether the ontology should be parsed for its classes (ThingClass), properties (PropertyClass), or both. Possible values are ['classes', 'properties', 'both']. If it does not match one of these values, the program will throw a ValueError. +`term_type` : term.OntologyTermType + Specifies whether to map to ontology classes, properties or any of the two. Possible values are ['class', 'property', 'any']. `incl_unmapped` : bool - Include all unmapped terms in the output. If something has been tagged Ignore (see below) or falls below the `min_score` threshold, it is included without a mapped term at the end of the output. + Include all unmapped terms in the output. If something has been tagged 'Ignore' (see below) or falls below the `min_score` threshold, it is included without a mapped term at the end of the output data frame. All default values, if they exist, can be seen above. 
From eeddaf2cad53a6077e66d708a1f68d73f1c6f7ff Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Fri, 6 Oct 2023 19:37:07 -0400 Subject: [PATCH 14/39] Fix error thrown when concatenating list and tuple --- text2term/tagged_term.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/text2term/tagged_term.py b/text2term/tagged_term.py index 20d6468..7891f63 100644 --- a/text2term/tagged_term.py +++ b/text2term/tagged_term.py @@ -1,3 +1,5 @@ +"""Provides TaggedTerm class""" + class TaggedTerm: def __init__(self, term=None, tags=(), original_term=None, source_term_id=None): @@ -10,7 +12,7 @@ def __repr__(self): return f" Date: Fri, 6 Oct 2023 19:38:08 -0400 Subject: [PATCH 15/39] Use enums for mapper and term_type where possible --- text2term/__main__.py | 5 +++-- text2term/preprocess.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/text2term/__main__.py b/text2term/__main__.py index 193f16d..df9863b 100644 --- a/text2term/__main__.py +++ b/text2term/__main__.py @@ -4,6 +4,7 @@ from t2t import map_terms, cache_ontology from onto_cache import cache_exists from mapper import Mapper +from term import OntologyTermType if __name__ == "__main__": parser = argparse.ArgumentParser(description='A tool for mapping free-text descriptions of (biomedical) ' @@ -16,7 +17,7 @@ "'all' to search all ontologies") parser.add_argument("-o", "--output", required=False, type=str, default="", help="Path to desired output file for the mappings (default=current working directory)") - parser.add_argument("-m", "--mapper", required=False, type=str, default="tfidf", + parser.add_argument("-m", "--mapper", required=False, type=str, default=Mapper.TFIDF, help="Method used to compare source terms with ontology terms. One of: " + str(Mapper.list()) + " (default=tfidf)") parser.add_argument("-csv", "--csv_input", required=False, type=str, default=(), @@ -38,7 +39,7 @@ help="Save vis.js graphs representing the neighborhood of each ontology term (default=False)") parser.add_argument("-c", "--store_in_cache", required=False, type=str, default="", help="Store the target ontology into local cache under acronym") - parser.add_argument("-type", "--term_type", required=False, type=str, default="classes", + parser.add_argument("-type", "--term_type", required=False, type=str, default=OntologyTermType.CLASS, help="Define whether to return ontology classes, properties, or both") arguments = parser.parse_args() diff --git a/text2term/preprocess.py b/text2term/preprocess.py index 06cd199..2e97883 100644 --- a/text2term/preprocess.py +++ b/text2term/preprocess.py @@ -44,12 +44,12 @@ def preprocess_tagged_terms(file_path, template_path="", blocklist_path="", for term in terms: if _blocklist_term(processed_terms, term, blocklist, blocklist_char, tagged=True): continue - for template, tem_tags in templates.items(): + for template, term_tags in templates.items(): match = template.fullmatch(term.get_original_term()) if match: combined_matches = ' '.join(map(str, match.groups())) if combined_matches: - _update_tagged_term(processed_terms, term, combined_matches, tem_tags) + _update_tagged_term(processed_terms, term, combined_matches, term_tags) break if rem_duplicates: From b9f776a2e9bb1cd3dc14ba5cce2ba571bca39852 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Sat, 7 Oct 2023 15:47:11 -0400 Subject: [PATCH 16/39] Add logging to t2t module --- text2term/t2t.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/text2term/t2t.py b/text2term/t2t.py index 764c83d..3c9d8d1 
100644 --- a/text2term/t2t.py +++ b/text2term/t2t.py @@ -1,7 +1,7 @@ import os -import sys import json import pickle +import logging import datetime import pandas as pd from text2term import onto_utils @@ -21,6 +21,8 @@ IGNORE_TAGS = ["ignore", "Ignore", "ignore ", "Ignore "] UNMAPPED_TAG = "unmapped" +LOGGER = onto_utils.get_logger(__name__, level=logging.INFO) + def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_deprecated=False, max_mappings=3, min_score=0.3, mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False, @@ -80,7 +82,8 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_ # Create source term IDs if they are not provided if len(source_terms_ids) != len(source_terms): if len(source_terms_ids) > 0: - sys.stderr.write("Warning: Source Term IDs are non-zero, but will not be used.") + LOGGER.warning(f"The number of Source Term IDs provided ({len(source_terms_ids)}) is different than the " + f"number of Source Terms ({len(source_terms)}). New Source Term IDs will be used instead.") source_terms_ids = onto_utils.generate_iris(len(source_terms)) # Create the output file if output_file == '': @@ -109,9 +112,9 @@ def cache_ontology(ontology_url, ontology_acronym="", base_iris=()): ontology_acronym = ontology_url ontology_terms = _load_ontology(ontology_url, base_iris, exclude_deprecated=False, term_type=OntologyTermType.ANY) cache_dir = os.path.join("cache", ontology_acronym) + LOGGER.info(f"Caching ontology {ontology_url} to: {cache_dir}") if not os.path.exists(cache_dir): os.makedirs(cache_dir) - _serialize_ontology(ontology_terms, ontology_acronym, cache_dir) _save_graphs(ontology_terms, output_file=os.path.join(cache_dir, ontology_acronym)) ontology_terms.clear() @@ -172,11 +175,13 @@ def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_typ term_collector = OntologyTermCollector() if use_cache: pickle_file = os.path.join("cache", ontology, ontology + "-term-details.pickle") + LOGGER.info(f"Loading cached ontology from: {pickle_file}") onto_terms_unfiltered = pickle.load(open(pickle_file, "rb")) onto_terms = term_collector.filter_terms(onto_terms_unfiltered, iris, exclude_deprecated, term_type) else: onto_terms = term_collector.get_ontology_terms(ontology, base_iris=iris, exclude_deprecated=exclude_deprecated, term_type=term_type) + LOGGER.info(f"Filtered ontology terms to those of type: {term_type}") if len(onto_terms) == 0: raise RuntimeError("Could not find any terms in the given ontology.") return onto_terms From 5b69d13edabab5aff2843137d67f8ee45f07409a Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Sat, 7 Oct 2023 15:50:52 -0400 Subject: [PATCH 17/39] Rename variable used both for term type and term type filter --- text2term/term_collector.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/text2term/term_collector.py b/text2term/term_collector.py index f96548b..85a5d61 100644 --- a/text2term/term_collector.py +++ b/text2term/term_collector.py @@ -89,13 +89,13 @@ def _get_ontology_terms(self, term_list, ontology, exclude_deprecated, term_type definitions = self._get_definitions(ontology_term) is_deprecated = deprecated[ontology_term] == [True] if self._filter_term_type(ontology_term, OntologyTermType.CLASS, False): - term_type = OntologyTermType.CLASS + owl_term_type = OntologyTermType.CLASS elif self._filter_term_type(ontology_term, OntologyTermType.PROPERTY, False): - term_type = OntologyTermType.PROPERTY + owl_term_type = 
OntologyTermType.PROPERTY term_details = OntologyTerm(iri, labels, definitions=definitions, synonyms=synonyms, parents=named_parents, children=children, instances=instances, restrictions=complex_parents, deprecated=is_deprecated, - term_type=term_type) + term_type=owl_term_type) ontology_terms[iri] = term_details else: self.logger.debug("Excluding deprecated ontology term: %s", ontology_term.iri) From 480952a3c17777c2fa4dcac5df53de32b40cbe3b Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Sat, 7 Oct 2023 18:28:51 -0400 Subject: [PATCH 18/39] Update ontologies with latest releases. Add FoodOn --- text2term/resources/ontologies.csv | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/text2term/resources/ontologies.csv b/text2term/resources/ontologies.csv index 77edfb6..910acbd 100644 --- a/text2term/resources/ontologies.csv +++ b/text2term/resources/ontologies.csv @@ -1,11 +1,9 @@ acronym,version,url -CLO,2.1.178,http://purl.obolibrary.org/obo/clo.owl -CL,9/15/22,http://purl.obolibrary.org/obo/cl/releases/2022-09-15/cl.owl -EFO,3.46.0,https://github.com/EBISPOT/efo/releases/download/v3.46.0/efo.owl -GO,9/19/22,http://purl.obolibrary.org/obo/go/releases/2022-09-19/go.owl -HPO,6/11/22,http://purl.obolibrary.org/obo/hp/releases/2022-06-11/hp.owl -MONDO,8/1/22,http://purl.obolibrary.org/obo/mondo/releases/2022-08-01/mondo.owl -NCIT,22.07d,http://purl.obolibrary.org/obo/ncit/releases/2022-08-19/ncit.owl -PRO,67,http://purl.obolibrary.org/obo/pr/67.0/pr.owl -UBERON,8/19/22,http://purl.obolibrary.org/obo/uberon/releases/2022-08-19/uberon.owl -MP,8/4/22,http://purl.obolibrary.org/obo/mp/releases/2022-08-04/mp.owl \ No newline at end of file +CL,2023-09-21,https://github.com/obophenotype/cell-ontology/releases/download/v2023-09-21/cl.owl +EFO,3.57.0,https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl +FOODON,0.6.0,https://github.com/FoodOntology/foodon/raw/v0.6.0/foodon.owl +GO,2023-07-27,http://purl.obolibrary.org/obo/go/releases/2023-07-27/go.owl +HPO,2023-09-01,https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-09-01/hp.owl +MONDO,2023-09-12,https://github.com/monarch-initiative/mondo/releases/download/v2023-08-02/mondo.owl +NCIT,2022-08-19,https://github.com/NCI-Thesaurus/thesaurus-obo-edition/releases/download/v2022-08-19/ncit.owl +UBERON,2023-09-05,https://github.com/obophenotype/uberon/releases/download/v2023-09-05/uberon.owl \ No newline at end of file From 9496ec2792e807a53cb1aa42cf736b406cc3a265 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 15:37:31 -0400 Subject: [PATCH 19/39] Add some tests and documentation about the tests --- test/simple-test.py | 119 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 101 insertions(+), 18 deletions(-) diff --git a/test/simple-test.py b/test/simple-test.py index be7ddaa..2e6a6f3 100644 --- a/test/simple-test.py +++ b/test/simple-test.py @@ -1,21 +1,104 @@ +import os +import pandas as pd import text2term -import bioregistry - -def main(): - efo = "http://www.ebi.ac.uk/efo/efo.owl#" - pizza = "https://protege.stanford.edu/ontologies/pizza/pizza.owl" - ncit = "http://purl.obolibrary.org/obo/ncit/releases/2022-08-19/ncit.owl" - if not text2term.cache_exists("EFO"): - cached_onto = text2term.cache_ontology("EFO") - # df = cached_onto.map_terms(["asthma", "disease location", "obsolete food allergy"], excl_deprecated=True, term_type="classes") - print("Cache exists:", cached_onto.cache_exists()) - # caches = 
text2term.cache_ontology_set("text2term/resources/ontologies.csv") - # df = text2term.map_terms(["asthma", "disease location", "obsolete food allergy"], "EFO", min_score=.8, mapper=text2term.Mapper.JARO_WINKLER, excl_deprecated=True, use_cache=True, term_type="classes") - # df = text2term.map_terms(["contains", "asthma"], "EFO", term_type="classes") - df = text2term.map_terms({"asthma":"disease", "allergy":["ignore", "response"], "assdhfbswif":["sent"], "isdjfnsdfwd":None}, "EFO", excl_deprecated=True, use_cache=True, incl_unmapped=True) - # taggedterms = text2term.preprocess_tagged_terms("test/simple_preprocess.txt") - # df = text2term.map_terms(taggedterms, "EFO", excl_deprecated=True, use_cache=True, incl_unmapped=True) - print(df.to_string()) +from term import OntologyTermType + +pd.set_option('display.max_columns', None) + + +def run_tests(): + efo_url = "https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl" + pizza = "https://protege.stanford.edu/ontologies/pizza/pizza.owl" + ncit = "http://purl.obolibrary.org/obo/ncit/releases/2022-08-19/ncit.owl" + hpo = "http://purl.obolibrary.org/obo/hp/releases/2022-06-11/hp.owl" + ecto = "http://purl.obolibrary.org/obo/ecto/releases/2022-12-12/ecto.owl" + + # ONTOLOGY CACHING + # Test caching an ontology loaded from a URL + print("Test caching an ontology loaded from a URL...") + efo_cache = text2term.cache_ontology(ontology_url=efo_url, ontology_acronym="EFO") + print(f"Cache exists: {efo_cache.cache_exists()}\n") + + # Test caching an ontology by resolving its acronym using bioregistry + print("Test caching an ontology by resolving its acronym using bioregistry...") + clo_cache = text2term.cache_ontology(ontology_url="CLO", ontology_acronym="CLO") + print(f"Cache exists: {clo_cache.cache_exists()}\n") + print() + + # Test caching the set of ontologies specified in resources/ontologies.csv + caches = text2term.cache_ontology_set(os.path.join("..", "text2term", "resources", "ontologies.csv")) + + # MAPPING TO A (CACHED) ONTOLOGY + # Test mapping a list of terms to cached EFO ontology + print("Test mapping a list of terms to cached EFO ontology...") + mappings_efo_cache = efo_cache.map_terms(["asthma", "disease location", "food allergy"], + term_type=OntologyTermType.ANY) + print(f"{mappings_efo_cache}\n") + + # Test mapping a list of terms to EFO loaded from a URL + print("Test mapping a list of terms to EFO loaded from a URL...") + mappings_efo_url = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology=efo_url, + term_type=OntologyTermType.ANY) + print(f"{mappings_efo_url}\n") + + # Test that mapping to cached ontology is the same as to ontology loaded from its URL + print("Test that mapping to cached ontology is the same as to ontology loaded from its URL...") + mappings_match = test_df_equals(drop_source_term_ids(mappings_efo_cache), + drop_source_term_ids(mappings_efo_url)) + print(f"...{mappings_match}") + + # Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric + print("Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric...") + df1 = text2term.map_terms(["asthma", "disease location", "food allergy"], "EFO", min_score=.8, + mapper=text2term.Mapper.JARO_WINKLER, excl_deprecated=True, use_cache=True, + term_type=OntologyTermType.ANY) + print(f"{df1}\n") + + # Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry + print("Test mapping a list of terms to EFO by specifying the 
ontology acronym, which gets resolved by bioregistry") + df2 = text2term.map_terms(["contains", "asthma"], "EFO", term_type=OntologyTermType.CLASS) + print(f"{df2}\n") + + # TAGGED TERMS + # Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output + print("Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output...") + df3 = text2term.map_terms( + {"asthma": "disease", "allergy": ["ignore", "response"], "protein level": ["measurement"], "isdjfnsdfwd": None}, + target_ontology="EFO", excl_deprecated=True, use_cache=True, incl_unmapped=True) + print(f"{df3}\n") + + # Test processing tagged terms where the tags are provided in a file + print("Test processing tagged terms where the tags are provided in a file...") + tagged_terms = text2term.preprocess_tagged_terms("simple_preprocess.txt") + df4 = text2term.map_terms(tagged_terms, target_ontology="EFO", use_cache=True, incl_unmapped=True) + print(f"{df4}\n") + + # MAPPING TO PROPERTIES + # Test mapping a list of properties to EFO loaded from a URL and restrict search to properties + print("Test mapping a list of properties to EFO loaded from a URL and restrict search to properties...") + df5 = text2term.map_terms(source_terms=["contains", "location"], target_ontology=efo_url, + term_type=OntologyTermType.PROPERTY) + print(f"{df5}\n") + + # Test mapping a list of properties to EFO loaded from cache and restrict search to properties + print("Test mapping a list of properties to EFO loaded from cache and restrict search to properties...") + df6 = text2term.map_terms(source_terms=["contains", "location"], target_ontology="EFO", use_cache=True, + term_type=OntologyTermType.PROPERTY) + print(f"{df6}\n") + + # Test that mapping to properties in cached ontology is the same as to ontology loaded from its URL + properties_df_match = test_df_equals(drop_source_term_ids(df5), drop_source_term_ids(df6)) + print(f"...{properties_df_match}") + + +def drop_source_term_ids(df): + return df.drop('Source Term ID', axis=1) + + +def test_df_equals(df, expected_df): + pd.testing.assert_frame_equal(df, expected_df, check_names=False, check_like=True) + return True + if __name__ == '__main__': - main() \ No newline at end of file + run_tests() From 8af2a07a8ac38fab51b7f6a69c4b8add9bd529ca Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 15:38:18 -0400 Subject: [PATCH 20/39] Rename to simple_tests.py --- test/{simple-test.py => simple_tests.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/{simple-test.py => simple_tests.py} (100%) diff --git a/test/simple-test.py b/test/simple_tests.py similarity index 100% rename from test/simple-test.py rename to test/simple_tests.py From aaf253f043de2822b3d22fea80654957e8e0c411 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 16:05:47 -0400 Subject: [PATCH 21/39] Move some logging from individual mappers to t2t --- text2term/bioportal_mapper.py | 3 --- text2term/syntactic_mapper.py | 5 ----- text2term/t2t.py | 4 ++++ text2term/tfidf_mapper.py | 7 ------- text2term/zooma_mapper.py | 4 ---- 5 files changed, 4 insertions(+), 19 deletions(-) diff --git a/text2term/bioportal_mapper.py b/text2term/bioportal_mapper.py index dedcb3e..2e08bf0 100644 --- a/text2term/bioportal_mapper.py +++ b/text2term/bioportal_mapper.py @@ -30,12 +30,9 @@ def map(self, source_terms, source_terms_ids, ontologies, max_mappings=3, api_pa :param max_mappings: The maximum number of (top scoring) ontology term mappings 
that should be returned :param api_params: Additional BioPortal Annotator-specific parameters to include in the request """ - self.logger.info("Mapping %i source terms against ontologies: %s...", len(source_terms), ontologies) - start = time.time() mappings = [] for term, term_id in zip(source_terms, source_terms_ids): mappings.extend(self._map_term(term, term_id, ontologies, max_mappings, api_params)) - self.logger.info('done (mapping time: %.2fs seconds)', time.time()-start) return TermMappingCollection(mappings).mappings_df() def _map_term(self, source_term, source_term_id, ontologies, max_mappings, api_params): diff --git a/text2term/syntactic_mapper.py b/text2term/syntactic_mapper.py index a9ab4ff..5316303 100644 --- a/text2term/syntactic_mapper.py +++ b/text2term/syntactic_mapper.py @@ -1,7 +1,6 @@ """Provides SyntacticMapper class""" import logging -import time import nltk import rapidfuzz from tqdm import tqdm @@ -26,14 +25,10 @@ def map(self, source_terms, source_terms_ids, mapper=Mapper.JARO_WINKLER, max_ma :param mapper: Mapping method to be used for matching :param max_mappings: Maximum number of (top scoring) ontology term mappings that should be returned """ - self.logger.info("Mapping %i source terms...", len(source_terms)) - start = time.time() mappings = [] for term, term_id in tqdm(zip(source_terms, source_terms_ids), total=len(source_terms)): matches = self._map(term, term_id, mapper, max_mappings) mappings.extend(matches) - end = time.time() - self.logger.info('done (mapping time: %.2fs seconds)', end - start) return TermMappingCollection(mappings).mappings_df() def _map(self, source_term, source_term_id, mapper, max_matches=3): diff --git a/text2term/t2t.py b/text2term/t2t.py index 3c9d8d1..8892c24 100644 --- a/text2term/t2t.py +++ b/text2term/t2t.py @@ -3,6 +3,7 @@ import pickle import logging import datetime +import time import pandas as pd from text2term import onto_utils from text2term import onto_cache @@ -95,6 +96,7 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_ else: target_terms = _load_ontology(target_ontology, base_iris, excl_deprecated, use_cache, term_type) # Run the mapper + LOGGER.info(f"Mapping {len(source_terms)} source terms to {target_ontology}") mappings_df = _do_mapping(source_terms, source_terms_ids, target_terms, mapper, max_mappings, min_score, tags, incl_unmapped) mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3) @@ -189,6 +191,7 @@ def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_typ def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score, tags, incl_unmapped): to_map, tags = _process_tags(source_terms, tags) + start = time.time() if mapper == Mapper.TFIDF: term_mapper = TFIDFMapper(ontology_terms) mappings_df = term_mapper.map(to_map, source_term_ids, max_mappings=max_mappings, min_score=min_score) @@ -203,6 +206,7 @@ def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappi mappings_df = term_mapper.map(to_map, source_term_ids, mapper, max_mappings=max_mappings) else: raise ValueError("Unsupported mapper: " + mapper) + LOGGER.info("...done (mapping time: %.2fs seconds)", time.time() - start) # Add tags, process, and filter df = _filter_mappings(mappings_df, min_score) diff --git a/text2term/tfidf_mapper.py b/text2term/tfidf_mapper.py index 2e5566f..c90c7f9 100644 --- a/text2term/tfidf_mapper.py +++ b/text2term/tfidf_mapper.py @@ -1,7 +1,6 @@ """Provides TFIDFMapper class""" 
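# TFIDFMapper vectorizes the normalized source terms together with the ontology
# term labels as TF-IDF-weighted character n-grams (analyzer='char_wb', n=3
# below) and uses sparse_dot_topn to keep, for each source term, only the
# top-scoring label matches that clear the min_score threshold.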
import logging -import time import sparse_dot_topn as ct from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer from text2term import onto_utils @@ -27,16 +26,10 @@ def map(self, source_terms, source_terms_ids, max_mappings=3, min_score=0.3): :param min_score: The lower-bound threshold for keeping a candidate term mapping, between 0-1. Default set to 0, so consider all candidates """ - self.logger.info("Mapping %i source terms...", len(source_terms)) - self.logger.info("...against %i ontology terms (%i labels/synonyms)", len(self.target_ontology_terms), - len(self.target_labels)) - start = time.time() source_terms_norm = onto_utils.normalize_list(source_terms) vectorizer = self._tokenize(source_terms_norm, self.target_labels) results_mtx = self._sparse_dot_top(vectorizer, source_terms_norm, self.target_labels, min_score) results_df = self._get_mappings(results_mtx, max_mappings, source_terms, source_terms_ids, self.target_terms) - end = time.time() - self.logger.info("...done (mapping time: %.2fs seconds)", end-start) return results_df def _tokenize(self, source_terms, target_labels, analyzer='char_wb', n=3): diff --git a/text2term/zooma_mapper.py b/text2term/zooma_mapper.py index 26df493..8f72377 100644 --- a/text2term/zooma_mapper.py +++ b/text2term/zooma_mapper.py @@ -2,7 +2,6 @@ import json import logging -import time import requests from text2term import onto_utils from text2term.term_mapping import TermMappingCollection, TermMapping @@ -23,12 +22,9 @@ def map(self, source_terms, source_terms_ids, ontologies, max_mappings=3, api_pa :param max_mappings: The maximum number of (top scoring) ontology term mappings that should be returned :param api_params: Additional Zooma API-specific parameters to include in the request """ - self.logger.info("Mapping %i source terms against ontologies: %s...", len(source_terms), ontologies) - start = time.time() mappings = [] for term, term_id in zip(source_terms, source_terms_ids): mappings.extend(self._map_term(term, term_id, ontologies, max_mappings, api_params)) - self.logger.info('done (mapping time: %.2fs seconds)', time.time()-start) return TermMappingCollection(mappings).mappings_df() def _map_term(self, source_term, source_term_id, ontologies, max_mappings, api_params): From 5849bf5dd483f858337b6e22f2825773adbce7ef Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 16:06:41 -0400 Subject: [PATCH 22/39] Add tests for Zooma and Bioportal mappers --- test/simple_tests.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/simple_tests.py b/test/simple_tests.py index 2e6a6f3..f745c2d 100644 --- a/test/simple_tests.py +++ b/test/simple_tests.py @@ -2,6 +2,7 @@ import pandas as pd import text2term from term import OntologyTermType +from mapper import Mapper pd.set_option('display.max_columns', None) @@ -90,6 +91,18 @@ def run_tests(): properties_df_match = test_df_equals(drop_source_term_ids(df5), drop_source_term_ids(df6)) print(f"...{properties_df_match}") + # Test mapping a list of terms to multiple ontologies using the Zooma mapper + print("Test mapping a list of terms to multiple ontologies using the Zooma mapper...") + df_zooma = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT", + mapper=Mapper.ZOOMA, term_type=OntologyTermType.ANY) + print(f"{df_zooma}\n") + + # Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper + print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator 
mapper...") + df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT", + mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY) + print(f"{df_bioportal}\n") + def drop_source_term_ids(df): return df.drop('Source Term ID', axis=1) From 04704de1ebe96379b470dbc1ebbed70d81542696 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 16:12:16 -0400 Subject: [PATCH 23/39] Fix cache folder misnaming introduced after refactoring --- text2term/onto_cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/text2term/onto_cache.py b/text2term/onto_cache.py index a2ccc61..614f912 100644 --- a/text2term/onto_cache.py +++ b/text2term/onto_cache.py @@ -36,11 +36,12 @@ def cache_exists(ontology_acronym=''): # Clears the cache def clear_cache(ontology_acronym=''): + cache_dir = CACHE_FOLDER if ontology_acronym != '': cache_dir = os.path.join(CACHE_FOLDER, ontology_acronym) # Is equivalent to: rm -r cache_dir try: - rmtree(CACHE_FOLDER) + rmtree(cache_dir) sys.stderr.write("Cache has been cleared successfully\n") except OSError as error: sys.stderr.write("Cache cannot be removed:") From db84bcd29e7eb6c474a4dc83f1258be1fd2044c5 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 16:12:42 -0400 Subject: [PATCH 24/39] Rename to test_pypi.py to be consistent --- test/test-t2t.py | 17 +++++++++++++++++ test/test_pypi.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 test/test-t2t.py create mode 100644 test/test_pypi.py diff --git a/test/test-t2t.py b/test/test-t2t.py new file mode 100644 index 0000000..0470d94 --- /dev/null +++ b/test/test-t2t.py @@ -0,0 +1,17 @@ +import text2term +from text2term import Mapper + +text2term.map_file(input_file="/Users/rsgoncalves/Documents/Harvard/gwaslake/all-traits-list-July23.csv", + target_ontology="EFO", use_cache=True, + csv_columns=("trait", "trait_id"), + max_mappings=1, + min_score=0.6, + excl_deprecated=True, + separator=",", + save_mappings=True, + mapper=Mapper.TFIDF, + output_file="/Users/rsgoncalves/Documents/Workspace/text2term/test/output/opengwas-mappings.csv", + base_iris=("http://www.ebi.ac.uk/efo/", "http://purl.obolibrary.org/obo/MONDO", + "http://purl.obolibrary.org/obo/HP"), + save_graphs=False + ) diff --git a/test/test_pypi.py b/test/test_pypi.py new file mode 100644 index 0000000..6d04fe2 --- /dev/null +++ b/test/test_pypi.py @@ -0,0 +1,46 @@ +import os +import sys +import text2term +from text2term.term import OntologyTermType +from contextlib import contextmanager + + +def main(): + try: + with suppress_stdout(): + # Simple set up and testing + text2term.map_terms(["fever", "headache"], + "https://github.com/EBISPOT/efo/releases/download/current/efo.owl") + text2term.cache_ontology("https://github.com/EBISPOT/efo/releases/download/current/efo.owl", "EFO") + text2term.map_terms(["fever", "headache"], "EFO", use_cache=True) + text2term.map_terms(["fever", "headache"], "EFO", base_iris=("http://www.ebi.ac.uk/efo",), + mapper=text2term.mapper.Mapper.LEVENSHTEIN, max_mappings=4, use_cache=True) + + # Properties and classes tests + text2term.map_terms(["fever", "headache"], "EFO", term_type=OntologyTermType.CLASS, use_cache=True) + text2term.map_terms(["contains", "location"], "EFO", term_type=OntologyTermType.PROPERTY, use_cache=True) + text2term.map_terms(["fever", "contains"], "EFO", term_type=OntologyTermType.ANY, use_cache=True) + + # Clear cache and set down + text2term.clear_cache("EFO") + 
except: + print("ERROR") + + +# From https://stackoverflow.com/questions/2125702/how-to-suppress-console-output-in-python +@contextmanager +def suppress_stdout(): + with open(os.devnull, "w") as devnull: + old_stdout = sys.stdout + old_stderr = sys.stderr + sys.stdout = devnull + sys.stderr = devnull + try: + yield + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + + +if __name__ == '__main__': + main() From dcec08e281dbecf8b421f851d5617e2bf05ea3b1 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 17:58:13 -0400 Subject: [PATCH 25/39] Revert adding test with local file system references This partly reverts commit db84bcd29e7eb6c474a4dc83f1258be1fd2044c5. --- test/test-t2t.py | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 test/test-t2t.py diff --git a/test/test-t2t.py b/test/test-t2t.py deleted file mode 100644 index 0470d94..0000000 --- a/test/test-t2t.py +++ /dev/null @@ -1,17 +0,0 @@ -import text2term -from text2term import Mapper - -text2term.map_file(input_file="/Users/rsgoncalves/Documents/Harvard/gwaslake/all-traits-list-July23.csv", - target_ontology="EFO", use_cache=True, - csv_columns=("trait", "trait_id"), - max_mappings=1, - min_score=0.6, - excl_deprecated=True, - separator=",", - save_mappings=True, - mapper=Mapper.TFIDF, - output_file="/Users/rsgoncalves/Documents/Workspace/text2term/test/output/opengwas-mappings.csv", - base_iris=("http://www.ebi.ac.uk/efo/", "http://purl.obolibrary.org/obo/MONDO", - "http://purl.obolibrary.org/obo/HP"), - save_graphs=False - ) From 4475e2617bb1f057e64d56411f5e363205247e12 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 17:59:50 -0400 Subject: [PATCH 26/39] Delete test-pypi.py --- test/test-pypi.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 test/test-pypi.py diff --git a/test/test-pypi.py b/test/test-pypi.py deleted file mode 100644 index 54e2390..0000000 --- a/test/test-pypi.py +++ /dev/null @@ -1,39 +0,0 @@ -from contextlib import contextmanager -import sys, os -import text2term - -def main(): - try: - with suppress_stdout(): - # Simple set up and testing - text2term.map_terms(["fever", "headache"], "https://github.com/EBISPOT/efo/releases/download/current/efo.owl") - text2term.cache_ontology("https://github.com/EBISPOT/efo/releases/download/current/efo.owl", "EFO") - text2term.map_terms(["fever", "headache"], "EFO", use_cache=True) - text2term.map_terms(["fever", "headache"], "EFO", base_iris=("http://www.ebi.ac.uk/efo",), mapper=text2term.mapper.Mapper.LEVENSHTEIN, max_mappings=4, use_cache=True) - - # Properties and classes tests - text2term.map_terms(["fever", "headache"], "EFO", term_type="classes", use_cache=True) - text2term.map_terms(["contains", "location"], "EFO", term_type="properties", use_cache=True) - text2term.map_terms(["fever", "contains"], "EFO", term_type="both", use_cache=True) - - # Clear cache and set down - text2term.clear_cache("EFO") - except: - print("ERROR") - -# From https://stackoverflow.com/questions/2125702/how-to-suppress-console-output-in-python -@contextmanager -def suppress_stdout(): - with open(os.devnull, "w") as devnull: - old_stdout = sys.stdout - old_stderr = sys.stderr - sys.stdout = devnull - sys.stderr = devnull - try: - yield - finally: - sys.stdout = old_stdout - sys.stderr = old_stderr - -if __name__ == '__main__': - main() \ No newline at end of file From 1f6bfa8fd1e525add03434087f1a597f8a2aa293 Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 
Oct 2023 19:15:09 -0400 Subject: [PATCH 27/39] Modify TermCollector to be able to reuse previously loaded ontology --- text2term/t2t.py | 5 +++-- text2term/term_collector.py | 41 ++++++++++++++++++++++--------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/text2term/t2t.py b/text2term/t2t.py index 8892c24..890088c 100644 --- a/text2term/t2t.py +++ b/text2term/t2t.py @@ -174,15 +174,16 @@ def _load_data(input_file_path, csv_column_names, separator): def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_type=OntologyTermType.CLASS): - term_collector = OntologyTermCollector() + term_collector = OntologyTermCollector(ontology_iri=ontology) if use_cache: pickle_file = os.path.join("cache", ontology, ontology + "-term-details.pickle") LOGGER.info(f"Loading cached ontology from: {pickle_file}") onto_terms_unfiltered = pickle.load(open(pickle_file, "rb")) onto_terms = term_collector.filter_terms(onto_terms_unfiltered, iris, exclude_deprecated, term_type) else: - onto_terms = term_collector.get_ontology_terms(ontology, base_iris=iris, exclude_deprecated=exclude_deprecated, + onto_terms = term_collector.get_ontology_terms(base_iris=iris, exclude_deprecated=exclude_deprecated, term_type=term_type) + term_collector.close() LOGGER.info(f"Filtered ontology terms to those of type: {term_type}") if len(onto_terms) == 0: raise RuntimeError("Could not find any terms in the given ontology.") diff --git a/text2term/term_collector.py b/text2term/term_collector.py index 85a5d61..31f9a23 100644 --- a/text2term/term_collector.py +++ b/text2term/term_collector.py @@ -9,23 +9,25 @@ class OntologyTermCollector: - def __init__(self, log_level=logging.INFO): + def __init__(self, ontology_iri, use_reasoning=False, log_level=logging.INFO): + """ + Construct an ontology term collector for the ontology at the given IRI + :param ontology_iri: IRI of the ontology (e.g., path of ontology document in the local file system, URL) + :param use_reasoning: Use a reasoner to compute inferred class hierarchy + """ self.logger = onto_utils.get_logger(__name__, level=log_level) + self.ontology = self._load_ontology(ontology_iri) + if use_reasoning: + self._classify_ontology(self.ontology) - def get_ontology_terms(self, ontology_iri, base_iris=(), use_reasoning=False, exclude_deprecated=False, - term_type=OntologyTermType.ANY): + def get_ontology_terms(self, base_iris=(), exclude_deprecated=False, term_type=OntologyTermType.ANY): """ Collect the terms described in the ontology at the specified IRI - :param ontology_iri: IRI of the ontology (e.g., path of ontology document in the local file system, URL) :param base_iris: Limit ontology term collection to terms whose IRIs start with any IRI given in this tuple - :param use_reasoning: Use a reasoner to compute inferred class hierarchy :param exclude_deprecated: Exclude ontology terms stated as deprecated using owl:deprecated 'true' :param term_type: Type of term--can be 'class' or 'property' or 'any' (individuals may be added in the future) :return: Dictionary of ontology term IRIs and their respective details in the specified ontology """ - ontology = self._load_ontology(ontology_iri) - if use_reasoning: - self._classify_ontology(ontology) self.logger.info("Collecting ontology term details...") start = time.time() ontology_terms = dict() @@ -35,20 +37,14 @@ def get_ontology_terms(self, ontology_iri, base_iris=(), use_reasoning=False, ex query = iri + "*" self.logger.info("...collecting terms with IRIs starting in: " + iri) iris = 
list(default_world.search(iri=query)) - ontology_terms = ontology_terms | self._get_ontology_terms(iris, ontology, exclude_deprecated, + ontology_terms = ontology_terms | self._get_ontology_terms(iris, self.ontology, exclude_deprecated, term_type) else: - ontology_signature = self._get_ontology_signature(ontology) - ontology_terms = self._get_ontology_terms(ontology_signature, ontology, exclude_deprecated, term_type) + ontology_signature = self._get_ontology_signature(self.ontology) + ontology_terms = self._get_ontology_terms(ontology_signature, self.ontology, exclude_deprecated, term_type) end = time.time() self.logger.info("...done: collected %i ontology terms (collection time: %.2fs)", len(ontology_terms), end - start) - # when multiple ontologies are loaded with owlready2, and they reference the same ontology term (IRI), a lookup - # for that IRI returns the term from the first ontology loaded —> need to unload previously loaded ontologies - try: - ontology.destroy() - except Exception as err: - self.logger.debug("Unable to destroy ontology: ", err) return ontology_terms def filter_terms(self, onto_terms, iris=(), excl_deprecated=False, term_type=OntologyTermType.ANY): @@ -92,6 +88,9 @@ def _get_ontology_terms(self, term_list, ontology, exclude_deprecated, term_type owl_term_type = OntologyTermType.CLASS elif self._filter_term_type(ontology_term, OntologyTermType.PROPERTY, False): owl_term_type = OntologyTermType.PROPERTY + else: + owl_term_type = "undetermined" + self.logger.info("Term has undetermined type %s %s", iri, labels) term_details = OntologyTerm(iri, labels, definitions=definitions, synonyms=synonyms, parents=named_parents, children=children, instances=instances, restrictions=complex_parents, deprecated=is_deprecated, @@ -388,6 +387,14 @@ def _classify_ontology(self, ontology): end = time.time() self.logger.info("...done (reasoning time: %.2fs)", end - start) + def close(self): + # when multiple ontologies are loaded with owlready2, and they reference the same ontology term (IRI), a lookup + # for that IRI returns the term from the first ontology loaded —> need to unload previously loaded ontologies + try: + self.ontology.destroy() + except Exception as err: + self.logger.debug("Unable to destroy ontology: ", err) + def _log_ontology_metrics(self, ontology): self.logger.debug(" Ontology IRI: %s", ontology.base_iri) self.logger.debug(" Class count: %i", len(list(ontology.classes()))) From 2a4ccc5ede65d66994ed66746825028523856e7e Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Tue, 10 Oct 2023 19:27:06 -0400 Subject: [PATCH 28/39] Update setuptools dependency version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0617121..98714ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ numpy~=1.24.2 gensim~=4.3.0 scipy~=1.10.1 scikit-learn~=1.2.1 -setuptools~=67.6.0 +setuptools~=68.2.2 requests~=2.31.0 tqdm~=4.66.1 sparse_dot_topn~=0.3.4 From 9e3a39658899565d1690da571d60a731cc541d3d Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Wed, 11 Oct 2023 11:06:21 -0400 Subject: [PATCH 29/39] Add TermCollector tests and assert statements to some tests --- test/simple_tests.py | 86 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 11 deletions(-) diff --git a/test/simple_tests.py b/test/simple_tests.py index f745c2d..90a798a 100644 --- a/test/simple_tests.py +++ b/test/simple_tests.py @@ -3,12 +3,17 @@ import text2term from term import OntologyTermType 
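# Note: 'term' and 'mapper' are imported bare (rather than as text2term.term and
# text2term.mapper), so these tests assume the text2term package directory itself
# is on sys.path when they are run.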
from mapper import Mapper +from text2term import OntologyTermCollector pd.set_option('display.max_columns', None) +EFO_URL = "https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl" + +MAPPED_TERM_CURIE_COLUMN = "Mapped Term CURIE" +TAGS_COLUMN = "Tags" + def run_tests(): - efo_url = "https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl" pizza = "https://protege.stanford.edu/ontologies/pizza/pizza.owl" ncit = "http://purl.obolibrary.org/obo/ncit/releases/2022-08-19/ncit.owl" hpo = "http://purl.obolibrary.org/obo/hp/releases/2022-06-11/hp.owl" @@ -17,14 +22,13 @@ def run_tests(): # ONTOLOGY CACHING # Test caching an ontology loaded from a URL print("Test caching an ontology loaded from a URL...") - efo_cache = text2term.cache_ontology(ontology_url=efo_url, ontology_acronym="EFO") + efo_cache = text2term.cache_ontology(ontology_url=EFO_URL, ontology_acronym="EFO") print(f"Cache exists: {efo_cache.cache_exists()}\n") # Test caching an ontology by resolving its acronym using bioregistry print("Test caching an ontology by resolving its acronym using bioregistry...") clo_cache = text2term.cache_ontology(ontology_url="CLO", ontology_acronym="CLO") print(f"Cache exists: {clo_cache.cache_exists()}\n") - print() # Test caching the set of ontologies specified in resources/ontologies.csv caches = text2term.cache_ontology_set(os.path.join("..", "text2term", "resources", "ontologies.csv")) @@ -38,14 +42,14 @@ def run_tests(): # Test mapping a list of terms to EFO loaded from a URL print("Test mapping a list of terms to EFO loaded from a URL...") - mappings_efo_url = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology=efo_url, + mappings_efo_url = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology=EFO_URL, term_type=OntologyTermType.ANY) print(f"{mappings_efo_url}\n") # Test that mapping to cached ontology is the same as to ontology loaded from its URL print("Test that mapping to cached ontology is the same as to ontology loaded from its URL...") - mappings_match = test_df_equals(drop_source_term_ids(mappings_efo_cache), - drop_source_term_ids(mappings_efo_url)) + mappings_match = check_df_equals(drop_source_term_ids(mappings_efo_cache), + drop_source_term_ids(mappings_efo_url)) print(f"...{mappings_match}") # Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric @@ -60,55 +64,115 @@ def run_tests(): df2 = text2term.map_terms(["contains", "asthma"], "EFO", term_type=OntologyTermType.CLASS) print(f"{df2}\n") - # TAGGED TERMS + +def test_mapping_tagged_terms(): # Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output print("Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output...") df3 = text2term.map_terms( {"asthma": "disease", "allergy": ["ignore", "response"], "protein level": ["measurement"], "isdjfnsdfwd": None}, target_ontology="EFO", excl_deprecated=True, use_cache=True, incl_unmapped=True) print(f"{df3}\n") + assert df3.size > 0 + assert df3[TAGS_COLUMN].str.contains("disease").any() + assert df3[TAGS_COLUMN].str.contains("measurement").any() + +def test_preprocessing_from_file(): # Test processing tagged terms where the tags are provided in a file print("Test processing tagged terms where the tags are provided in a file...") tagged_terms = text2term.preprocess_tagged_terms("simple_preprocess.txt") df4 = text2term.map_terms(tagged_terms, target_ontology="EFO", use_cache=True, 
incl_unmapped=True) print(f"{df4}\n") + assert df4.size > 0 + assert df4[TAGS_COLUMN].str.contains("disease").any() + assert df4[TAGS_COLUMN].str.contains("important").any() + - # MAPPING TO PROPERTIES +def test_mapping_to_properties(): # Test mapping a list of properties to EFO loaded from a URL and restrict search to properties print("Test mapping a list of properties to EFO loaded from a URL and restrict search to properties...") - df5 = text2term.map_terms(source_terms=["contains", "location"], target_ontology=efo_url, + df5 = text2term.map_terms(source_terms=["contains", "location"], target_ontology=EFO_URL, term_type=OntologyTermType.PROPERTY) print(f"{df5}\n") + assert df5.size > 0 # Test mapping a list of properties to EFO loaded from cache and restrict search to properties print("Test mapping a list of properties to EFO loaded from cache and restrict search to properties...") + if not text2term.cache_exists("EFO"): + text2term.cache_ontology(ontology_url=EFO_URL, ontology_acronym="EFO") df6 = text2term.map_terms(source_terms=["contains", "location"], target_ontology="EFO", use_cache=True, term_type=OntologyTermType.PROPERTY) print(f"{df6}\n") + assert df6.size > 0 # Test that mapping to properties in cached ontology is the same as to ontology loaded from its URL - properties_df_match = test_df_equals(drop_source_term_ids(df5), drop_source_term_ids(df6)) + properties_df_match = check_df_equals(drop_source_term_ids(df5), drop_source_term_ids(df6)) print(f"...{properties_df_match}") + +def test_mapping_zooma_ontologies(): # Test mapping a list of terms to multiple ontologies using the Zooma mapper print("Test mapping a list of terms to multiple ontologies using the Zooma mapper...") df_zooma = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT", mapper=Mapper.ZOOMA, term_type=OntologyTermType.ANY) print(f"{df_zooma}\n") + assert df_zooma.size > 0 + assert df_zooma[MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any() # returns true if any of the values contains EFO + assert df_zooma[MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any() + +def test_mapping_bioportal_ontologies(): # Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper...") df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT", mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY) print(f"{df_bioportal}\n") + assert df_bioportal.size > 0 + assert df_bioportal[MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any() + assert df_bioportal[MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any() + + +# TEST ONTOLOGY TERM COLLECTOR +EFO_TERM_COLLECTOR = OntologyTermCollector(ontology_iri=EFO_URL) + + +def test_term_collector(): + expected_nr_efo_terms = 50867 + terms = EFO_TERM_COLLECTOR.get_ontology_terms() + assert len(terms) == expected_nr_efo_terms + + +def test_term_collector_classes_only(): + expected_nr_efo_classes = 50643 + terms = EFO_TERM_COLLECTOR.get_ontology_terms(term_type=OntologyTermType.CLASS) + assert len(terms) == expected_nr_efo_classes + + +def test_term_collector_properties_only(): + expected_nr_efo_properties = 224 + terms = EFO_TERM_COLLECTOR.get_ontology_terms(term_type=OntologyTermType.PROPERTY) + assert len(terms) == expected_nr_efo_properties + + +def test_term_collector_iri_limit(): + iri = "http://www.ebi.ac.uk/efo/" + expected_nr_terms_with_efo_iri = 17383 + terms = 
EFO_TERM_COLLECTOR.get_ontology_terms(base_iris=[iri], term_type=OntologyTermType.ANY) + assert len(terms) == expected_nr_terms_with_efo_iri + + +def test_term_collector_iri_limit_properties_only(): + iri = "http://www.ebi.ac.uk/efo/" + expected_nr_properties_with_efo_iri = 29 + terms = EFO_TERM_COLLECTOR.get_ontology_terms(base_iris=[iri], term_type=OntologyTermType.PROPERTY) + assert len(terms) == expected_nr_properties_with_efo_iri def drop_source_term_ids(df): return df.drop('Source Term ID', axis=1) -def test_df_equals(df, expected_df): +def check_df_equals(df, expected_df): pd.testing.assert_frame_equal(df, expected_df, check_names=False, check_like=True) return True From d1718eacedfb518770f910b307fd0821485a58fc Mon Sep 17 00:00:00 2001 From: Jason Payne Date: Wed, 11 Oct 2023 11:07:22 -0400 Subject: [PATCH 30/39] Adds Sphinx Docs Creates a rudimentary Sphinx document with documentation pulled from the READMEs of both text2term and the UI (which is copied) --- README-UI.md | 90 +++++++++++++++++++++++++++++++ docs/source/_static/ccb_logo.jpg | Bin 0 -> 60131 bytes docs/{ => source}/conf.py | 3 +- docs/{ => source}/index.rst | 16 ++++-- 4 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 README-UI.md create mode 100644 docs/source/_static/ccb_logo.jpg rename docs/{ => source}/conf.py (95%) rename docs/{ => source}/index.rst (61%) diff --git a/README-UI.md b/README-UI.md new file mode 100644 index 0000000..a096884 --- /dev/null +++ b/README-UI.md @@ -0,0 +1,90 @@ +# ontology-mapper-ui +The following information pertains to the text2term UI, which is written [here](https://github.com/ccb-hms/ontology-mapper-ui) and runs online [here](https://text2term.hms.harvard.edu/). It supports fewer features than the base package does, but provides a user interface for non-programmers. + +### Running Locally via Node + Python + +##### Requirements + +- Node >= 16.0.0 +- npm >= 8.0.0 +- Python >= 3.9.0 +- pip >= 21.0.0 +- text2term >= 1.1.0 + +**\*** These are the versions I have that work; while I know Python 3.9 or higher is necessary, the others may not strictly require the listed versions. + +**\*\*** If you are running this locally on Google Chrome, you will likely run into issues with CORS (Cross-Origin Requests) that I have been unable to completely resolve. I would recommend using a different browser, using the Docker method, or finding some way to disable CORS on Chrome while running this. + +#### Instructions + +##### Initial Setup + +When first cloned, run the command: + + +``` +npm install +``` + +to install all necessary packages for the React frontend. + +Next, go into the `flask-api` folder (perhaps by running `cd flask-api`) and run + +``` +pip install -r requirements-flask.txt +``` + +to install necessary packages for the Flask api. + +##### Running + +To run, make sure you are in the root of the repository and run, in two separate command line instances, the command + +``` +npm start +``` + +to start the front-end, which can be seen at `localhost:3000`, and the command + +``` +npm run flask-api +``` + +to start the back-end, which can be interacted with at `localhost:5000`. 
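For orientation, the UI is a thin front-end over the text2term package itself; a minimal sketch of the kind of call its Flask back-end makes on behalf of the user (assuming EFO has been cached under the acronym "EFO", as in the tests above; this is an illustration, not the UI's actual code):

```
import text2term
from text2term import Mapper

# Map two free-text inputs against the cached EFO ontology (illustrative only)
df = text2term.map_terms(["asthma", "food allergy"], target_ontology="EFO",
                         mapper=Mapper.TFIDF, min_score=0.3, use_cache=True)
print(df.to_string())
```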
+
+### Running Locally via Docker
+
+#### Requirements
+
+- Docker
+
+#### Instructions
+
+##### Initial Setup
+
+Before running, make sure you have the latest version of the repository built by running the command
+
+```
+docker-compose build
+```
+
+Docker should build two images:
+
+- `ontology-mapper-api`: the Flask backend API
+- `ontology-mapper-client`: the React frontend
+
+##### Running
+
+To run the website, run the command:
+
+```
+docker-compose up
+```
+
+Docker should build two containers corresponding to the two images.
+
+In a browser, navigate to `localhost:8602` to see the front-end.
+
+### Acknowledgements
+
+Initial setup of React and Flask and Dockerization aided by an [article series](https://blog.miguelgrinberg.com/post/how-to-dockerize-a-react-flask-project) by Miguel Grinberg.
\ No newline at end of file
diff --git a/docs/source/_static/ccb_logo.jpg b/docs/source/_static/ccb_logo.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..422182be10e0e390a585ee185ff818e24ebe1b50
GIT binary patch
literal 60131
[binary JPEG data for ccb_logo.jpg (60131 bytes) omitted]
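The TermCollector change in PATCH 27 above means an ontology is now parsed once, in the constructor, and can then be queried repeatedly. A minimal usage sketch under that API (the EFO URL is the one used in the tests; illustrative, not taken from the patches):

```
from text2term import OntologyTermCollector
from text2term.term import OntologyTermType

# Parse EFO once; get_ontology_terms() can then be called repeatedly with
# different filters without reloading the ontology
collector = OntologyTermCollector(
    ontology_iri="https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl")
classes = collector.get_ontology_terms(term_type=OntologyTermType.CLASS)
properties = collector.get_ontology_terms(term_type=OntologyTermType.PROPERTY)
collector.close()  # unload, so that later IRI lookups do not hit this ontology
```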
z6@nm{i_8x`Z=In@X1YE-%=OAu>|nsF#XRQQ{saEWhQ=Sp{CK>kOMPI8vxOkbz0vQN zzv$Gw)%+%7?!YK3kL@jOetLk~Z5X49P2<{Oj>Oeu2%9uIrD$xwo$GEmQ7E`PJ#dd* zR*x0+dFH+^$2Up9RBI1%pccP!DR%_J#W*d{)OUu zXh7nStyXK%_hT6|Rx66Oa)?`EI*m1ys*h?^&V)OMIuVCbJ``g&0a+J+rB9G*nrPY} zyJX+zl)Duwr(J$lhc5)rG;fS9`;2E!{bL#!n}(z$S`lsH)Z`&wsFF3(;4g&CgnfRIV}G+;JTufy(m?LnO^n4`6%EiMCj&abwy9MKbdy7E zGO>=H{DJmAPkqm_ZJ0P@&T8vj^8WPX%}!jZ9;|-)#P?7Dr8I~OZzG6iPtU~C(x@T@PeU5>2?h#>YMzQ7*qY| zZmV>8)R}EcjO|I<{gunBRcPynW?Mwg@LXz8_v#oObaFe)z;uIBDN`Q;SHf-}G3(8E zpeX^{N>tkVNaD5_(>_(zx%fxTM=e;GZ9?5~#2q*L2kHzM zqB6Gq(vQ;=h}k@=Z_6JklosG&<-TIJg}g^rBl^9ADpAIE$Q~q2ygMCUAE`@5TaM#X zZ$DtYN7$Lt-@8wqzo&As^}0psBLFF>Wwd{#z!yDY)Y@&j8X0F>no|Og4XVoPVldvj zSJ_X>y`N=WLQAc-2QD|RS~qoL=U`Rbp~;q0{0NmC(yNJ8DtpNRY@YCWr-778Krfb( zFkOkF(h!&nadU9ub0$^)VT;#2_}T6Ou5Zn*1#b!`$-xjED#Y_26I-xS6!|*Ddooy5 zTwkc*iss9o*5OH1@fTUXH`5F9_G^GL2pDMpq#${G+$}{MQ0fN17^dZW+CB74=%$6J)}7Rxv>VR6q!~SWhOe#|jWw|kd6jgk^n=eA!f&a@I-`u0 znU~^fKhZyrW;zbkHl5`GWYa@{j#O1NeT+Hy%8*jXMj9kI$d3O>G(6Md_{?3`9L0yn zYR@PxVFr(zg7&vxA$udnDe^#*pn*1UJma?l^Kd=5f)YY`@1&rJHEs8`*>Qu(43Xc` zTGQNv!C}oDN1~5)t#r&B#>Jm9Fyf-IuW7oB6U5#=ZM|=I?+(Z2n|drZ)&0Ol)<(*c z8vil1M>Ld;iYJc7S}dU45GffDKFaG@t1J}m;?#iN*D|TQucSM~YBKj+D^`E-0Y}+R zeU$rEq+nP-RE5Gd=g?VrJ?B^MZRUhw1~`{6(~+>WQ>ww4{<|uSn#QcyaWaoyN?|6V zoJjwewpyT~5Z@*Nd&35Hv>V53dZ{oQQNb0kI4YEMBK2DrmH5~Y{(^|-X)^f1nN2H4OEt1h?L$C z=cbt?OBsLoc8wIbqwt58QGOsEj3$1U9BVT~eo&npJyO8Q1qQrQg;@iA0mAvuj4^R` zf;QNrcE-ML#;4Lw{@KhNbN@+1&ZVOwnMOln3rBRX_@Ac5oj!0PH^~MsqAv74wbZCA zpOayQT!RNX`rbEgla)=L37Ao%(HOsAKpG1LRN?X2NR_d}*>!a-p96<;Va*LPGAGM& z;o;l}(KCo~cp~&BNq~?Ad@ZIFX(AObLz56Y!RUyxvm+GCUi-TD?&)j!&*^Cz;H!-n ziZafKK>C7BF5sJse&vsb!+(VNq#zP2L9XCwlXJd-n0u8@#SM`+H~$=u0hj}oDrY;_ z3G>8>);ZkED2l8IioJ7D9JL`&kfUi*;6w-X!uPY!LHuo}NRM|MqYthsR@a;U>Qnmi zK4VdTKA z=6H@u5?sZzGDzOk2U9K$K~rsMC)((rbp;SH^`gcvVdp&s(l%aMjjs+md{yCd{3<2S zO6}k&J{grHUM9Z+!lIX!SGSIcoe<+s>w}e)MBBF5`(ssLEwPOyExV3RxldxuECn1% ze}kVM>AY_*MaOpkW3oFYC;=uzo{f2A=-3=BHLetBqrI(up$vLkI=){SR-%9M1$FE8 zpKee8f#puL=6PT8P$YqRoyHBG-ox?d6OX-c%`XwWnl0R&`$sXIk8Ic0ZYk^%@%T(QFMD_v>A-J%(@7V{2C&=g~*s z2t$z(Sgg3-B&Ur<{picqA=ihnYg2uKGFhlrpqZPx$Xt0~{%D+_=A*l5f=V4~Y>UTb zaT}%>$aRxg+spn!-PoOiyrBkIS|A;XdOlK5StUpQ{-!E^B0t{cd&YWI+e}vU0KcN9 z!fX;xg@EQsjsLiETB&zwE+n=dCyzxog6OB-VgTu>6H4Np**fop-99@MH=TLu`{lI7 zq_@nhZ7k5i1>hdh(VCB9XAC*(Ho!YoW=+*fN)9u`Chqqp;GY+tWZC=v`qh#YW)xO? 
zxYbV*?q9m{DIiv61@jc%16L6S zcj?@iJ?saI&Pg&9ow3mR_c6JtrAdc|7=ooAIst91J5X)NDe4_YuYFmMUmQ0Hws@Gl z-NuaH{F*I~i4A|db*ZZ^+&ii2H8ZtiF`&>ZnevFr7S^|b#)5i4SOL5aCbf*X{{`RG z^MSL)tgW)uh3T3Ec}AUXP*k?P8qenS&H9M?A8sEaEFC4=?++g654xQFJv{994?i5)i_0a4WIQ_ zP5=~ndX?#~(9u9jYJIRy{xY)q*qPpLj=i4#i=XGBT2ev4}s%?yes*%2g)qj*2$aL1gG^ zB=nH}BJz;R1ReVgMDA%Qt~M_XT{cqXH{N*vr}b?C%V=e`*BKl}O5Q%Mhv6&#n0(;V zrN9+N`5-+Ue-hRuj5o?yx}V1Jjq~G}edea1CfHIEV0;k3BybwhZU2vn5J440z0p;( zGZhJ`74{LZg?gkpaOeeoZoVR!+rKXH*Q31JOk!a26=V=^^qZoGwir|{94ML7^jlw8 zZaJzhTlCuJn}aA(fShv_LkF1MhTU51EMMpKdz4vgJHn%XX*=VZRqWEl*-!wGnwO^- zEPb@W?IbHcXe=)tcvJ(w_IyNn=sf6^CYv1W%@^RamfvWz7Yj;?L3> zd9Wg*gK?ty5?eGwlcq5Sm7qMn{REYCW>JBC#!-o$Rx#2SqMN(esg)=y3aR8KQZd zsN}?=W6hfu=?O3TU&eLaUFsX~rv<_4HYXC8oED1{-)hi}UG&_p55}k|U9fl1=OoZV z7eL$FUs8u(J{jmKa?8y?vxDvEpK30+Mmmjoq00O$h~@`(!VTRVrkgJQXueZ|eP{~x zIjVYDpsXk%!2mGia#quN_(HWJ7s zdL=^(a-X8P2;-&{;Xv;edUNa>MsgIyJqq0uc=?ITPgqjIRugu*tV>LI4n=%g78oK& zx|auQC|d@sB;{{q!Dp0$uQa?Ey5@u_-$M}Kc<}V95#@+TM&l7t2Racw7HA(jJk%qC z9QK5D@3d5^WQf#JY7^h7|ES+REP%|z$y*%GToZsL8M`8mT{*l7ee&JiefP%i-mG;D zFr4t~^Akto;qL@8A^hWQR>62Dag%mbTicj)Yre^kmY1F`#vJ#b?#3&|Tzacw5X`^8 zt8EMOK@&kVS-Pg`=O8(trDCg^Bk)Wy^PV^XX+2xVt)z7EZWMz0u{jXz_<`!59aa=Rk!A^#V#1;6;EaI$0*?J4lhpD2=)`e zqMIPz23QQL4#H6Ut$S{5o&`BYGv~?=Ed#G-&O6LIIAC>p9_Om@{9{tNcaNj!;9C&K zor1}`^ckJ0c39Pj`8$<(-1prsGbJqbp6b%$(Yi;GVq|1nHAWutl(evQw1|iXv(eb|?(ub-yp>#~;Bdw2DzndFo2r3oEk2nm@6JVV$?ppQSzRO-^!MX18*==V&7-k{ zNNcodyQ%H9C_V0Jytypv-$AAeC(P|>SQ4g(9@GbCV_c+QtSL@k!s>va`_m2}u$}9( zMfm2M(;suy`&o)cpIcy{$^$);B)J{H)rBFZezAq8Nv2_;-FZ(v(`Gztunwvr>HS_O z>*d^|m%Hk|AN~2h_8_2}&xEt3f^_IpmK{6#G*AR8Q6Rf0cPrSU^4ILlgCMLi(7th} zPezVtjv@sh;!*gKBpMs9SDho}nodA*wlTq49U z@X|TMwzH0^6_&~bx&%4qxFMhog*0e*mOdI3e9IO?X3%I1UmJkz)*UKi6P zvmc(UJ<`*BQlvRzCilP;kF2TaE;IPLZhwEhXolVN_Egllj#}+b){D==Gg^Z(v-f)g z26+Ww2EUKx>=f?>?i3d@G`)BH3IXv(*%It5m}Up>NyM!`Mdn-V86A76$mc927XWl8 z=8Y;KIGT6f3>6^TjLobI`3w#AH9h+LUcT5N^fVNFkY7hgIhbCXvX5Lq&#CL@>osNc zMZTE7DJ*l8{?~C=YKQPB%nzLt`+x*N5#yhe4PNG4xm%kRD6~Co@^S2Z6nxoO(NAV_ zU)3-B?1e9ki}dQ`8B}@Nydot?;EkH1xCycMul3=p=z>ha22n!G)z#@HZ__t>BC9%; z?=gODvZ0^Xa?X^_IrQ>4Iuv^bR&|-UDVQ1>OAQ!(KT-c_%Eq2(Ruw>xc3H8To6h1V z-h)F-&iJH`h~97-ZusgD2FAf&BM92kWlpnRvK0^feF-c1+ z`%Qi~O#9xCJqitM>ZQ*dM5Yf8;{DDjCA>6(-zl`)$GHfBX3oZ(-wIYZ;R-~N+tK#F zp&Wq5RR|J}W7iS4Yn)W@1yHa3#)->ed8UTBHM_`RE|Kz%xxQC+jUWv+9XfvNmlXX~Ok!0_-G=*Ls?Eyat)Woq@;-#ArK3T!9CxdU|;B_F?4I z&nR8iW7;Kd#h{`ZHgRG)ijx>dkWCMN#&oNnw6`LG)v zXSmdr=M!J^eS9?uiyb;2dMm5V)^$as8snOx*-3CVsGP>4-YEp9ewWUg$f@h-Wl0cH zO}^;AHT%|FqPe1P)H)Jm@u%y-t=b?n+7yu4a0@Tcx$r@(zM&@q{b%7jH|-<(HE6SH zm61LKo(P6r1O4~6B3FS2F7q)Xj$un0WEj3jZDRj1jTHlHivZo__x~sk|J#fC-~X|n z3^9a=&VvzJ1(Koipj3oMra|WY$=DeE2Twbd*qi<+412c+J3L$f+I1!n*(7vsFA zy?PIKvd}w_@XN^XgZBSewC-Ii4p)7nopgUM>hhn)qwe!bu5aas(cnCOl+#BPZloUP zYwi-ivWx{?5GkY!Q6M|WzNYBIDymVOy-eyuTzq08h(|2_B4O73>&Je3LM;w$B0#a*_8_dki@$|gFs3wW&reauY z`@x@MF{CuTP1_UjNp_%4Zw15R=2=qlT)=r)x#V4w(hDAIo1WHDk-toa%EMlSiX(Y4 z8Zd4sane-kkQ5nQ4L&lU;dF+nYgw|9_bG>r0W)C;l86lp&^yJD!}UVW?egjb_cCQ3KS4>$4-_9BtoL;?9!gf;0kHa3HKO<_{*G8waRK}@M$KEt!D{es$0;iMvb z4nY}*V5MDb9oJTL@d^Lt`EC>aaLjer+ZX+Q-WntRLn{=o+>bVsN9$e*8@Hmo!AFc- z@Dk>aD3JTY#3%VET5onSR2%cbB>#P3Yv6yu;?v?Tc`Fx)`%m8c$;wrl)P=MizSzLZ zl#Wx1KP)>dP91=GHU;v?&4HNE!!bektm8WIzihYJnEd5`DN!+@Uk}vQ#82v;@`8y# zp0+%@On52JZ!>|ojaMZI@pfjLC1`QfEC$VHO&?K1jJ`0?@VX+18A&LN+S@oyY)rhW z+_$uhY|&3EB=z}&>qM09V+g02Z!q{G*mI>sDiNQzivz!DNT!3Q6`_&PSy;_b8G#ws zy#!{_7e%Gk+oooX+B|y03LTP!L12S2I@|+Li}W=YWo<;ZgSZm>83U_3m3No}FiR{9 zKlw4JK<}cWd2h%qrJf;^Lxa(6hXJR@Xs^MtpR3DRMScfqe;Fh_Yo=|BG^-!%GxBpP zw$cdARrOD7waWi`J_^onw?!43*_RfNfxjPCloWf=0#W(V 
z_YS}bk>eX5=w~4OK(EFRP*XO=zl6sjWl7PiJ*sujxA!T_cN6;aV-u{ImsQ?$U*)=R z$Pxf9oun{RS)nt2#1=9J(3$t30gqvdS2V2xA z^5dbTK8iB_p8bU5p|StP4-TP&X#(u6Nd<+>d@3+<{9WVIBqSfCIj$7V!@5 zXsk`Q6W8)iboP<2Z?UlR%8L7fLP?5tMZ>^N9Qps&+iNv3OK&U2+Rhj9R2BwT;(hC(ymgySngi zbM714gGR{q@9`cjO>gZalHw;?tp=;zNg8;X_so0o$QH?-9Q z6}#fyflWMOFRrc`FBQt6dO$NvWH3bsF&lQp$Ha>|ct%Jou0F#2)vnhlY<7#pq7P$g_&>WRUstxzFd=pnv9C%1G{PZQJ9PjCqzj+DH9EGA& zE93{QJ04yPqsrDu|OX~yyzK_M?}943{rx!Oxk=j zp)Kqa_i6kxRp04_%F1`NHrO6a#MUFQ|TI$MauE( z@Mroq^}zJJZa9J>6GRdhe~)7zqRWxxY61h)$}+zkiyp`Wyy*xp7#(zrUWlENZmMgr zRurwOhXqQ>RI8YZsVghI!e|*puyriFb-qENlUUImVjQuDndhY^$4|J_H9kH$beTMS zA=v%W*<;(j>&e;i;)#~!Hs`H|eFg$U{X%ARY{<9ixoU8%4PMUHEW;tp$naT?ZTq97 z%ga9ar^+jXN!;7jw)?<%(;UkmTSW0NtOWug1j+B?FV`Nkhe-%OzFKtA>e1RMe{ohQ zH$`jFqx-+iR6evyE$KOhs??h+Awc6?wQ6pbq0i)h>QzY?C6BiAlP+*|kYDH{yazzQ zC2{bX2=c>v`ez!K8j7PUJ#4l6=ZLEkJNau%`?ut^xrgwU`5w$3xZD=(X>%y#T;@w| z%FfWX(3i*x`>P*g&6cKjQH`E&vusYFQu(>#8r5M!48CaqT=#yeRQ~5q+s4B_&1^MM z-*a5>-QNL+uSfblKG@tMPfWE#MiyvFOOw$VgEblS8R#a=4Vl`{;O<|i2uequBAPjg zqjhCR+eGoGi*pp2M{(>H+Sus7@9u z_jRz~*@D_;x@>$`#0#@C)BCTqB9B2vMSmEtbwz+_mR3Wge(x)f1YKR)!&XYPThaZd zbDp19!lZ&V!8h$ng6iI6SqaAq*Ay_mzGmtGRww~y+_xUkF)<;cQsTi#%)UMvbp=I;BJUG&Kq#7F%)ySAOH~#APQ~Z#o7AOU|!!W>iD9&p{gY7k6&K2fA>@z6)Cu-pkbldZqY>1&)`XnHFw}1Dsem?});x zwoVdDkGArC)pDgHI-herk@vV` z;@+{NG+9=O2#nBjo9hycjixgy-UAE-6kjUJso%q{>{`WK+7*$@_U6bOYnIOGHL`1m zJFlXQlH>-eaM7hR@9j;*2ddnvM=CZ(`jir&f^?K5C6w4V)6GN46Acu-Me%|?X;oHk z_L}Cn;>uo8cckSXt`z{jJqH2NtCgTPAlD}dK0@X!?3|}5nFNqaxEStSWZTH!J0<0)Zyax#5M*5X_+COi5@7OUm$cJ~#z4QLLs2=;Wy5qr;2xj~` z&79(~_|Zb2Vu1$N*DDKw+6UfIa&QMWB8^E72L)*R%r zGW#Wid`^}wbo2E=)N1L)+ca*Hpr3C;?4-?u@!?#Iy2q;r^b&9B>-DM8u57tySa|Yd z?-)QCH0T&tJJG!FSfeEdO?YL172GyG{gm}aPTH4NpOcvl#`~#)h-g_lSw}AJlDGyT4;^Rt8~C=IsLNS3^&OrkC>!qj_Q(uaQc( zWw~|1rHS&YOAA=1R)TLSrU%Hq22Uc-QJiQ7o`4K%^hHZthGDkmibIfyp_&H=)N%U~ zuOnN}$O+2!!hIqOzmRM9OQdKjVdJfuU%lv^{S{|J$KK`L@R_DYc^g}b-X}YV_~_Fg zlzbda!pHq!>4!PNP9pG9v>oe^#t*j+89CO|o!UzyNLh;5%zg9IcM2uP5Q@vcpM+g= zQNJTQPO!P^#L~EaiFq<-)j-p6iG_FB+M%)K+E@C?Yvtc01?7Xxw~VhLVQ+R0*{S-V zzOI+~L(kXPEWy%GT3~f_xPOO%P2aDVOg2&Lw_xb@mvf#uoK@gEY>H^=KdXE#krPrLCgH_CcqE#`cbdQws-Zu%SB0rF_&UH@ND+j>BhS=DG%iEn- zj$Hia23u`_+l*BM5rJ0Xr1);HjE!537do`QR61Gjmh-HP!_}oPpmXQ(2H-R&l!U=d z>KOA=RWftIl9$`F%=+BU`h!1TuG!`cY>A-QkK-5(I1gao<&2szRXF#LUg&ZhAtTry z-9H?3siSje2=o|OkP?A|b2-C#*F%0wso&h#qgUNe$x*pCW9gvW!Sl5rt%<%rB``i0 zsdZFXln_MUx9O#&fu}5M%McEj zp(J()IgaZGy4EO2zplHxDOJ5OU}yDRpR0m1QU`3OFPZeI?ALgR$D&|ag4J_Oq2R-J zMeLpGk(mqqYp`pDCx~SPFPJOdOHs*;^fG66eKh>+7ow(ogtvolz|RK@F_D{EcA#@zTW}Sx^5+q^9otLy5~UUpeayta64Fd z2Nv0ib22xtY0BQ}*CuD6d3@{NbN(7&>c12;k1CP7iU2DPP6=_HS#W?ClHda7nmuUV z$9YkYmTN;d7B+y^%PY2@|Jz=0@qhpOiuM1$z3QjZ+dS}A5qFPy^3Ec`t4Cd^7<;ot z?Qe5a^3#~U6&!wY%~{*OakyKywJr{aVBlqYKl$QC`HM%B_)U}^Fo!1!>J$Xwrs(M zTDknoM)u2tBH>G35FOr~)p3#zLnGNOx{|X*V2??)@-W9Z7sn(2vO`OlN;s(G6(z3Rpz{EvHmDrbjY$rZ znsqv`0jq|a)dA0|P5Wz!r4*E1zQd1E@1+L2RjK;u3vB7N#@GJ3HAXkA9aOycuN&VD;hNP(Au*KX@=CB=Q2A>GrCDZonzTV@?_XW$X6L_dFoOXZJo z1{bVjTmUSOZN;20MCF=+JZ)$OeshhJinIV9vCT>=W?rjgHo>iH7dYf$7rP4kP;iGq zEnuH6p?Dr@b=F*q$1K|!20ZoO|6p4L2Zdx=LWu=U?*Ux$ne-J!$g707it6us{usR z7WfQQ*-2;QUA%2Q-Jm~Qb(ScZwrf8dvLo)7^X4cEuf(;9Nu#C|5&POe#&mVO&a>w3E3vJup6ME8YxkVCWcl8bM}z;8LuIDYBBQ1kue z%YZC5moz<*x3@4Ev`~s9jvLE{o7?F|2%eE_`r($x9v84O@D{xW3eTo zD^ON6(=4dEb9#WV^ay+S`ij4H!A|qIso1kdnQxI;o`AWV1u=Ag`e&qO*zV4vbt(x& ze%Kb(lOq&U;=}6FB=CA>xLP<+S;{Mrs zW2n{c5aqWHSAli610`QX`ah=A!=Fi1i_ETP;Sc;)op0pc@>Z^&cfLug|A_p97=rT0COqHIhytK#Gx8ebygb)L+!%ReS` zpahMNyR(# ze4U<_GLowNo-fg-3<>4PI+*KTdH81})iy5_fWIJ0s9)IX#3(eekNk#mXIxx!ct;U6 z85~&BjvUurx}XND3F%}HRYR7Yp1*4v!cP^z713^=lxW+Nsu#G!mx-1*qie59jxLAE 
zdF;eTPcP~nojz45sbP1V3Uk~**CcJi3HrAPP!Wpl!z=GgrV7KJmP{iwF!w1wq>}lP z4*5oHbt1M?^HH^z6L;3wH{_=-jaTeVPxtNK$hYGj6N&`3`0r2k!glBtC`^tQr`vDL zgLOYC&DbIDx9|!e*`)D8e1{e^yX)q)0a`tyMzybHe$WHA>GPNU&PpCth_XFDmwee|x;%WJn=gtBa z`i-?MjV}%yurX9|{?{-0+VwqBU%p)SYbi93FAw$(g!)fclZP6VV$l5BXNhs`hjW^w z?ffi{w8xFk9u5Xjf4S7sp6-bI2dk{2O5NBg`Gxw;t%XMLj`Y)hWRO-?4ck(lF_sfwA;t=x|76isUjU3T$O+*nEB8>de*g*byb zmbb~!Q~lNNo{V1_v&a5igFerjD(M2(jK@Q!gRysr74J5$wOm$vbOo_j+|8}RRg#~0 zwh=0S-=UHSpT~4?C(^Hmx5K#0I7j=QRLH)F3pO-rPY+oAqQzB)xMv!)CWn#QoBUN2 zlo8aMiD!eHK};{2C2S%Y=g;v2al7xUMWfvpm{+n~=x1OKg~Y3-=)2fz%gyO|>|}LY zCB+5zL-D}&Yt{3h)K7e~znJM-B7a`r4fXaml7Tv7CrjG!)v&M(+}yc%`}M5r$zbnLCxK@KnUri=LuSkrX@gD=UQh%@(fz=_cMq|Nik zgCh_%+L__KPh!IxBDdM;*ru6jiNB!qoLT0%_2xfdT-QG@5x^c@082sF;c5Kj>2Bbb zl`!76AyZG163n|lB*pL)d&9u7J!S{Vkf=evvoF{U49m#5$_chsc+lUJnHoH z+%iL`Dj6~V6#E`%c0K8Al8e}I5!EDIlJG=HEb=%)}XmE8cb zmt?1myFy6Y7P;96!KI$-dBLOTxYP^;v=2{ z?@)^gDO!JH zcT*xEa0*oMqo(A#^>oTqMaeHG%DPYxaBG(?X{6C>)2m1iNqXOGMuN}dH>CjM$-f{@ znyFfe-;>KK(^f{J^4etzv1hNoxyMKyjAt7m8Ek$gt`IO?Fb?`qeNf^@uJRI@i2fRu zp08J@>AMQ+59_sBU#JF~bzs6^Y^W5E!3ay5W6LH73`3Zami2>5+Gc@HpA^HH7P8niisBVEOoZe31g^Uh z+ndC%V;ag)7iscMCm-ln5YE%{wKK4for+=Z=1?R~EPXgf|ps3eRUz zs@vvnP_9=XxhS=trje4vHBIg!H%;)_uo~YhGA?}J3+rSqO&KSwlkGJp_QX&d^Em_Up4^kxy#%?zQzMuo8-&0;M1Sl7G=yX`*Y*UF) zchCq)J&rES9X2OAT7!)oQ~PZX*9=pw`IcLBwX~Z9s`)iXh>ztTJ_FH!3T}dBqOVq; z*=Z%3br9B=3v(0 zN>kEd^W-!l?hysHFp!kdD#>i!fE`wK^ge&l<7@MqI$~4(Hef&PmG_*a#BNP_@PIyP zkV2lj_BF-jv6N4(bi&uu2GWZy{^F9PmB@oUp>-8|g!)~Pgg+_UO(v!D-z^_d+al+? z-vqKf#JqUgFc5jR&c<)MKKRpw-WOZFLc5H*%s+|O8C<)BKR)vsfXZ{_roI02zPzYn zz>M?ey^lYS8s9#xx7UInO}wX=d{+kuUfQsd=jyGGvIF66(&jiJXLS>K`HLL-XMbED zD7PDNA-d2c=_13mY9p+A&AI9kPnVqD7Yiw`bKE9P#7?x55@XQBdW)LhH2Ej_`p2!g z&OH5r(ClcB*p-1?)22Xb!>>S$_=u|YfzXSsKZzRol?nMVj{ktlrHs9eq;q}+Qy=+w zCC*x|`#)J5`Eo)nH8Gi5^F9Kk#9sy`_8wM`L7mZ2hgrVf1~0BH#VTJsee-LRLHcyp z-5CLbJl42Sny9L%^4eLsDqn$CE9A7p%$ zSziHpkTbin|9Mu*{8ziq_FX2FBT|jxMAb(?yYYMn++fPF?aA4WF_D~#_^<0KGuf;a zg%{?W651XrOD?+gbTPejJoElzAF2seGX8SbnD=8ec_yRA{bzRYMxRFuEgsB9(>bXx zImtn~m2IFn5y$}er-|YWXJ5p8x;Unn=}vgwXpYVu1btUPRVdtX&C zTN!3#2vm{Y-ajKYQKW}YT#W_a7Lsf84jT4o%MqxcYp(5zC?~n%oN~DQmU!K^cdqov z&OzLypV(dhhK9GaUn{b!H<-U-)JB5G3vx97f*!S)LQU#}OEd2Z0xEehQG6x#n7<8N zT~y<3P=+{sv3;fQS(I|p@hWa71OTEps$G8iNF(vYzIj+Q{BIwC_2}g?%8~qzR zf)Ym@Zo>;|EBnj?(Gi-rMY=M)uO~VkB~!IMAkVxe--c5x9l&Q63MPovc8NksC{2^S z1Au2nAz}O|{8T-f*-0)^Ulk8}uI29h*kgZfmmFZMWhAbDPE5@3$p*}cJ_IA0dIHZJ zj5$LSx=N|X^<}{MleXlYHX=k>N6^)gWYwb>ZT_hl6E{Epc5M!G!$0PUiOm8|k6EM| zq*T|tyt0NLEWn>^EIbeHEt#R`L7nl_jtlBr-0r7SIopUkab)_TVe?B7eKDJ{yMj(+ zkjC~0Z&Wviur2IVpQRNSD01=R@NAw?DkDAU6nd&cWJ3Haq(F(_JCrDi9A8gb{a)%YbqnosBcf+k;HUe! zB#W22C!EL(Vg~uoNe)Dzn?n#H6Q?^W!>DFcDVTD4-_mEP{>GIfNZNr~VM3X!x|f@! zmJNAIFAVYS<+Gc-gHfV`6iOt~ z=yFn^pGskcw7;ME*SB{hB=ygJSjjqsMu4k;1Bg4_qQOGkbtSzrn5n* zP^3m;uoC;6(W1LO1H0gR=wLXy{w3uOy75~}&x5yh21|%|`%A_L(#ePW#ggZQE0;n8 zJk}DA`cclPVIauZ6JW}-J#8Yw+OgG?(oWWI>J9Oc!!KYTXEr*9ln!~en0M5?Ymc6g z_Q-e&p0wpMxKqnbG0;|3G$1=<^^5f6ruO0bwi^L?2ND4SfvzwGoq4G?=F;n|eRt|5 zZf%8b;IxR37Qaf#BLriFD0>#!R-+g$2e)i3%j+z0gFOBgAhyL)7u=jEaQ3Oq_T)m? 
znl_9cm~>5ZmY2C-t_Vrf)e!5Q4>*r~igxy-T##a}0mOa+5)w}h_s_Xqy`l3h(+sj2 zwLUs&UiP>af@XbH9+TIAnFao#5I(?(+i@ov=n^U_>NSJJ&VGMX#ne2yeCzAtj^2yH zr`BFOwLz7u-1}Zcm-!4FK9tTK-j3z=U8S&O2wGk*DtOXUq?bARitM(`>z|}*iaz!)u{0i-r)wWre^0nU;?mI0)5xkwj>RVC(RIx)TCwiM; zcoBlvwOtl&_Z_9LWp6zD3o8DtbeZ{Z7&33UlKBr`;p0T$sDp{i(VVc!n!8MY`!Tl9 zr&;PgkbQ1jF<0tKUq)MEt9Q2SiAm)6p+}y~>*oh-V;Jq-&c65@$hvJ2WSqIWmb_6z zdW3$S#EWz#V!xY|+s=!<&dP{;v^^;mDew2qksTrfu_eB2p<3=H6hF_(jrpWo>N=ik z1+)2z@KE^UT9go_Z*WuCtU>iyiFyUyewzj-`r5C;&PcV80Ew~2yIC{;UA(Q>N8>K% z%VqCfJPfY4FSll}cNiCjW_ayj@?pxw0Gey5DZg=6FSG$#zDYyDi~D z6EO~_S>Ks$Sxs{9|KzSOEXi}#w1Md8a5ny>{7B)~%Jc<~rMiTbJ5O)M+Bp`p3S)Y9 z67j;1Ps$XU!Rni%B=6~V^&sme&9wA_Pks3_CM$uEdu>Nf)0GqsuRI@E8A>#EnVZ|~ zrNdpt3aQ#x&)%w$q+T<;S0|m;UTF#8;^!+tbU!S|`0R>H^nQ8r7Cw3?t1drWeOF|) zcJi;gwxOEhN9Z@erORKay4Kef-uKEW#i+UXlG>I`ug1|!pCK%7D;R+xV^ZhG zJA=zzz8?~wlgY=z!{^alg{JaqAMzhphP-3-?@rM_=X%tz0duoQa-I}l^Nhuwp-Uj* zx~MeHQY$#nLe!Z*PmSI}jh;J6i+7JE@ILeA?v0P7Z2;Mt(AXWK3qiGZB?)M$9))n- zPSZ9_EvhR2`6|mua#S^p&;uVi^mc@4dfKetqs?&XmX8 zZZpbWpMvjz*0ij&53tXy&xy90+EQ)rTP9JN9G|cxioNbo=wi5zbIfy)AD~~Hra5Mt z5t(Mhg__gXS8&O-%d?^>vLX@dPeP-XJTJ?NdW9!?+kZi7Qj$w(7XVLbQhGLjUzvmE zPu2Zremu9;q70-i`6TD6?3mMxiAf1dLnu*chik(h5lD5BAN}R8`P9idM|0&(%R&+! z2mknExJp!7cB)ReM_%T3@r8w#kW7F@(Lxaer4niYtkIEJGbf(YtfKfe9Rnuo>?Ra6 z=$q5;`lm^|KobHDaAq8|dqXZH$j7sCUy1qAl(o9b)SeRw+3=$}btxz=+bsJLnm3EEiC}=SAHAY+J?wJ$~ z=H>43awq@U*+Un<0v2z*ZYx~DxaH)@irAtSCOQMksR&)TG3+u77#pHp_R3J*4JWxP zD9G*E#2a1?Ob*lN61b%UL+kKPE!Euyy(ND=4TWDm@+z}r>CZG|yu7aQXIu`;p>1)J z=~DLVMR{2*SY>mlKg1*K3H#=oVwkjiSjmi~fUB|Twaoo@Z2O<}K3}U{%b(7N7U3rz z&`q@#@F#&rK(_lIOgVfBKT&Pi`Wx#6m{q$4#eNwOh~uvu)013Iej9o7VHk_H$Z;(8 zPP_d5uw7U(&8TC@{AZ;YqYyvlYnm zc{aSlbq4$#v{!!Dt*AGx@8m8#bVO^s78@z~j(+UlO_ktY$wESSf9F3QV1AEljtW>C zD(;*)N~_s8xoak6Rv#QCn-AOp`q@C+-3{)0sYfd^y_ZcDMYtIrpNyWM()M4g5p88N zJVm++szRC99^C(-==h#^_?iY?fTOEd)7?&^s3&Wp$JM!JyA_GCmtp|N&(Qz)IYKYi zZ@wZLkV9ilu!5`KyY1zp4?>_k4vLzdF9gyI3S;LcHemPeTL{-|$CuuZFx>682miu$ z!6+9zyLLd@$4R*HxNe)9&py?kVqbD-FQ0kSO>b;+`E9wdY?1-$Y*~~U^oN0P>VR7QCG)iFBOnkhf0y!oh z@wv~&yq=J>mqTvae44&K_!b|t(;PS%R^y8G-qz&t-@vT0rOh9?0Rgw%6wcQqjtI>-JsJ|flM(v}Far2q_ z`Df1E3wHIOXGHl!EnFTEICU{jYeM5%C7#=aB{io%`Z~o(X?#CE@-HMFhNSnQP9wGr zBGC071?_u&*10>$ct>6#n&8{Ynz44XNYaCVoQzWFd+vW|nxvhM1l?;G1_bKRq0uK) zK_c)>h#hAEN%h#42&1z;Sp@Pqn?>ro5}rcYCqz}OCIuQ>{Wc;~-{tr0>?O~_4{zNj zlEW8DrH7GPCAMqn(IBhxe5*H(B`l!w*rm@&sTAKEKV>=0BWq#m^GPC<8&oJwhQeEj zenz`Sq-nwpKbZOB`kNc1nulBMTfE*Bd==QNxo>yZw5fV^Z8-kyWedP0<4oLtoWGRr zMf~PPqWof$9sI1O7jYoQxtBgNEb!C~JVpQ?_yngg=Iz8%HDkMKSso@`9vxhD%B+)R zws`b?7jXb3>ym_8>74iPo|-1NsIM!Ek5mNFhrH68RJglUrc}* z*4sU0@s!@xSfdtSfYSW2fk>b}{)0Qo(!RHAmaqF6;ox=^{o5n1i@U}r1|*IX*z?B% zzYtLuptRr_V}3g)an7Vx(fIW0El7BS5c@Sw}n({G|l0XCcEY}?qH~+UvM?O-^ z*f4DE$`&K(R;MQsPcTbdY9zLn7O&r*+o`Pe2h#Rlw?2(!)xT66%(j+}93cl1u3i#&|MG!XxG#Qt;S5IDz8*2R zW;2HWOz9?=f)`Y1g2e5#V#*i+4mXA0bJYHJr!qZI^7R&rj7KP!55CA7&;NnQIhQjp zuS(*eUc+*Ap6Vr^?)ZS8Gi_j$kUDXm5nsH)3B)x3L)%UfP>NKb$xY3*kWJu?o&v9l zo>z@yU-SQQ6w*zUk0*K3_(~VXqdhL4&Rev9)HQ5co%l;=PUu3&r(Z`d-=C$dd?i$U zP9a?05~$h${@zYICFAWygRJKLx4D(}o|A!j@4hjW;L73Tm|<;Q=2)jh!j64?rhE3? 
z49P4DN%kmt%07mxVGzmhSO`}aIU56e*t84cIVUW^bZ79-j*{A%C+ky2n+-Pm+5(%NCp zkG;i?mTUej=Yxx?N)L$>DUW$@M?F^f_9Z3!-LY7Tr9KVg*+CN{?q`Xn2Sw(T&-Fg8 zKXt)~j}damJ%xp%Vg8^3-$_4DbSRzQVWt>=CvRI4N7Q?zmq#t6)dNg5w@%y9K7<0} zjsi@oaQV&be?fZ8fReP$3fh%K@wI$~Uj49+rm!~=Wx-2eED0-K%jmm~=nmtKo`M}PT|((RXnUL`_(zR9%u9Lo|*VViPUM57T#e>N;u zt~}|=f3n5?uRr@c_McmT{8$3uvn;=Gh->ouM#9HjJ2@pQ{2D;YH+Q7G$Y&TP?pw8rlf=Ubvm)ai4niXmqrOc`c&7fExXV`+ zz0&-o#r5T%|KIh$0@jIACwjhm0jo}xD=JZg(amL>l`pQ8WOx5#DvTWi`!x4q|6vzsD*2@R)> zwxz9}>KNW{oQV*YW=I_&WEa9Uq0!eUweF{nIgPts9QjNeSKp=@J*fSi24sSTkA+_Q zlphQFf5*Ix6Wy6BNGyBVPe^d?Ge}&Dy*-}6I~^nBeKS+o+u%caPf?=lkgJ`MXedBf zj|8Z|V!9ug2kjD@fXc-{aUl8b@FP-9hZJ-E2|L%<3uaYo6vhA$B4;q+(LKGogwDy%EtPYDu{U)StTs}d_=g} z&~r;lk#JQdc2yu8^uwE5B5@y7^S`%`k4|F=SayoJPzCZqne{61*R)h{U-Z@}UU;fl z|K8o}ZJ!PRXu|;a($*dkXIDA20Pq*?9a<{uR1vv*o;^Mn#x1;|BPMpH=a12SrWYR68jQ~BgY`Q3V>DJCC)cg8|TUWJZLr{w=}T6rzUGzZd+!~xS}UBypF&k#4H=T z`sqK+ssCl3G5$9(i;T+yQ!9(#>HT<+_5+d)i}{NA*NDf%3#r>i!QS8H0)oEqw?qkB zNJ*8Wp!6=-*Q1lF8k4-KaB>xpX3dL<_HeO>^w>2+yl*c_XBnO4NZxs5xES}C!D#Ij zEdO6ihKOT82?L$v?tBChezl{@dD=GQi93NQ*Q#vds9onzTFfj7@4>8rkq26ud&m%? zdLr8SgnvOa=(QkSo_N2pKC5%)^02Lmc51hmh`7D-g)#Y@qQEYF9W^|i)t#vR-QgN> zrNb;tGx76$2pl(+P`=#Rl)gQ+F6U&KBK%L;vr~FZ-c^rX(w=mNNKs8t<5*U#Qy=Gb zmNz#$Spz6_=l+evj0)Y((G)vagy!=}LGiH(@tRykBV^c+fleNK=B?!qMTJ|COo#Kw|G^Nb>g`#hD|8P01i zqDC2Ig}ViIFuBX4rzwAY?p@}8+)ulI`H5Iv+0b{PD!rMi3<+-oyC4`4-%HL1 zbOZd)=h%Ml<9Wn_J8kX^tsRdg^JSZyPP`37iu1|qy+K?mPljou+u^}h#m{7` z&e@7OqMqD6;qxi8aS}J)zID*_f|=3>efoWX3Og|7;o>7*MLBmj;Ax<3!0d8Fq?MWWRiKYw7A)i&v7MWZB#BQ~$-?C5~M=KTYO<6pzTbRl>ie>1DX*Z_y#<*bTKACb;a*3I{8 zio|a$am|-FGGFT8eWT0xXANM9VPMrzn*0F8^z#0!x4Suf@%`YUOkq=D$>5OAC-R&3QKVRVM|dHOQ|%C>CSP3ysF zT@eCtypdnwrkwA_4g9n2zyRnc>MdxkhzmU4HR{rRQ zY2lt-^Zlu^_^Y;xMlWN91fKx@^s9KHB|4(Oyegv`&xL%j0Oow|AvkT-SZDp@Qjpep z9{eVE#YREPMJDwv@DHLQvT6+HE1 zwi=2fCux-k_oKd}+qGK75a$5;WEBioZh31^LBaDNK~sy`mllIO+ID){Id@Z?Pg4mh z2jaNLJd78CTv*FHjXHzC5OOi#JokkhNbrEPSh1TFY_{XvdtuQpE_HCC8Trmhb~DiI zF^Q8508rf(I~Cqxd6+9&+Zt?g12Xo2!~4z|ty^bd7v|3R3EffQ?|Y^Ut;wrX$GO7^ zJDWJt*@eT%MYjWcGn0Ut7}Wmo)VG;#K5Eb4Yw=~QI(j}4ErvV`#@%osBgZ|T4$-WO zpI6UK+ss^vHEB>35ExT^A($e2tHq!{jOz*)=mtR2AR56Ye*`1^lwJpDVnVeIp7tt~ zZvo(}q4Z6bJ*k|@*!Mn_zp3LBd0-&>At?e{HSlVy`2or0A!2HgXUfJ_HYYITs@#lZ zeA+L~YL!bB`c`}@8xHEgk_c-(=lTGV*Zpm_IJ~GGXBKd&qbNzwZue*KrLc~D0g5}- z&8QHMZokvYkrdZ?Zh`i7tshSQrNHVY4Tby#WjtxzrS;D^?s(~d4P2o_OQnb?%dfC# zEEmme9>cv}AvkW?RB!G6qu#*JPHn~;iM>J6>?M}zZZL-$v)h&6xIyK!pK&!!j z-&zsg%A&m(1$Qa|`>1O*AahIod4-!_|JvdOAF`zXv{$bGQLgjcM3+V<2o+A#2j01k z#z9O=&kF!HRn85R@?QCeZ>{j#u^QKQZroyVc~|{8NB1br`vVE9I=%rTKEzJ`8ee!6 zMdHT2e4cxLO!Y75x+HO{@Y2+KK3g6c>OBr~Y5TS)j_GzL-S)2T&B^&Z*0nt1T@y~l zKI8h>aZ6&zLFYQl_{jw~V6WD0M*f^D$pvl7|M8@pH;CMqp@fjQJF!oqH@H9BTAzIS za5c^^8k=4g$Y%BDRlCN3t`0aD)q|~<#c@xO6`tb0lht#;w|e+McEW%%<5O5?WypE9BP z(Y!+fAA9n32Y$%`_qV@MBFAIlS5_!to4>A4=T3g$u0wGEo6J2rFU^RO=1DB*b`Tze zoP$rr$8lzmzki6d25;+cs@2?GiI~YYTJ-mF*rDF0Pk}kn&KNO@I-qAr@XfKkhb2{Z zzRyD5(^jS*NS>^vyS_vI;{H+I1#r9vfaffU`|eguQ*0ROFd>1~uf{oYK_*_@q#~zp zQ`KA&CcA_7fCQI|n}_MpvruRI2OVV+h)n|gGc3Yds-f1+NS4~z`+<8Jodg!9sZt@x zOJj6JO6$-z)#&%_3$8C7sW*5+p1tzFj1du43G>FqpU5u`#DVMc!eL~~VTdxq5+^Q~ zJ55kX$H0qK)5?^5O8VVbY>y80Waar-r7jB#|GM6VD9r9G$ighGd0wIyL`eqpG}v$EUofHxc=~LI z$W}HIOi4W(t8I^=MA@5tHy=r70< zqw#^e*d3_jUtrZm%SH2Br-n_(3&)zRk-lrbWWbm#=hDW{*ems)FRxly^}ThawcV`0 z$%0A6&VR<%DMi=VB2c`vYe}8rJmW~M@{U8B5fid0WcPHm;|Cmnyo7=4oc})RrM)?s- zqwXz4ZCxZ5EXsa|55(tj`CS(iM~8bVV|-3Ym{8(#?B<#~ zu^%8oFx{GeTMgk3VZ*!XM9${BqS4D_>AJrl^#HAUj}=u)3zgx>h{R5=h+3GGU6>g5 zgH{~{Qq{S=6`C=?T(W2qT5;BT8VZ?+qO;9XuRn)?JOx+3L9oGPeStDL6`^&%>k8E0 
zMsHo`HI;T_{&m(zjiAfG#Tm4+nE|{Q17J631rN#yzZRlK{Z79;ED28^_Tx-g(TNwB z6Xscn6=WDqvvQH(P5D=F8YY7>U;d>C|oELZ8=4v`dW28#P?;o(FWcZlaPg5%sa0a`=5tnphw0 zyU3u)Ia&q}^f|Lr-CyhWEcQAtw^Bm^LytLw>Tba1RF*(DO(2`_it^>WCr<$^hsSh8{wtAh6f(XAQ1;3S%05SRfxNyY7LoM#{;m+61IuZ8;kbOP_jNNH|qn5Aje z9d?2c^)!L%ZV@v8j10vy(9gTRG4(8=q-@(#5!)7zp2aRt$y&UT(u@S{GVn@nWqti6!PlTNYVI2!Z#qBm3z~gs*U=IXPT!B5Q6|?EX07`=w)oR zzeAJiNP^sq6qgC>dAlFNW(-_GznOpM)sMAMA(O~Ulp6t5CyFO*Ox2{puF!@f`Sx#E zHrxks0e?;ETr}HhukazPE=n00M6sr7A=P2gFunt`&3;Kijq*kI<(ZkDzPFJ=?R|}m zBcQ8Rp{$s8C5jxZ12D9Ecmh;HXQgO}iXA-YXTa{PvVF0UfP~qV#JR`!=u_O`6ob9?3X4ZEP!_Nqy!>_U?-!$ofIil$dJ^--M6;3%}wQ` zQwGF-_MY0bc93DR?^<#;Z;*(Oe}HwRdPbU2RS|RGb4BlnzCoPr?(NzUF;5`6abF)# zYv(j?PuIVHFy0SiEK*auE(1azf-aqRZ zt9p#%sMAQ^V+DW0I)Pz1k8n!*A&AI4G7cQs^}(zN56^S7=cL=;kV;?N_}9q(K50x_ z^9?D)AIW(O+)EcW{n(g5s4!G75Tl_Kfv*h!QhEPz3o-?YU9&||$2hYYR*vq=(r%xh z%q_zT>?wvlU4dQrEmo>8Qia05C~p>FUylU` z1t-9!3(bX`uB#scsz`O~`MA3Y?kVe2OG{Q=zckNU(9P-NNbiy4!J0L3)-!f1cfVn<{=YnWc#;6PW z1^LhCK$P&3uA$ZljE+R^L*2%o3VH9o&^oQ;-TRm>v5F>bsQ|O>EJcs1Nf}a#Ab)%G zaWPN0Zb1F+&`n;?WsWZQky5)5kG;}_|2f&ABG&O$^i*Q%{3O7l;6&TkWZn4obk)3@Ur&AV@gB$Av)@aG z)A7%UaNrC#U(Z7k0HPqKoNc`pFSInW=tF+{4uS`oMBjBxtF6SZfIF?xL`$=y^T692 zp7^Hz1sN%PVm!zr3;<3t!GMQ}Q4!^k!1YlkGWjx(UddGc(fIfsN2c?!S5PkOhcS;! zN8fqD_z)&^wn-x1e&q7XI0PuNw~2EFnzrp_sx>~-%ld4^I%ObX?M)X*=y)%{8{7Gc z?arIbhsNAaigF${rF!h%7Qg>3ZY!IL{@k@qg&*#1&Iv+9Ogu03(RoQ8+59iij?^pn z9x$H3GU|osqK;=6j$)|#8`!EK`Yf1L0d<~&?$C_tnC=+NzSUZper;ItQET=l)>S`8 zbSuLLBK6`kV{Y0G>F$`~^$db-H7qM8*D*?`;{hw>@tK~{53qQI7U}VOW zsV|jcKXn9-R;Vt4XySp=B(=9YqZ-%bzu!$R>QYV|%^>A{&lMwKXoAr1;$V`+Yn$9` zX+fVG&FkW_!En~%q@Y|OR$g1?@I(L(EAoZ@>!IET6bSGH#kK>&Lt>8HzhQ2XIBwY* zD|54A`jNGy<>gBL(X{ukdd0ksjYwJqJZ>i(%e_OFrRk0mhPAayx~gup_Z{WB9M{BH zDoFJhvV38c9$zw`31WxQ zOjELg1=JE!LzgTn>#;rCbFpRu$9@YKK34~2B0UEHw()wO*F;*H10H4EQ(IjQbK*!W zX^>dPUW(dGrQSq(QqDe^KRo$Zeqa_wKR;BKBJb)e?-Wxu=!m#)EyP>>bYCLuN??D; z8dZsQv7Q1Z%6`YPx%-WhE~CT}2@I%Ernj{}t9U@pjC~4Ai?Zjuwsgdg{Wv+B0$o9&DK7B0zkU;k0c zt{>s;43Vckq<2AC{(|5)E_5l0mK{4c6`mFpX^FA;7Gd?LtNoR=lx$APn?Uoz_a|O@ zAe0?Kmd={yRsy$C-RV?{r(Yq_(-x@jJT~`BOB>HuTPwWNwoP8jTQV zGHgI=-7~h_A@!e1qRSr1m^2Qd-36MJ}phmG(8R1yC=jk;E zc2aS9NTJP|CDuLB1gsPc=}O{UmgjiV#I}WWC(7dHG$;&S)Em!5B1S}1yOKN$RA+)A zLz4Bk^}Nz9Hoq1=^6DkDe2)bY(UaW()`WE)cnw5Zb92CE)~B3v6h=x_S8vIOe0h|X z)?91Rc3Ee!#D;?D#uH7aKza*avbSxZ~-k>7A`y6#0*oE@sl*MgpD(vE5(hXX-n+&D zq?bMOW)4+`qE6N)7S}I;nbr@FJL?*eN`Ha@i?|?-n5R>%<`P|JwfC})t71X&i{+kV zWujUW^`RTE;YM`Tf9fWPX=>#EU&D9*n#B8(%$^r{+2tW|dLZNxRiFMdj}84C%A^!V zm$TB&AN@I|KKAo`b5&*}J5G}0N*k!Bt@QD;XQv>n!`S&0aei#`*I~L0&5mRiuPs~D z0CyL3i@%qTVPF)NVN-739puvFg<-DUWB!R3fjW6cfrU_v-BhiT%moPl~US(!$uh8&VWs|KD z(q>tFUxx|hO&`o7-bo6Df{|uki&R!6ZEiwS_nwE+wft7opCzf6>$cU7Uhg`n-Rw^9 zUB>DI)4L}Rz<}B%=mJ(7WSgB%nD_=AA`)EM>8^5;E;yyj_D8#FYw=BD)LiSI6QD{t zL0`ja&xX6xq(~RO>l}M2l;EeX#P0U?wC~PDGFdS+3JGY40^^aA9}1PamPe5;d5!TC zgK)>@2_o+=CULmk6&crW%*=l!fM9LtRUGYR?t7@QsuxmAnkHLOf=S);NHEAb1;$Qs z9^9Ooy;hsR!TJmxAR=EXXZI4(AEKfM3SSiqsdB&AP&xQn-?ekof z4HJZYBG?;FFD`Z021WsMLmHt3%#E(v1y82uhsQ0g_5K;&z|~q>ix~f7_N6Zzj$^GA z`JoUsiX&AJ!MzXw*fi}-xfqTeoNEc_I<;?SW408M4y`v8-C5ew`3t%j&(tTb@_u}g zc{78mhL9zvo)kk}71%q^W*jRPtm$$ftc=sjtV8eQd zLmY7O05i7=Q1a|ZdB7JAo7HdQG1hta8$;Eok%1zQ6jBTnJ4hQkU~VMP;)mJOG>Ph+ z?6A1JGtF*gNb_;uNj^Aq3w_`wZP&HC9q8aFyu|SG!|uazpr5Ku+KeMElD`q1k|MP| zRlBl3xejX|dlb3V47}J4t-5H*${@cGfs9s1a@``s2$V#h=wYXL7!<_J`CGab$0RK|3%xr8J2(7}-(W zH!SmGX9$31kTFX4Q{2Q_Em+f3c(6)3D!K60AUVb*{NydN1rfcOJb%Lyu%jBq)MnqX zH4nLKq(s9gKSMkX{c>a&?s1+u`F-kCt0m<#(9_mX0twg{AYHJPpR#C8vHr52qFoz4f8 zUVf$afysu&Yk%~$?sdL?*u|)3qC5+`LmvbG&wVlJpEbzofA__LDL0YW_4~a~mW~q6 zWDZtsJcX-`QG;q%!m&e^w&0y@@Lmd*R5z3Ph{1R;%> 
zR>&FDPbeFTm3AIWQ6~~Q_qUWD*5{sUDt;of^fQC^U2fnrDUJoRA6EptLO*!HZVl>y zBM z3Hp|>uX*yDR>AyF(cwR-Q2}$*xf#nzzy+ZVUu#C=Us-V3x=t{GM&EkaLWCuhrSUxf z=4$R_g&oawlri$9Gh04l3^QaD*M-vf0v61oX;LA@G-H4fq5XuAjhEziz&7yB*x=Zs zlx+>vvp!Y%djISez3AX`TnX&HGNe>8^S`q41$A8BYt6%GRTBmEg&HG)_hhlOc2jq6NSd_n+Cz6j`V8Q{Qzh$r?BPq{*rDV>Ct;C8jE^0<%9P3QH@zJ1L6lMr(C z&FB5jkZBSl*_Pt62%7}Fc%6oxpxG%m3GBg$aWnSio9TN+Z4x~~Dk>$v`co#Q-|xYU z?(2xl6T$P5uZYa^+X+NRT*jehWM+QR4?Qox>VYRd*HY`YVoTfKyn6MUV>}J@17C@K z1x%j}fL=vWw;56OCl!tj&&2J>^QwM+|0$_YZ~kcO{mO?!3}*EMcjEreL|@yK=axkP z)mpE7U4d|}A@@Qh#B!nyaa$NHn|Y{@I01v80#1jPgrjPNhD>;d!Zt9HjzE%ib?Yqraf90E9Rg zV}-AVc48xf%r;5H={qAd$AT&QDGJ&&(aH&1ZzSbcuk&O57u#2Do5_Je)3z zHIsh33k1qNXhPEJjE}ubeAaS;yHLEu+PTap1o!6y#*Jgef@A-qx$_QZvv2=DMb#); zEh<)xqE@L@qq#fmk{cyTHLY1v)DB|RR->XurIOo-J!-@rwbH7hW{53ChmlYOp}*_- z@Ao{%?|y#A_mA&!+<#t4{`lm&KKbN4-{*P0-pck6AtnDC@3)trVy)XZt@^)0O<-BD z1|?v65BQ-aF4R>#;w7%Dy-{d;!fV_RX#P;3$HSU^Qx*ys1rBm}ujE`UGWSSFI&CVB~UFK6BgTlE^)jBS)LZ z@TID)zD_(WhAv9s?*`Ihyn^_*?>D7AbQGEpy$wH0IAhZiW}-zySij$|sn6Y=vBV&i zQ_R>bJFl;ps*Z)Gn-=e5@0-J}AT;(c;{dxq6&m$^fm^K{@cjOM7`if7)TqJ%4>b#U z_AGY7sZ0FXtCX2rgkT|J3H=Oh1em+-Ji8tyfpEX{Y^fRbR2q*J39YBxfF_!@c@jV= zpXH1ZC8i|#Re}?ujK{Z}L&|B_RwG2(wP0jxYVWFCmyQxprnJCpR_#XdI7RQs`okpJqpTxG#D`^;m3bTmvLsSN2D90GrsO_^oXvTd zJtROXZ2PFIqIa+6H4*k5WWLnqG*hqOg!U-RQfc#?G2U>`JTaL21qB)!IDn)wl?4f< z}cmNd1vIPW0750j}Z?{fW|UAKd|93RO|iG*{Brr7g5J@5zB?R zV?*VCN!Kl(bu)KUKT~JwqY$y+_rRy-4}q9jdis0j@#El(=H`0UR1O!e0^B-Q#i+d= za(BKp2KlNqK);PUOhPhN${Cou?^Yh>dB_0H^>x8#u zOde@o^}vl^aJ-vD&~A-i3@PnrFgWAG(P4A8JpE&J_HTyAUI(hxfUdv0;SJ1$s}P4o zDVCz_@UR;l0BK$Htm#+x!=jqdv*@p-{37@D<*$7+iC|n0s`x>c64I*3n9Ia8XkXL0 zVO-@c-{6140jD;&a#1?G$8y)W2C26U;YJu3G*kH#@!B4uCUp#h{y{3Yn4iS|`N-h1 zz7d1dwwYWnV1k$AN{;JF=Iz@j4-ge5D%22$q!D-ZY@;Y8P~cll;V4Gf8Gs?81Fxiw zL0V|aOP2zsXeNM88;$L)P1?A>!v_Mne4y%}g3u6;Q?INu}-<@gZ0*i)X4$RPX)723l?LpCgmngqc$!HQ6D5dHRQ@UUsRkCOF2_+wR1Cz z3e0!1bhRyxs$0!U`jg*F}!Th z6&l!sVeyGIHO0jSZ9I7uA=Qk()6V*$(-qF|r(>-K2R*Ke&zPS=+|MV=xXoP z(vlrrIurnomYqFiAxel%)Z)CJuUchm_&^Byes4*HPv4!3LTOf({AP-S*-M%s{Ex8p z#b=c~Y@wkj4|RIRa5)en#sg-=yB@56QmwO=O|`c4N1ae|15;J`At7#;89vs2s>HLO z;y5jqv}4beT=88*e6&(+?$n;JiD_rDOJj3mnse;aS!a7;BWsHr%tC_n^5a+j!)I*6 zK6LW8=`APg{5%1{c;H$5>NPj+_&I5a32ekZs@2_wL6E)d`-DHzSfc@W_O1B=3}wgb>+8{HH0y@)^F|RYm-YA z&!h#BS+8?ZW-5W}qn^W)A4W1Vs^Bfsv3}^>klsLwTd)Ue3$wba7b}piIlKAwY7(Z9 zNSFlUvr_9#oB(iJbyUkjG*U{I1%FE(8t#w$jUP`8~ zUrq9DZlxcoGiyHdpvXNq0AEDPhBZ-tbVJ{{x$TWu%=IzFubh|&e`#D$);jSw!<8*c&q-f0keM4USHA>% zUN89mn*r2u77;~OC~RC{eYVmnn8@O$z-#U=&f22)(P`-ODX#m5tQH*AiO=)P>>7(Y z^DaHo6Vi8-2c_4p&R8E<2ACY=RgTWi z+ar5*fN*@P+PykU)hi~v7TQM`X@f3qaQEcACS4qC-_4zT^{Pi+Wu~QHgzQn%uxY++ z$c{B8(!L)V0zndPK6NQ$ObKZT!~y3}EuINQVj-mwnkwpsYy#lkP%bUO5~z1nhIQrK zB?pxz>uNy(0SMZTVBOM)$*~K;Iez=Jra*tPq}_RbmZm~{P=|)q#&}APjrze$^1@ps zy18~Or_!QeDl~(=cv`haf?qE0GJx!H?B27T`)vL02hZ?XI**Yu3F~%lslWoE@Pai= zZdKTKxhjEz-rJ|1RF^)Oci$(T2#sy{9cw_Mhl4_V`fT*hCox8_xj0<>qhBreA<`j# zlGxB_;K>yIb0XK+hxS@1OD-wB&X{Og)$%j%uu%`GyR97|{H;TTCOXRNTC-dMqomzR*E z6g2;@VfooGe7*PU*@xBnvpepTRh5f*cTn4?mO4|li6XS^g9Upn%z++KI`HT*ZQ=dx zv#w-px3FUuhQBFILaV}8FZ^P4_Vc3e;s*tn7Sk~BA7CNmLs~dxe`y@ZB#u&m&{$U&5N7saGFY*(fR=8ICn9x2 zPveRf#4b9fyCvl-tCZa$R_G)Q~%#ER3@ zm~BqqG<2v704&Dw{9k;H|8#2kpIrZ)s|(Xg^^EI@spT@{_{H!^lJR!9;6Hh8^R(3LKE8=BCV9Rn(w7`_OtwCO{Q)K$_4(^N?1QD|M8e-odNigTIRC zbwxH9)jjvhc=z1r>V70rxR9<(afw^BKl9OV7{)j*Qvj$;WkN^W8kSO2g2X(45h%x6O$r(a%qdiKTv=TGrBnex=Oh~pcCQ20-9 zBAAITO!cy-c*P_C+?VrT^tkIDAaWOdVTk?O&S|cdPh6T?R$*#%20(QM{5RiG-}2rG z!I`X0xvTqRD?M8|`DIZp@k&L@`{%>$_x;8eFYJo4@^#CEtc?9iMteV*Zr_i_wLTne zao@6Y^Y_P%^q))-PXxw*3V*6-=Sy24ioRA@4U)G$)s87Ot7w` zMM~gwvqM-%voz{?OaHta@eb!<5_$JCbEu=>W# 
[GIT binary patch: base85-encoded image literal omitted (binary image data, not reproducible as text)]

diff --git a/docs/conf.py b/docs/source/conf.py
similarity index 95%
rename from docs/conf.py
rename to docs/source/conf.py
index ded1330..d745823 100644
--- a/docs/conf.py
+++ b/docs/source/conf.py
@@ -23,5 +23,6 @@
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
-html_theme = 'alabaster'
+html_theme = 'pyramid'
 html_static_path = ['_static']
+html_logo = "ccb_logo.jpg"

diff --git a/docs/index.rst b/docs/source/index.rst
similarity index 61%
rename from docs/index.rst
rename to docs/source/index.rst
index 6456e30..46ed444 100644
--- a/docs/index.rst
+++ b/docs/source/index.rst
@@ -3,18 +3,26 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Welcome to text2term's documentation!
+.. .. image:: ccb_logo.jpg
+..    :alt: CCB's Logo
+..    :scale: 50 %
+..    :align: left
+
+Text2term
 =====================================
 
 .. toctree::
    :maxdepth: 2
-   :caption: Contents:
-
+.. include:: ../../README.md
+   :parser: myst_parser.sphinx_
+.. include:: ../../README-UI.md
+   :parser: myst_parser.sphinx_
 
 Indices and tables
 ==================
 
 * :ref:`genindex`
-* :ref:`modindex`
 * :ref:`search`
+
+
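Note: the ":parser: myst_parser.sphinx_" option used in the includes above only takes effect if the
MyST parser is enabled in the Sphinx configuration. The full conf.py is not reproduced in this patch,
so the following is a minimal sketch rather than the project's actual file: the extensions line is an
assumption, while the HTML options mirror the diff above.

    # docs/source/conf.py (minimal sketch, not the project's complete configuration)
    extensions = ["myst_parser"]  # assumed: lets Sphinx parse the Markdown READMEs included in index.rst

    html_theme = 'pyramid'
    html_static_path = ['_static']
    html_logo = "ccb_logo.jpg"  # the image added by the binary patch above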
From 58ea820a036552f8a8bca96cfa8d8dcf5a61b541 Mon Sep 17 00:00:00 2001
From: Jason Payne
Date: Wed, 11 Oct 2023 11:13:33 -0400
Subject: [PATCH 31/39] Update .readthedocs.yaml

Updates the readthedocs file to account for changes in document structure
---
 .readthedocs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index c409646..8d044c5 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -17,7 +17,7 @@ build:
 
 # Build documentation in the "docs/" directory with Sphinx
 sphinx:
-  configuration: docs/conf.py
+  configuration: docs/source/conf.py
 
 # Optionally build your docs in additional formats such as PDF and ePub
 # formats:

From 1aecce4f062c814cf29a2aaba1097c5cd4026dcd Mon Sep 17 00:00:00 2001
From: Rafael Goncalves
Date: Wed, 11 Oct 2023 14:24:43 -0400
Subject: [PATCH 32/39] Set up term collector instance before tests are run

---
 test/simple_tests.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/test/simple_tests.py b/test/simple_tests.py
index 90a798a..642194d 100644
--- a/test/simple_tests.py
+++ b/test/simple_tests.py
@@ -8,6 +8,7 @@
 pd.set_option('display.max_columns', None)
 
 EFO_URL = "https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl"
+EFO_TERM_COLLECTOR = OntologyTermCollector(ontology_iri=EFO_URL)
 
 MAPPED_TERM_CURIE_COLUMN = "Mapped Term CURIE"
 TAGS_COLUMN = "Tags"
@@ -117,7 +118,7 @@ def test_mapping_zooma_ontologies():
                                    mapper=Mapper.ZOOMA, term_type=OntologyTermType.ANY)
     print(f"{df_zooma}\n")
     assert df_zooma.size > 0
-    assert df_zooma[MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any()  # returns true if any of the values contains EFO
+    assert df_zooma[MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any()
     assert df_zooma[MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any()
@@ -132,10 +133,6 @@ def test_mapping_bioportal_ontologies():
     assert df_bioportal[MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any()
 
 
-# TEST ONTOLOGY TERM COLLECTOR
-EFO_TERM_COLLECTOR = OntologyTermCollector(ontology_iri=EFO_URL)
-
-
 def test_term_collector():
     expected_nr_efo_terms = 50867
     terms = EFO_TERM_COLLECTOR.get_ontology_terms()
     assert len(terms) == expected_nr_efo_terms

From 83ea587de1cefaded18c410b1e28f4e5c6260351 Mon Sep 17 00:00:00 2001
From: Rafael Goncalves
Date: Thu, 12 Oct 2023 10:15:35 -0400
Subject: [PATCH 33/39] Turn simple_tests into a more modular test suite

Each test should have a descriptive name and message, be executable on its
own, and have indicators of success or failure via 'assert' statements
---
 test/simple_tests.py | 346 ++++++++++++++++++++++---------------------
 1 file changed, 181 insertions(+), 165 deletions(-)

diff --git a/test/simple_tests.py b/test/simple_tests.py
index 642194d..b9c7db4 100644
--- a/test/simple_tests.py
+++ b/test/simple_tests.py
@@ -1,4 +1,5 @@
 import os
+import unittest
 import pandas as pd
 import text2term
 from term import OntologyTermType
@@ -7,172 +8,187 @@
 pd.set_option('display.max_columns', None)
 
-EFO_URL = "https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl"
-EFO_TERM_COLLECTOR = OntologyTermCollector(ontology_iri=EFO_URL)
-
-MAPPED_TERM_CURIE_COLUMN = "Mapped Term CURIE"
-TAGS_COLUMN = "Tags"
-
-
-def run_tests():
-    pizza = "https://protege.stanford.edu/ontologies/pizza/pizza.owl"
-    ncit = "http://purl.obolibrary.org/obo/ncit/releases/2022-08-19/ncit.owl"
-    hpo = 
"http://purl.obolibrary.org/obo/hp/releases/2022-06-11/hp.owl" - ecto = "http://purl.obolibrary.org/obo/ecto/releases/2022-12-12/ecto.owl" - - # ONTOLOGY CACHING - # Test caching an ontology loaded from a URL - print("Test caching an ontology loaded from a URL...") - efo_cache = text2term.cache_ontology(ontology_url=EFO_URL, ontology_acronym="EFO") - print(f"Cache exists: {efo_cache.cache_exists()}\n") - - # Test caching an ontology by resolving its acronym using bioregistry - print("Test caching an ontology by resolving its acronym using bioregistry...") - clo_cache = text2term.cache_ontology(ontology_url="CLO", ontology_acronym="CLO") - print(f"Cache exists: {clo_cache.cache_exists()}\n") - - # Test caching the set of ontologies specified in resources/ontologies.csv - caches = text2term.cache_ontology_set(os.path.join("..", "text2term", "resources", "ontologies.csv")) - - # MAPPING TO A (CACHED) ONTOLOGY - # Test mapping a list of terms to cached EFO ontology - print("Test mapping a list of terms to cached EFO ontology...") - mappings_efo_cache = efo_cache.map_terms(["asthma", "disease location", "food allergy"], - term_type=OntologyTermType.ANY) - print(f"{mappings_efo_cache}\n") - - # Test mapping a list of terms to EFO loaded from a URL - print("Test mapping a list of terms to EFO loaded from a URL...") - mappings_efo_url = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology=EFO_URL, - term_type=OntologyTermType.ANY) - print(f"{mappings_efo_url}\n") - - # Test that mapping to cached ontology is the same as to ontology loaded from its URL - print("Test that mapping to cached ontology is the same as to ontology loaded from its URL...") - mappings_match = check_df_equals(drop_source_term_ids(mappings_efo_cache), - drop_source_term_ids(mappings_efo_url)) - print(f"...{mappings_match}") - - # Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric - print("Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric...") - df1 = text2term.map_terms(["asthma", "disease location", "food allergy"], "EFO", min_score=.8, - mapper=text2term.Mapper.JARO_WINKLER, excl_deprecated=True, use_cache=True, - term_type=OntologyTermType.ANY) - print(f"{df1}\n") - - # Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry - print("Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry") - df2 = text2term.map_terms(["contains", "asthma"], "EFO", term_type=OntologyTermType.CLASS) - print(f"{df2}\n") - - -def test_mapping_tagged_terms(): - # Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output - print("Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output...") - df3 = text2term.map_terms( - {"asthma": "disease", "allergy": ["ignore", "response"], "protein level": ["measurement"], "isdjfnsdfwd": None}, - target_ontology="EFO", excl_deprecated=True, use_cache=True, incl_unmapped=True) - print(f"{df3}\n") - assert df3.size > 0 - assert df3[TAGS_COLUMN].str.contains("disease").any() - assert df3[TAGS_COLUMN].str.contains("measurement").any() - - -def test_preprocessing_from_file(): - # Test processing tagged terms where the tags are provided in a file - print("Test processing tagged terms where the tags are provided in a file...") - tagged_terms = text2term.preprocess_tagged_terms("simple_preprocess.txt") - df4 = 
text2term.map_terms(tagged_terms, target_ontology="EFO", use_cache=True, incl_unmapped=True) - print(f"{df4}\n") - assert df4.size > 0 - assert df4[TAGS_COLUMN].str.contains("disease").any() - assert df4[TAGS_COLUMN].str.contains("important").any() - - -def test_mapping_to_properties(): - # Test mapping a list of properties to EFO loaded from a URL and restrict search to properties - print("Test mapping a list of properties to EFO loaded from a URL and restrict search to properties...") - df5 = text2term.map_terms(source_terms=["contains", "location"], target_ontology=EFO_URL, - term_type=OntologyTermType.PROPERTY) - print(f"{df5}\n") - assert df5.size > 0 - - # Test mapping a list of properties to EFO loaded from cache and restrict search to properties - print("Test mapping a list of properties to EFO loaded from cache and restrict search to properties...") - if not text2term.cache_exists("EFO"): - text2term.cache_ontology(ontology_url=EFO_URL, ontology_acronym="EFO") - df6 = text2term.map_terms(source_terms=["contains", "location"], target_ontology="EFO", use_cache=True, - term_type=OntologyTermType.PROPERTY) - print(f"{df6}\n") - assert df6.size > 0 - - # Test that mapping to properties in cached ontology is the same as to ontology loaded from its URL - properties_df_match = check_df_equals(drop_source_term_ids(df5), drop_source_term_ids(df6)) - print(f"...{properties_df_match}") - - -def test_mapping_zooma_ontologies(): - # Test mapping a list of terms to multiple ontologies using the Zooma mapper - print("Test mapping a list of terms to multiple ontologies using the Zooma mapper...") - df_zooma = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT", - mapper=Mapper.ZOOMA, term_type=OntologyTermType.ANY) - print(f"{df_zooma}\n") - assert df_zooma.size > 0 - assert df_zooma[MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any() - assert df_zooma[MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any() - - -def test_mapping_bioportal_ontologies(): - # Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper - print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper...") - df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT", - mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY) - print(f"{df_bioportal}\n") - assert df_bioportal.size > 0 - assert df_bioportal[MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any() - assert df_bioportal[MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any() - - -def test_term_collector(): - expected_nr_efo_terms = 50867 - terms = EFO_TERM_COLLECTOR.get_ontology_terms() - assert len(terms) == expected_nr_efo_terms - - -def test_term_collector_classes_only(): - expected_nr_efo_classes = 50643 - terms = EFO_TERM_COLLECTOR.get_ontology_terms(term_type=OntologyTermType.CLASS) - assert len(terms) == expected_nr_efo_classes - - -def test_term_collector_properties_only(): - expected_nr_efo_properties = 224 - terms = EFO_TERM_COLLECTOR.get_ontology_terms(term_type=OntologyTermType.PROPERTY) - assert len(terms) == expected_nr_efo_properties - - -def test_term_collector_iri_limit(): - iri = "http://www.ebi.ac.uk/efo/" - expected_nr_terms_with_efo_iri = 17383 - terms = EFO_TERM_COLLECTOR.get_ontology_terms(base_iris=[iri], term_type=OntologyTermType.ANY) - assert len(terms) == expected_nr_terms_with_efo_iri - -def test_term_collector_iri_limit_properties_only(): - iri = "http://www.ebi.ac.uk/efo/" - 
expected_nr_properties_with_efo_iri = 29 - terms = EFO_TERM_COLLECTOR.get_ontology_terms(base_iris=[iri], term_type=OntologyTermType.PROPERTY) - assert len(terms) == expected_nr_properties_with_efo_iri - - -def drop_source_term_ids(df): - return df.drop('Source Term ID', axis=1) - - -def check_df_equals(df, expected_df): - pd.testing.assert_frame_equal(df, expected_df, check_names=False, check_like=True) - return True +class Text2TermTestSuite(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super(Text2TermTestSuite, cls).setUpClass() + print("Setting up test suite global variables...") + cls.EFO_URL = "https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl" + cls.SOURCE_TERM_ID_COLUMN = "Source Term ID" + cls.MAPPED_TERM_CURIE_COLUMN = "Mapped Term CURIE" + cls.TAGS_COLUMN = "Tags" + + def test_caching_ontology_from_url(self): + # Test caching an ontology loaded from a URL + print("Test caching an ontology loaded from a URL...") + efo_cache = text2term.cache_ontology(ontology_url=self.EFO_URL, ontology_acronym="EFO") + print(f"Cache exists: {efo_cache.cache_exists()}\n") + assert efo_cache.cache_exists() is True + + print("Test using the returned ontology cache object to map a list of terms...") + mappings_efo_cache = efo_cache.map_terms(["asthma", "disease location", "food allergy"], + term_type=OntologyTermType.ANY) + assert mappings_efo_cache.size > 0 + + def test_caching_ontology_from_acronym(self): + # Test caching an ontology by resolving its acronym using bioregistry + print("Test caching an ontology by resolving its acronym using bioregistry...") + clo_cache = text2term.cache_ontology(ontology_url="CLO", ontology_acronym="CLO") + print(f"Cache exists: {clo_cache.cache_exists()}\n") + assert clo_cache.cache_exists() is True + + def test_caching_ontology_set(self): + nr_ontologies_in_registry = 8 + # Test caching the set of ontologies specified in resources/ontologies.csv + caches = text2term.cache_ontology_set(os.path.join("..", "text2term", "resources", "ontologies.csv")) + assert len(caches) == nr_ontologies_in_registry + + def test_mapping_to_cached_ontology(self): + # Test mapping a list of terms to EFO loaded from cache + print("Test mapping a list of terms to EFO loaded from cache...") + mappings_efo_cache = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology="EFO", + use_cache=True, term_type=OntologyTermType.ANY) + print(f"{mappings_efo_cache}\n") + assert mappings_efo_cache.size > 0 + + # Test mapping a list of terms to EFO loaded from a URL + print("Test mapping a list of terms to EFO loaded from a URL...") + mappings_efo_url = text2term.map_terms(["asthma", "disease location", "food allergy"], + target_ontology=self.EFO_URL, term_type=OntologyTermType.ANY) + print(f"{mappings_efo_url}\n") + assert mappings_efo_url.size > 0 + + # Test that mapping to cached ontology is the same as to ontology loaded from its URL + print("Test that mapping to cached ontology is the same as to ontology loaded from its URL...") + mappings_match = self.check_df_equals(self.drop_source_term_ids(mappings_efo_cache), + self.drop_source_term_ids(mappings_efo_url)) + print(f"...{mappings_match}") + assert mappings_match is True + + def test_mapping_to_cached_efo_using_syntactic_mapper(self): + # Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric + print("Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric...") + df = text2term.map_terms(["asthma", "disease location", 
"food allergy"], "EFO", min_score=.8, + mapper=text2term.Mapper.JARO_WINKLER, excl_deprecated=True, use_cache=True, + term_type=OntologyTermType.ANY) + print(f"{df}\n") + assert df.size > 0 + + def test_mapping_to_efo_using_ontology_acronym(self): + # Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry + print( + "Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry") + df2 = text2term.map_terms(["contains", "asthma"], "EFO", term_type=OntologyTermType.CLASS) + print(f"{df2}\n") + assert df2.size > 0 + + def test_mapping_tagged_terms(self): + # Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output + print("Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output...") + df3 = text2term.map_terms( + {"asthma": "disease", "allergy": ["ignore", "response"], "protein level": ["measurement"], + "isdjfnsdfwd": None}, target_ontology="EFO", excl_deprecated=True, use_cache=True, incl_unmapped=True) + print(f"{df3}\n") + assert df3.size > 0 + assert df3[self.TAGS_COLUMN].str.contains("disease").any() + assert df3[self.TAGS_COLUMN].str.contains("measurement").any() + + def test_preprocessing_from_file(self): + # Test processing tagged terms where the tags are provided in a file + print("Test processing tagged terms where the tags are provided in a file...") + tagged_terms = text2term.preprocess_tagged_terms("simple_preprocess.txt") + df4 = text2term.map_terms(tagged_terms, target_ontology="EFO", use_cache=True, incl_unmapped=True) + print(f"{df4}\n") + assert df4.size > 0 + assert df4[self.TAGS_COLUMN].str.contains("disease").any() + assert df4[self.TAGS_COLUMN].str.contains("important").any() + + def test_mapping_to_properties(self): + # Test mapping a list of properties to EFO loaded from a URL and restrict search to properties + print("Test mapping a list of properties to EFO loaded from a URL and restrict search to properties...") + df5 = text2term.map_terms(source_terms=["contains", "location"], target_ontology=self.EFO_URL, + term_type=OntologyTermType.PROPERTY) + print(f"{df5}\n") + assert df5.size > 0 + + # Test mapping a list of properties to EFO loaded from cache and restrict search to properties + print("Test mapping a list of properties to EFO loaded from cache and restrict search to properties...") + if not text2term.cache_exists("EFO"): + text2term.cache_ontology(ontology_url=self.EFO_URL, ontology_acronym="EFO") + df6 = text2term.map_terms(source_terms=["contains", "location"], target_ontology="EFO", use_cache=True, + term_type=OntologyTermType.PROPERTY) + print(f"{df6}\n") + assert df6.size > 0 + + # Test that mapping to properties in cached ontology is the same as to ontology loaded from its URL + properties_df_match = self.check_df_equals(self.drop_source_term_ids(df5), self.drop_source_term_ids(df6)) + print(f"Properties match: {properties_df_match}") + assert properties_df_match is True + + def test_mapping_zooma_ontologies(self): + # Test mapping a list of terms to multiple ontologies using the Zooma mapper + print("Test mapping a list of terms to multiple ontologies using the Zooma mapper...") + df_zooma = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT", + mapper=Mapper.ZOOMA, term_type=OntologyTermType.ANY) + print(f"{df_zooma}\n") + assert df_zooma.size > 0 + assert df_zooma[self.MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any() + assert 
df_zooma[self.MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any() + + def test_mapping_bioportal_ontologies(self): + # Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper + print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper...") + df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT", + mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY) + print(f"{df_bioportal}\n") + assert df_bioportal.size > 0 + assert df_bioportal[self.MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any() + assert df_bioportal[self.MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any() + + def test_term_collector(self): + expected_nr_efo_terms = 50867 + efo_term_collector = OntologyTermCollector(ontology_iri=self.EFO_URL) + terms = efo_term_collector.get_ontology_terms() + assert len(terms) == expected_nr_efo_terms + + def test_term_collector_classes_only(self): + expected_nr_efo_classes = 50643 + efo_term_collector = OntologyTermCollector(ontology_iri=self.EFO_URL) + terms = efo_term_collector.get_ontology_terms(term_type=OntologyTermType.CLASS) + assert len(terms) == expected_nr_efo_classes + + def test_term_collector_properties_only(self): + expected_nr_efo_properties = 224 + efo_term_collector = OntologyTermCollector(ontology_iri=self.EFO_URL) + terms = efo_term_collector.get_ontology_terms(term_type=OntologyTermType.PROPERTY) + assert len(terms) == expected_nr_efo_properties + + def test_term_collector_iri_limit(self): + efo_base_iri = "http://www.ebi.ac.uk/efo/" + expected_nr_terms_with_efo_iri = 17383 + efo_term_collector = OntologyTermCollector(ontology_iri=self.EFO_URL) + terms = efo_term_collector.get_ontology_terms(base_iris=[efo_base_iri], term_type=OntologyTermType.ANY) + assert len(terms) == expected_nr_terms_with_efo_iri + + def test_term_collector_iri_limit_properties_only(self): + efo_base_iri = "http://www.ebi.ac.uk/efo/" + expected_nr_properties_with_efo_iri = 29 + efo_term_collector = OntologyTermCollector(ontology_iri=self.EFO_URL) + terms = efo_term_collector.get_ontology_terms(base_iris=[efo_base_iri], term_type=OntologyTermType.PROPERTY) + assert len(terms) == expected_nr_properties_with_efo_iri + + def drop_source_term_ids(self, df): + # Unless specified, source term IDs are randomly generated UUIDs. We have to drop the ID column to be able to + # get a meaningful diff between two dataframes. 
Otherwise, the dataframes would always differ because of the IDs
+        return df.drop(self.SOURCE_TERM_ID_COLUMN, axis=1)
+
+    def check_df_equals(self, df, expected_df):
+        # Use pandas::assert_frame_equal function to determine if two data frames are equal
+        pd.testing.assert_frame_equal(df, expected_df, check_names=False, check_like=True)
+        return True
 
 
 if __name__ == '__main__':
-    run_tests()
+    unittest.main()
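With the suite rebuilt on unittest, each test can indeed be run on its own, as the commit message
above intends. A short sketch of one way to do that programmatically; the module, class and test
names come from the diff above, but the invocation itself is illustrative and assumes the test/
directory is on sys.path:

    import unittest

    # Load and run a single test from the new suite by its dotted name
    loader = unittest.TestLoader()
    suite = loader.loadTestsFromName("simple_tests.Text2TermTestSuite.test_mapping_tagged_terms")
    unittest.TextTestRunner(verbosity=2).run(suite)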
From e8c3f0255bf45868b65002ea88e5a4498b3bf893 Mon Sep 17 00:00:00 2001
From: Rafael Goncalves
Date: Thu, 12 Oct 2023 18:52:51 -0400
Subject: [PATCH 35/39] Add 'min_score' filter tests

---
 test/simple_tests.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/test/simple_tests.py b/test/simple_tests.py
index b9c7db4..0a9e5e2 100644
--- a/test/simple_tests.py
+++ b/test/simple_tests.py
@@ -18,6 +18,7 @@ def setUpClass(cls):
         cls.EFO_URL = "https://github.com/EBISPOT/efo/releases/download/v3.57.0/efo.owl"
         cls.SOURCE_TERM_ID_COLUMN = "Source Term ID"
         cls.MAPPED_TERM_CURIE_COLUMN = "Mapped Term CURIE"
+        cls.MAPPING_SCORE_COLUMN = "Mapping Score"
         cls.TAGS_COLUMN = "Tags"
 
     def test_caching_ontology_from_url(self):
@@ -70,9 +71,8 @@ def test_mapping_to_cached_ontology(self):
     def test_mapping_to_cached_efo_using_syntactic_mapper(self):
         # Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric
         print("Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric...")
-        df = text2term.map_terms(["asthma", "disease location", "food allergy"], "EFO", min_score=.8,
-                                 mapper=text2term.Mapper.JARO_WINKLER, excl_deprecated=True, use_cache=True,
-                                 term_type=OntologyTermType.ANY)
+        df = text2term.map_terms(["asthma", "disease location", "food allergy"], "EFO", use_cache=True,
+                                 mapper=text2term.Mapper.JARO_WINKLER, term_type=OntologyTermType.ANY)
         print(f"{df}\n")
         assert df.size > 0
@@ -179,6 +179,25 @@ def test_term_collector_iri_limit_properties_only(self):
         terms = efo_term_collector.get_ontology_terms(base_iris=[efo_base_iri], term_type=OntologyTermType.PROPERTY)
         assert len(terms) == expected_nr_properties_with_efo_iri
 
+    def test_mapping_with_min_score_filter(self):
+        min_score = 0.6
+        search_terms = ["asthma attack", "location"]
+
+        print("Test mapping to EFO and NCIT using Zooma mapper and min_score filter...")
+        df_zooma = text2term.map_terms(search_terms, target_ontology="EFO,NCIT", mapper=Mapper.ZOOMA,
+                                       term_type=OntologyTermType.ANY, min_score=min_score)
+        assert (df_zooma[self.MAPPING_SCORE_COLUMN] >= min_score).all()
+
+        print("Test mapping to cached EFO using TFIDF similarity metric and min_score filter...")
+        df_tfidf = text2term.map_terms(search_terms, target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
+                                       term_type=OntologyTermType.ANY, min_score=min_score)
+        assert (df_tfidf[self.MAPPING_SCORE_COLUMN] >= min_score).all()
+
+        print("Test mapping to cached EFO using Levenshtein similarity metric and min_score filter...")
+        df_leven = text2term.map_terms(search_terms, target_ontology="EFO", use_cache=True, mapper=Mapper.LEVENSHTEIN,
+                                       term_type=OntologyTermType.ANY, min_score=min_score)
+        assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all()
+
     def drop_source_term_ids(self, df):
         # Unless specified, source term IDs are randomly generated UUIDs. We have to drop the ID column to be able to
         # get a meaningful diff between two dataframes. Otherwise, the dataframes would always differ because of the IDs

From c44728468058cd0ba5f17695d427209f6e2bb9e4 Mon Sep 17 00:00:00 2001
From: Rafael Goncalves
Date: Thu, 12 Oct 2023 18:53:09 -0400
Subject: [PATCH 36/39] Fix min_score filter issue

---
 text2term/t2t.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/text2term/t2t.py b/text2term/t2t.py
index 890088c..77ab182 100644
--- a/text2term/t2t.py
+++ b/text2term/t2t.py
@@ -212,8 +212,8 @@ def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappi
     # Add tags, process, and filter
     df = _filter_mappings(mappings_df, min_score)
     if incl_unmapped:
-        df = _add_unmapped_terms(mappings_df, tags, source_terms, source_term_ids)
-    df = _add_tags_to_df(mappings_df, tags)
+        df = _add_unmapped_terms(df, tags, source_terms, source_term_ids)
+    df = _add_tags_to_df(df, tags)
     return df
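
The fix above is subtle enough to deserve a miniature reproduction: before the patch, the frame returned by _filter_mappings was immediately shadowed, because the follow-up helpers were handed the unfiltered mappings_df. A self-contained pandas sketch of the data flow, where plain assignments stand in for the _add_unmapped_terms and _add_tags_to_df calls:

    import pandas as pd

    mappings_df = pd.DataFrame({"Source Term": ["asthma attack", "location"],
                                "Mapping Score": [0.95, 0.42]})
    min_score = 0.6

    # Equivalent of _filter_mappings(mappings_df, min_score)
    df = mappings_df[mappings_df["Mapping Score"] >= min_score]

    # Before the fix: downstream steps re-read the unfiltered frame, undoing the filter
    before = mappings_df.copy()
    assert len(before) == 2  # the 0.42-score row incorrectly survives

    # After the fix: each step consumes the previous step's output
    after = df.copy()
    assert len(after) == 1   # only rows meeting min_score remain
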
From 997e4a6513e0aee40e8d56db0a6aa0fd35f05882 Mon Sep 17 00:00:00 2001
From: Rafael Goncalves
Date: Thu, 12 Oct 2023 19:34:00 -0400
Subject: [PATCH 37/39] Add warning about mapping score when using BioPortal mapper

---
 text2term/t2t.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/text2term/t2t.py b/text2term/t2t.py
index 77ab182..b0ce36d 100644
--- a/text2term/t2t.py
+++ b/text2term/t2t.py
@@ -209,10 +209,17 @@ def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappi
         raise ValueError("Unsupported mapper: " + mapper)
     LOGGER.info("...done (mapping time: %.2f seconds)", time.time() - start)
 
-    # Add tags, process, and filter
-    df = _filter_mappings(mappings_df, min_score)
+    # Filter terms by the specified mapping score
+    if mapper == Mapper.BIOPORTAL:
+        LOGGER.warning("The BioPortal mapper does not return a 'mapping score' for its mappings, so the min_score "
+                       "filter has no effect on BioPortal mappings. The mapping score is hardcoded to 1 by text2term.")
+        df = mappings_df
+    else:
+        df = _filter_mappings(mappings_df, min_score)
+    # Include in output data frame any input terms that did not meet the min_score threshold
     if incl_unmapped:
         df = _add_unmapped_terms(df, tags, source_terms, source_term_ids)
+    # Add tags
     df = _add_tags_to_df(df, tags)
     return df

From 66cc57394e2899f3d3cef6b0183bf9fb476ffe53 Mon Sep 17 00:00:00 2001
From: Rafael Goncalves
Date: Fri, 13 Oct 2023 11:19:32 -0400
Subject: [PATCH 38/39] Fix issue accessing df when mappings df is empty

---
 text2term/t2t.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/text2term/t2t.py b/text2term/t2t.py
index b0ce36d..bf03965 100644
--- a/text2term/t2t.py
+++ b/text2term/t2t.py
@@ -269,7 +269,10 @@ def _filter_mappings(mappings_df, min_score):
 
 
 def _add_unmapped_terms(mappings_df, tags, source_terms, source_terms_ids):
-    mapped = pd.unique(mappings_df["Source Term"])
+    if mappings_df.size == 0:
+        mapped = ()
+    else:
+        mapped = pd.unique(mappings_df["Source Term"])
     for (term, term_id) in zip(source_terms, source_terms_ids):
         if term not in mapped:
             non_mapping = TermMapping(term, term_id, "", "", 0)

From 3c993e767164dd97ba774d496974fbc510fbaec2 Mon Sep 17 00:00:00 2001
From: Rafael Goncalves
Date: Fri, 13 Oct 2023 11:20:03 -0400
Subject: [PATCH 39/39] Add tests for include_unmapped feature

---
 test/simple_tests.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/test/simple_tests.py b/test/simple_tests.py
index 0a9e5e2..7e65169 100644
--- a/test/simple_tests.py
+++ b/test/simple_tests.py
@@ -198,6 +198,16 @@ def test_mapping_with_min_score_filter(self):
                                        term_type=OntologyTermType.ANY, min_score=min_score)
         assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all()
 
+    def test_include_unmapped_terms(self):
+        df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
+                                 incl_unmapped=True, min_score=0.8)
+        assert df[self.TAGS_COLUMN].str.contains("unmapped").any()
+
+    def test_include_unmapped_terms_when_no_mappings_are_returned(self):
+        df = text2term.map_terms(["mojito", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
+                                 incl_unmapped=True, min_score=0.8)
+        assert df[self.TAGS_COLUMN].str.contains("unmapped").any()
+
     def drop_source_term_ids(self, df):
         # Unless specified, source term IDs are randomly generated UUIDs. We have to drop the ID column to be able to
         # get a meaningful diff between two dataframes. Otherwise, the dataframes would always differ because of the IDs
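
Taken together, patches 36 through 39 make min_score filtering, unmapped-term handling, and tagging compose cleanly even when nothing maps. A hedged usage sketch of the feature these new tests exercise; it assumes EFO has already been cached under the acronym "EFO", as the earlier caching test does, and mirrors the map_terms call from the tests above:

    import text2term
    from text2term import Mapper

    # Assumes a prior caching step, e.g. text2term.cache_ontology(EFO_URL, "EFO"),
    # where EFO_URL stands for the EFO release URL used in the test class
    df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True,
                             mapper=Mapper.TFIDF, min_score=0.8, incl_unmapped=True)

    # Unmapped inputs come back as rows tagged "unmapped", with an empty mapped
    # term and a mapping score of 0 (see _add_unmapped_terms in patch 38)
    print(df[df["Tags"].astype(str).str.contains("unmapped")])
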