fix: during their init, matchers skip pipes with non-required assigned extensions

percevalw committed Oct 24, 2023
1 parent d6d4c92 commit a2962c4
Showing 4 changed files with 30 additions and 12 deletions.
8 changes: 7 additions & 1 deletion changelog.md
@@ -1,8 +1,14 @@
 # Changelog

+## v0.9.2
+
+### Changed
+
+- Fix matchers to skip pipes with assigned extensions that are not required by the matcher during the initialization
+
 ## v0.9.1

-## Changed
+### Changed

 - Improve negation patterns
 - Abstent disorders now set the negation to True when matched as `ABSENT`
13 changes: 7 additions & 6 deletions edsnlp/matchers/phrase.pyx
@@ -12,8 +12,7 @@ from spacy.tokens.token cimport Token
 from spacy.typedefs cimport attr_t
 from spacy.vocab cimport Vocab

-from edsnlp.matchers.utils import Patterns
-
+from edsnlp.matchers.utils import Patterns, normalize_token_attr

 def get_normalized_variant(doclike) -> str:
     tokens = [t.text + t.whitespace_ for t in doclike if not t._.excluded]
@@ -88,13 +87,15 @@ cdef class EDSPhraseMatcher(PhraseMatcher):
         if not terms:
             terms = dict()

+        matched_attr = normalize_token_attr(self.vocab.strings[self.attr])
+        assert matched_attr is not None, "Unsupported attribute for matching"
         token_pipelines = [
             name
             for name, pipe in nlp.pipeline
-            if any(
-                "token" in assign and not assign == "token.is_sent_start"
-                for assign in nlp.get_pipe_meta(name).assigns
-            )
+            if name not in nlp.disabled and matched_attr in {
+                normalize_token_attr(ass)
+                for ass in nlp.get_pipe_meta(name).assigns
+            }
         ]
         with nlp.select_pipes(enable=token_pipelines):
             for key, expressions in (tqdm(
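The hunk above narrows the pipes that `EDSPhraseMatcher` temporarily enables while building its patterns: instead of every pipe that assigns any token attribute (other than `token.is_sent_start`), it now keeps only the pipes whose declared `assigns` normalize to the matcher's own attribute, and it never re-enables disabled pipes. Below is a minimal sketch, not part of the commit, illustrating the new filter for a matcher built with `attr="NORM"`; the toy components are hypothetical and `normalize_token_attr` is copied from the diff further down.

```python
import spacy
from spacy.language import Language


@Language.component("toy_norm", assigns=["token.norm"])
def toy_norm(doc):
    # stand-in for a normalization pipe: declares that it assigns token.norm
    return doc


@Language.component("toy_ner", assigns=["doc.ents"])
def toy_ner(doc):
    # stand-in for an NER pipe: assigns a doc-level attribute only
    return doc


def normalize_token_attr(attr):
    # copied from edsnlp/matchers/utils/__init__.py as added by this commit
    if attr.startswith("doc.") or attr.startswith("span."):
        return None
    attr = attr.replace("token.", "")
    lower = attr.replace("_", "").lower()
    return "text" if lower == "orth" else lower


nlp = spacy.blank("fr")
nlp.add_pipe("toy_norm")
nlp.add_pipe("toy_ner")

# What a matcher built with attr="NORM" computes during its init
matched_attr = normalize_token_attr("NORM")  # -> "norm"
token_pipelines = [
    name
    for name, pipe in nlp.pipeline
    if name not in nlp.disabled
    and matched_attr
    in {normalize_token_attr(ass) for ass in nlp.get_pipe_meta(name).assigns}
]
print(token_pipelines)  # ['toy_norm'] -- 'toy_ner' assigns no token attribute and is skipped
```

Only `toy_norm` ends up inside the matcher's `nlp.select_pipes(enable=token_pipelines)` block, so pipes that cannot affect the matched attribute no longer run over every term during initialization.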
12 changes: 7 additions & 5 deletions edsnlp/matchers/simstring.py
@@ -13,7 +13,7 @@
 from spacy.tokens import Doc, Span
 from tqdm import tqdm

-from edsnlp.matchers.utils import ATTRIBUTES, get_text
+from edsnlp.matchers.utils import ATTRIBUTES, get_text, normalize_token_attr


 class SimstringWriter:
@@ -129,13 +129,15 @@ def build_patterns(
         self.syn2cuis = None

         syn2cuis = defaultdict(lambda: [])
+
+        matched_attr = normalize_token_attr(self.vocab.strings[self.attr])
+        assert matched_attr is not None, "Unsupported attribute for matching"
         token_pipelines = [
             name
             for name, pipe in nlp.pipeline
-            if any(
-                "token" in assign and not assign == "token.is_sent_start"
-                for assign in nlp.get_pipe_meta(name).assigns
-            )
+            if name not in nlp.disabled
+            and matched_attr
+            in {normalize_token_attr(ass) for ass in nlp.get_pipe_meta(name).assigns}
         ]
         with nlp.select_pipes(enable=token_pipelines):
             with SimstringWriter(self.path) as ss_db:
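`SimstringMatcher.build_patterns` now applies the same selection, and the `name not in nlp.disabled` clause makes explicit that disabled pipes stay out of the selection rather than being silently re-enabled while patterns are built. A short sketch under the same assumptions (hypothetical toy components, helper copied from the diff):

```python
import spacy
from spacy.language import Language


@Language.component("norm_a", assigns=["token.norm"])
def norm_a(doc):
    return doc


@Language.component("norm_b", assigns=["token.norm"])
def norm_b(doc):
    return doc


def normalize_token_attr(attr):
    # copied from edsnlp/matchers/utils/__init__.py as added by this commit
    if attr.startswith("doc.") or attr.startswith("span."):
        return None
    attr = attr.replace("token.", "")
    lower = attr.replace("_", "").lower()
    return "text" if lower == "orth" else lower


nlp = spacy.blank("fr")
nlp.add_pipe("norm_a")
nlp.add_pipe("norm_b")
nlp.disable_pipe("norm_b")  # e.g. disabled by the user before patterns are built

matched_attr = normalize_token_attr("NORM")
token_pipelines = [
    name
    for name, pipe in nlp.pipeline
    if name not in nlp.disabled
    and matched_attr
    in {normalize_token_attr(ass) for ass in nlp.get_pipe_meta(name).assigns}
]
print(token_pipelines)  # ['norm_a'] -- the disabled pipe is left out

with nlp.select_pipes(enable=token_pipelines):
    nlp("insuffisance cardiaque")  # only the selected pipes run here, as in build_patterns
```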
9 changes: 9 additions & 0 deletions edsnlp/matchers/utils/__init__.py
@@ -4,6 +4,15 @@
 DictOrPattern = Union[Dict[str, ListOrStr], ListOrStr]
 Patterns = Dict[str, DictOrPattern]

+
+def normalize_token_attr(attr):
+    if attr.startswith("doc.") or attr.startswith("span."):
+        return None
+    attr = attr.replace("token.", "")
+    lower = attr.replace("_", "").lower()
+    return "text" if lower == "orth" else lower
+
+
 ATTRIBUTES = {
     "LOWER": "lower_",
     "TEXT": "text",
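A few illustrative calls to the new helper (hypothetical inputs; the expected values follow directly from the implementation above):

```python
from edsnlp.matchers.utils import normalize_token_attr

assert normalize_token_attr("token.norm_") == "norm"
assert normalize_token_attr("token.lower_") == "lower"
assert normalize_token_attr("ORTH") == "text"          # orth-like names map to "text"
assert normalize_token_attr("doc.ents") is None        # doc- and span-level assigns are ignored
assert normalize_token_attr("span._.negation") is None
```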
