-
Notifications
You must be signed in to change notification settings - Fork 3
/
spacyAnonymizers.py
32 lines (23 loc) · 1.01 KB
/
spacyAnonymizers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from typing import Callable

import spacy

from ..Anonymization import Anonymization
class _NamedEntitiesAnonymizer:
    '''
    Anonymizer that swaps every named entity detected by spacy for a fake one.

    spacy and one of its models must be installed:
    $ pip install spacy
    $ python -m spacy download <model>

    To pass an instance to an AnonymizerChain, use the
    NamedEntitiesAnonymizer wrapper instead of this class.
    '''

    def __init__(self, anonymization: Anonymization, model: str):
        '''
        Keep the anonymization context and load the spacy model.

        anonymization: object whose replace_all() performs the substitution.
        model: name handed to spacy.load().
        '''
        # The pipeline is loaded once here and reused by every anonymize() call.
        self.processor = spacy.load(model)
        self.anonymization = anonymization

    def anonymize(self, text: str) -> str:
        '''
        Run the spacy pipeline on text and return the result of
        anonymization.replace_all() applied to the detected entities.
        '''
        entity_texts = []
        for entity in self.processor(text).ents:
            raw = entity.text
            # Entities consisting solely of whitespace are dropped.
            if raw.isspace():
                continue
            # Surrounding whitespace is trimmed from the entities that remain.
            entity_texts.append(raw.strip())
        return self.anonymization.replace_all(text, entity_texts, 'first_name')
def NamedEntitiesAnonymizer(model: str) -> Callable[[Anonymization], _NamedEntitiesAnonymizer]:
    '''
    Context wrapper for _NamedEntitiesAnonymizer, takes a spacy model.

    No anonymizer is built here. The return value is a one-argument factory:
    calling it with an Anonymization instance constructs the
    _NamedEntitiesAnonymizer (and therefore loads the spacy model) at that
    point.

    model: name of the spacy model, forwarded to _NamedEntitiesAnonymizer.
    '''
    # Defer construction until the anonymization context is supplied.
    return lambda anonymization: _NamedEntitiesAnonymizer(anonymization, model)