From b415da9544e1cafe3081900829fa5d96480354b6 Mon Sep 17 00:00:00 2001 From: Jeff Lerman Date: Fri, 29 Mar 2024 12:59:02 -0700 Subject: [PATCH 1/2] Add flake8 and fix issues raised by it. NOTE: For now, ignoring checks for missing docstrings, since there are so many. Those should be added and the checks enabled. --- .flake8 | 3 + .pre-commit-config.yaml | 9 ++- philter_lite/__init__.py | 11 ++++ philter_lite/asterisk.py | 4 +- philter_lite/coordinate_map.py | 83 +++++++++++++--------------- philter_lite/filters/__init__.py | 12 ++-- philter_lite/filters/stanford_ner.py | 14 ++--- philter_lite/main.py | 30 ++++------ philter_lite/philter.py | 34 +++++------- pyproject.toml | 2 + tests/test_philter.py | 12 ++-- 11 files changed, 104 insertions(+), 110 deletions(-) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..70f988c --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] + ignore = E203, E266, E501, W503, B028, D100, D101, D102, D103, D104 + max-line-length = 120 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b99214..aefd491 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ --- repos: - repo: https://github.com/psf/black - rev: 22.8.0 + rev: 23.9.1 hooks: - id: black language_version: python3 @@ -11,3 +11,10 @@ repos: hooks: - id: isort args: [ --profile, black ] + + - repo: https://github.com/pycqa/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + language: python + additional_dependencies: [flake8-bugbear, flake8-comprehensions, flake8-print, flake8-docstrings] \ No newline at end of file diff --git a/philter_lite/__init__.py b/philter_lite/__init__.py index 7aec798..10cfb0c 100644 --- a/philter_lite/__init__.py +++ b/philter_lite/__init__.py @@ -1,3 +1,4 @@ +"""The philter_lite package.""" from importlib import metadata from philter_lite.coordinate_map import CoordinateMap @@ -8,6 +9,16 @@ from .asterisk import transform_text_asterisk from .i2b2 import 
transform_text_i2b2 +__all__ = [ + "CoordinateMap", + "Filter", + "filter_from_dict", + "load_filters", + "detect_phi", + "transform_text_asterisk", + "transform_text_i2b2", +] + _DISTRIBUTION_METADATA = metadata.metadata("philter_lite") __author__ = _DISTRIBUTION_METADATA["Author"] diff --git a/philter_lite/asterisk.py b/philter_lite/asterisk.py index 9f3bbda..388d163 100644 --- a/philter_lite/asterisk.py +++ b/philter_lite/asterisk.py @@ -1,7 +1,8 @@ -from .coordinate_map import CoordinateMap, PUNCTUATION_MATCHER +from .coordinate_map import PUNCTUATION_MATCHER, CoordinateMap def save_to_asterisk(contents, output_file): + """Write some data to a text file, using utf-8 encoding.""" with open(output_file, "w", encoding="utf-8", errors="surrogateescape") as f: f.write(contents) @@ -11,7 +12,6 @@ def transform_text_asterisk(txt, include_map: CoordinateMap): # read the text by character, any non-punc non-overlaps will be replaced contents = [] for i in range(len(txt)): - if i < last_marker: continue diff --git a/philter_lite/coordinate_map.py b/philter_lite/coordinate_map.py index b35d426..ea6afca 100644 --- a/philter_lite/coordinate_map.py +++ b/philter_lite/coordinate_map.py @@ -6,31 +6,32 @@ class CoordinateMap: - """Hits are stored in a coordinate map data structure + """Internal data structure mapping filepaths to a map of int:string (coordinate start --> stop). - This class stores start coordinates for any matches found for this pattern""" + Hits are stored in a coordinate map data structure. - def __init__(self): - """internal data structure maps filepaths to a map of int:string (coordinate start --> stop) - - map is the internal structure of - { filename : { startcoordinate : stop_coordinate}} - eg: { "data/foo.txt": {123:126, 19:25} } - - coord2pattern keeps reference of the patterns - that matched this coorinate (can be multiple patterns) + This class stores start coordinates for any matches found for this pattern. 
- all_coords keeps a reference of all coordinates mapped by filename, - allowing us to easily check if these coordinates have been matched yet""" + Attributes: + map: Has the internal structure of + { filename : { startcoordinate : stop_coordinate}} + eg: { "data/foo.txt": {123:126, 19:25} } + coord2pattern: Keeps reference of the patterns that matched this coordinate + (can be multiple patterns). + all_coords: Keeps a reference of all coordinates mapped by filename, + allowing us to easily check if these coordinates have been matched yet. + """ + def __init__(self): + """Initialize.""" self.map = {} self.coord2pattern = {} self.all_coords = {} def add(self, start, stop, overlap=False, pattern=""): - """adds a new coordinate to the coordinate map + """Add a new coordinate to the coordinate map. - if overlap is false, this will reject any overlapping hits (usually from multiple regex scan runs) + If overlap is false, this will reject any overlapping hits (usually from multiple regex scan runs). """ if not overlap: if self.does_overlap(start, stop): @@ -47,13 +48,15 @@ def add(self, start, stop, overlap=False, pattern=""): return True, None def add_pattern(self, start, stop, pattern): - """adds this pattern to this start coord""" + """Add this pattern to this start coord.""" self.coord2pattern[start] = [] self.coord2pattern[start].append(pattern) def add_extend(self, start, stop, pattern=""): - """adds a new coordinate to the coordinate map - if overlaps with another, will extend to the larger size""" + """Add a new coordinate to the coordinate map. + + If overlaps with another, will extend to the larger size. 
+ """ overlaps = self.max_overlap(start, stop) def clear_overlaps(lst): @@ -78,7 +81,7 @@ def clear_overlaps(lst): return True, None def remove(self, start, stop): - """Removes this coordinate pairing from the map, all_coords, and coord2pattern""" + """Remove this coordinate pairing from the map, all_coords, and coord2pattern.""" # delete from our map structure if start in self.map: del self.map[start] @@ -89,7 +92,7 @@ def remove(self, start, stop): return True, None def scan(self): - """does an inorder scan of the coordinates and their values""" + """Do an inorder scan of the coordinates and their values.""" for fn in self.map: coords = list(self.map[fn].keys()) coords.sort() @@ -105,20 +108,19 @@ def get_coords(self, start): return start, stop def filecoords(self): - """generator does an inorder scan of the coordinates for this file""" + """Provide a generator of an in-order scan of the coordinates for this file.""" coords = sorted(self.map.keys()) for coord in coords: yield coord, self.map[coord] def does_exist(self, index): - """Simple check to see if this index is a hit (start of coordinates)""" + """Simply check to see if this index is a hit (start of coordinates).""" if index in self.map: return True return False def does_overlap(self, start, stop): - """Check if this coordinate overlaps with any existing range""" - + """Check if this coordinate overlaps with any existing range.""" ranges = [list(range(key, self.map[key] + 1)) for key in self.map] all_coords = [item for sublist in ranges for item in sublist] # removing all_coords implementation until we write some tests @@ -128,10 +130,11 @@ def does_overlap(self, start, stop): return False def calc_overlap(self, start, stop): - """given a set of coordinates, will calculate all overlaps - perf: stop after we know we won't hit any more - perf: use binary search approach""" + """Given a set of coordinates, calculate all overlaps. 
+ perf: stop after we know we won't hit any more + perf: use binary search approach + """ overlaps = [] for s in self.map: e = self.map[s] @@ -149,10 +152,11 @@ def calc_overlap(self, start, stop): return overlaps def max_overlap(self, start, stop): - """given a set of coordinates, will calculate max of all overlaps - perf: stop after we know we won't hit any more - perf: use binary search approach""" + """Given a set of coordinates, calculate max of all overlaps. + perf: stop after we know we won't hit any more + perf: use binary search approach + """ overlaps = [] for s in self.map: e = self.map[s] @@ -168,9 +172,7 @@ def max_overlap(self, start, stop): } ) else: - overlaps.append( - {"orig_start": s, "orig_end": e, "new_start": s, "new_stop": e} - ) + overlaps.append({"orig_start": s, "orig_end": e, "new_start": s, "new_stop": e}) elif s <= stop <= e: if start <= s: @@ -183,15 +185,12 @@ def max_overlap(self, start, stop): } ) else: - overlaps.append( - {"orig_start": s, "orig_end": e, "new_start": s, "new_stop": e} - ) + overlaps.append({"orig_start": s, "orig_end": e, "new_start": s, "new_stop": e}) return overlaps def get_complement(self, text): - """get the complementary coordinates of the input coordinate map (excludes punctuation)""" - + """Get the complementary coordinates of the input coordinate map (excludes punctuation).""" complement_coordinate_map = {} current_map_coordinates: List[int] = [] @@ -201,9 +200,7 @@ def get_complement(self, text): current_map_coordinates += range(start, stop) text_coordinates = list(range(0, len(text))) - complement_coordinates = list( - set(text_coordinates) - set(current_map_coordinates) - ) + complement_coordinates = list(set(text_coordinates) - set(current_map_coordinates)) # Remove punctuation from complement coordinates for i in range(0, len(text)): @@ -214,9 +211,7 @@ def get_complement(self, text): # Group complement coordinates into ranges def to_ranges(iterable): iterable = sorted(set(iterable)) - for key, group 
in itertools.groupby( - enumerate(iterable), lambda t: t[1] - t[0] - ): + for _key, group in itertools.groupby(enumerate(iterable), lambda t: t[1] - t[0]): group_list = list(group) yield group_list[0][1], group_list[-1][1] + 1 diff --git a/philter_lite/filters/__init__.py b/philter_lite/filters/__init__.py index 061fffc..9639abd 100644 --- a/philter_lite/filters/__init__.py +++ b/philter_lite/filters/__init__.py @@ -130,9 +130,9 @@ def filter_from_dict( def load_filters(filter_path) -> List[Filter]: - """Loads filters from a file on disk. + """Load filters from a file on disk. - File must be a toml file with a key of `filters` + File must be a toml file with a key of `filters`. """ if not os.path.exists(filter_path): raise Exception("Filepath does not exist", filter_path) @@ -140,13 +140,11 @@ def load_filters(filter_path) -> List[Filter]: return [filter_from_dict(x) for x in toml.loads(fil_file.read())["filters"]] -def _precompile(regex: str): - """precompiles our regex to speed up pattern matching""" +def _precompile(regex: str) -> Pattern[str]: + """Precompile our regex to speed up pattern matching.""" # NOTE: this is not thread safe! but we want to print a more detailed warning message with warnings.catch_warnings(): - warnings.simplefilter( - action="error", category=FutureWarning - ) # in order to print a detailed message + warnings.simplefilter(action="error", category=FutureWarning) # in order to print a detailed message try: re_compiled = re.compile(regex) except FutureWarning: diff --git a/philter_lite/filters/stanford_ner.py b/philter_lite/filters/stanford_ner.py index 1e93228..afc999d 100644 --- a/philter_lite/filters/stanford_ner.py +++ b/philter_lite/filters/stanford_ner.py @@ -1,6 +1,7 @@ import os import re import subprocess +import sys from nltk.tag.stanford import StanfordNERTagger @@ -9,16 +10,12 @@ from . 
import NerFilter -def build_ner_tagger( - classifier, tagger_jar, download: bool = True -) -> StanfordNERTagger: +def build_ner_tagger(classifier, tagger_jar, download: bool = True) -> StanfordNERTagger: if not os.path.exists(classifier) and not download: raise Exception("Filepath does not exist", classifier) else: # download the ner data - process = subprocess.Popen( - "cd generate_dataset && ./download_ner.sh".split(), stdout=subprocess.PIPE - ) + process = subprocess.Popen("cd generate_dataset && ./download_ner.sh".split(), stdout=subprocess.PIPE) process.communicate() if not os.path.exists(tagger_jar): @@ -34,7 +31,7 @@ def map_ner( stanford_ner_tagger: StanfordNERTagger, pre_process=r"[^a-zA-Z0-9]+", ) -> CoordinateMap: - """map NER tagging""" + """Map NER tagging.""" pos_set = set() if pattern.pos: pos_set = set(pattern.pos) @@ -62,7 +59,6 @@ def map_ner( # add these coordinates to our coordinate map start_coordinate = 0 for word in cleaned: - word_clean = re.sub(pre_process, "", word.lower().strip()) if len(word_clean) == 0: # got a blank space or something without any characters or digits, move forward @@ -75,7 +71,7 @@ def map_ner( if ner_tag in pos_set: stop = start + len(word) coord_map.add_extend(start, stop) - print("FOUND: ", word, "NER: ", ner_tag, start, stop) + sys.stdout.write(f"FOUND: {word} NER: {ner_tag} {start} {stop}") # advance our start coordinate start_coordinate += len(word) diff --git a/philter_lite/main.py b/philter_lite/main.py index 578c5d1..c562c0f 100644 --- a/philter_lite/main.py +++ b/philter_lite/main.py @@ -1,6 +1,7 @@ import argparse import distutils.util import os +import sys import philter_lite @@ -13,16 +14,14 @@ def main(): "-i", "--input", default="./data/i2b2_notes/", - help="Path to the directory or the file that contains the PHI note, the default is " - "./data/i2b2_notes/", + help="Path to the directory or the file that contains the PHI note, the default is ./data/i2b2_notes/", type=str, ) ap.add_argument( "-a", 
"--anno", default="./data/i2b2_anno/", - help="Path to the directory or the file that contains the PHI annotation, the default is " - "./data/i2b2_anno/", + help="Path to the directory or the file that contains the PHI annotation, the default is ./data/i2b2_anno/", type=str, ) ap.add_argument( @@ -84,8 +83,7 @@ def main(): ap.add_argument( "--outputformat", default="i2b2", - help='Define format of annotation, allowed values are "asterisk", "i2b2". Default ' - 'is "asterisk"', + help='Define format of annotation, allowed values are "asterisk", "i2b2". Default is "asterisk"', type=str, ) ap.add_argument( @@ -142,29 +140,21 @@ def main(): } if verbose: - print("RUNNING ", philter_config["filters"]) + sys.stderr.write(f'RUNNING {philter_config["filters"]}') filters = philter_lite.load_filters(philter_config["filters"]) - for root, dirs, files in os.walk(philter_config["finpath"]): + for root, _dirs, files in os.walk(philter_config["finpath"]): for file in files: with open(os.path.join(root, file)) as inf: text = inf.read() - include_map, exclude_map, data_tracker = philter_lite.detect_phi( - text, patterns=filters - ) + include_map, exclude_map, data_tracker = philter_lite.detect_phi(text, patterns=filters) if philter_config["outformat"] == "i2b2": - with open( - os.path.join(philter_config["foutpath"], f"{file}.txt"), "w" - ) as fout: + with open(os.path.join(philter_config["foutpath"], f"{file}.txt"), "w") as fout: fout.write(philter_lite.transform_text_i2b2(data_tracker)) elif philter_config["outformat"] == "asterisk": - with open( - os.path.join(philter_config["foutpath"], f"{file}.txt"), "w" - ) as fout: - fout.write( - philter_lite.transform_text_asterisk(text, include_map) - ) + with open(os.path.join(philter_config["foutpath"], f"{file}.txt"), "w") as fout: + fout.write(philter_lite.transform_text_asterisk(text, include_map)) if __name__ == "__main__": diff --git a/philter_lite/philter.py b/philter_lite/philter.py index 5be585d..e208659 100644 --- 
a/philter_lite/philter.py +++ b/philter_lite/philter.py @@ -53,8 +53,8 @@ def detect_phi( patterns: List[Filter], phi_type_list: List[str] = DEFAULT_PHI_TYPE_LIST, ): - """Runs the set, or regex on the input data - generating a coordinate map of hits given + """Run the set or regex on the input data, generating a coordinate map of hits given. + (this performs a dry run on the data and doesn't transform) """ # create coordinate maps for each pattern @@ -81,8 +81,8 @@ def detect_phi( pos_list = _get_pos(text_data) - # Create inital self.exclude/include for file - for i, pat in enumerate(patterns): + # Create initial self.exclude/include for file + for pat in patterns: pattern_coord = pattern_coords[pat.title] if pat.type == "regex" and isinstance(pat, RegexFilter): @@ -98,9 +98,7 @@ def detect_phi( pattern=pat, ) elif pat.type == "pos_matcher" and isinstance(pat, PosFilter): - _map_parts_of_speech( - pos_list=pos_list, coord_map=pattern_coord, pattern=pat - ) + _map_parts_of_speech(pos_list=pos_list, coord_map=pattern_coord, pattern=pat) elif pat.type == "match_all": _match_all(text=text_data, coord_map=pattern_coord) else: @@ -161,8 +159,9 @@ def _map_regex( coord_map: CoordinateMap, pre_process=REGEX_NON_ALPHANUM_CHAR, ) -> CoordinateMap: - """Creates a coordinate map from the pattern on this data - generating a coordinate map of hits given (dry run doesn't transform) + """Create a coordinate map from the pattern on this data. + + Generates a coordinate map of hits given (dry run doesn't transform). 
""" regex = pattern.data @@ -217,9 +216,7 @@ def _map_regex_context( include_map: CoordinateMap, pre_process=REGEX_NON_ALPHANUM_CHAR, ) -> CoordinateMap: - """map_regex_context creates a coordinate map from combined regex + PHI coordinates - of all previously mapped patterns - """ + """Create a CoordinateMap from combined regex + PHI coordinates of all previously mapped patterns.""" regex = pattern.data context = pattern.context try: @@ -248,7 +245,6 @@ def _map_regex_context( # 2. Find all patterns expressions that match regular expression matches = regex.finditer(text) for m in matches: - # initialize phi_left and phi_right phi_left = False phi_right = False @@ -301,15 +297,14 @@ def _map_regex_context( def _match_all(text, coord_map: CoordinateMap) -> CoordinateMap: - """Simply maps to the entirety of the file""" + """Simply map to the entirety of the file.""" # add the entire length of the file coord_map.add(0, len(text)) return coord_map def _map_set(pos_list, coord_map: CoordinateMap, pattern: SetFilter) -> CoordinateMap: - """Creates a coordinate mapping of words any words in this set""" - + """Create a coordinate mapping of words any words in this set.""" set_data = pattern.data # get part of speech we will be sending through this set @@ -343,11 +338,8 @@ def _map_set(pos_list, coord_map: CoordinateMap, pattern: SetFilter) -> Coordina return coord_map -def _map_parts_of_speech( - pos_list, pattern: PosFilter, coord_map: CoordinateMap -) -> CoordinateMap: - """Creates a coordinate mapping of words which match this part of speech (POS)""" - +def _map_parts_of_speech(pos_list, pattern: PosFilter, coord_map: CoordinateMap) -> CoordinateMap: + """Create a coordinate mapping of words which match this part of speech (POS).""" pos_set = set(pattern.pos) # Use pre-process to split sentence by spaces AND symbols, while preserving spaces in the split list diff --git a/pyproject.toml b/pyproject.toml index d0dea46..6714869 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -36,6 +36,8 @@ philter_lite = 'philter_lite.main:main' requires = ["poetry_core>=1.8.1"] build-backend = "poetry.core.masonry.api" +[tool.black] +line-length = 120 [tool.mypy] check_untyped_defs = true diff --git a/tests/test_philter.py b/tests/test_philter.py index 47a94d6..6a944d4 100644 --- a/tests/test_philter.py +++ b/tests/test_philter.py @@ -100,16 +100,16 @@ def test_filter_from_dict_missing_file(): "filepath": "filters/regex/addresses/non_existent.txt", } - with pytest.raises(Exception): + # TODO: This test appears to be intended to test that a reference to a file that doesn't exist + # causes an exception - but the exception raised has nothing to do with that file-path. + # Instead, the complaint is about a missing value for the "keyword" key. + # Should be investigated. + with pytest.raises(Exception): # noqa: B017 filter_from_dict(filter_dict) def test_default_config(): - filters = load_filters( - os.path.join( - os.path.dirname(philter_lite.__file__), "configs/philter_delta.toml" - ) - ) + filters = load_filters(os.path.join(os.path.dirname(philter_lite.__file__), "configs/philter_delta.toml")) assert len(filters) > 0 From 7e607c72bc65355d5db2b61944d14dbb8a9eae10 Mon Sep 17 00:00:00 2001 From: Jeff Lerman Date: Fri, 29 Mar 2024 13:13:12 -0700 Subject: [PATCH 2/2] add end-of-file fixer, and fix remaining flake8 issues --- .pre-commit-config.yaml | 7 ++++++- data/i2b2_anno/110-01.txt | 4 ---- data/i2b2_anno/110-02.txt | 5 ----- data/i2b2_anno/110-03.txt | 4 ---- data/i2b2_anno/110-04.txt | 3 --- data/i2b2_anno/111-01.txt | 5 ----- data/i2b2_notes/110-01.txt | 4 ---- data/i2b2_notes/110-02.txt | 5 ----- data/i2b2_notes/110-03.txt | 4 ---- data/i2b2_notes/110-04.txt | 3 --- data/i2b2_notes/111-01.txt | 5 ----- data/i2b2_xml/110-01.xml | 2 +- data/i2b2_xml/110-02.xml | 2 +- data/i2b2_xml/110-03.xml | 2 +- data/i2b2_xml/110-04.xml | 2 +- data/i2b2_xml/111-01.xml | 2 +- data/phi/fn.txt | 2 +- data/phi/fp.txt | 2 +- data/phi/summary.json | 2
+- data/phi_notes_i2b2.json | 2 +- philter_lite/configs/philter_delta.toml | 1 - philter_lite/data/i2b2_anno/110-01.txt | 4 ---- philter_lite/data/i2b2_anno/110-02.txt | 5 ----- philter_lite/data/i2b2_anno/110-03.txt | 4 ---- philter_lite/data/i2b2_anno/110-04.txt | 3 --- philter_lite/data/i2b2_anno/111-01.txt | 5 ----- philter_lite/data/i2b2_notes/110-01.txt | 4 ---- philter_lite/data/i2b2_notes/110-02.txt | 5 ----- philter_lite/data/i2b2_notes/110-03.txt | 4 ---- philter_lite/data/i2b2_notes/110-04.txt | 3 --- philter_lite/data/i2b2_notes/111-01.txt | 5 ----- philter_lite/data/i2b2_xml/110-01.xml | 2 +- philter_lite/data/i2b2_xml/110-02.xml | 2 +- philter_lite/data/i2b2_xml/110-03.xml | 2 +- philter_lite/data/i2b2_xml/110-04.xml | 2 +- philter_lite/data/i2b2_xml/111-01.xml | 2 +- philter_lite/data/phi/fn.txt | 2 +- philter_lite/data/phi/fp.txt | 2 +- philter_lite/data/phi/summary.json | 2 +- philter_lite/data/phi_notes_i2b2.json | 2 +- philter_lite/i2b2.py | 2 +- tests/test_package_metadata.py | 1 - 42 files changed, 25 insertions(+), 106 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aefd491..e4fd591 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,4 +17,9 @@ repos: hooks: - id: flake8 language: python - additional_dependencies: [flake8-bugbear, flake8-comprehensions, flake8-print, flake8-docstrings] \ No newline at end of file + additional_dependencies: [flake8-bugbear, flake8-comprehensions, flake8-print, flake8-docstrings] + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: end-of-file-fixer diff --git a/data/i2b2_anno/110-01.txt b/data/i2b2_anno/110-01.txt index 5a53ead..8538d0b 100644 --- a/data/i2b2_anno/110-01.txt +++ b/data/i2b2_anno/110-01.txt @@ -48,7 +48,3 @@ DD: **/**/** DT: **/**/** DV: **/**/** Approved but not reviewed by Attending Provider - - - - diff --git a/data/i2b2_anno/110-02.txt b/data/i2b2_anno/110-02.txt index e238bbe..17a86f1 100644 --- 
a/data/i2b2_anno/110-02.txt +++ b/data/i2b2_anno/110-02.txt @@ -66,8 +66,3 @@ P: Follow up with Dr. ***** in 3 months. GPP/******/******* - - - - - diff --git a/data/i2b2_anno/110-03.txt b/data/i2b2_anno/110-03.txt index bd7b4ff..d4fdcfa 100644 --- a/data/i2b2_anno/110-03.txt +++ b/data/i2b2_anno/110-03.txt @@ -100,7 +100,3 @@ GI prophylaxis: Zantac _______________________ **** ****, MD, EHMS pager ***** - - - - diff --git a/data/i2b2_anno/110-04.txt b/data/i2b2_anno/110-04.txt index 97408d3..5cadd64 100644 --- a/data/i2b2_anno/110-04.txt +++ b/data/i2b2_anno/110-04.txt @@ -176,6 +176,3 @@ Plan ______________________________ **** * *********, M.D. - - - diff --git a/data/i2b2_anno/111-01.txt b/data/i2b2_anno/111-01.txt index e5a2271..cdae610 100644 --- a/data/i2b2_anno/111-01.txt +++ b/data/i2b2_anno/111-01.txt @@ -66,8 +66,3 @@ ___________________________________ *****/***** Dictated by: ******* ******, M.D. **** Not reviewed by Attending Physician - - - - - diff --git a/data/i2b2_notes/110-01.txt b/data/i2b2_notes/110-01.txt index 45a6405..da5997e 100644 --- a/data/i2b2_notes/110-01.txt +++ b/data/i2b2_notes/110-01.txt @@ -48,7 +48,3 @@ DD: 04/07/69 DT: 04/15/69 DV: 04/07/69 Approved but not reviewed by Attending Provider - - - - diff --git a/data/i2b2_notes/110-02.txt b/data/i2b2_notes/110-02.txt index 0b5d6fb..f2b4f20 100644 --- a/data/i2b2_notes/110-02.txt +++ b/data/i2b2_notes/110-02.txt @@ -66,8 +66,3 @@ Gilbert P. Perez, M.D. 
GPP/church/olinger - - - - - diff --git a/data/i2b2_notes/110-03.txt b/data/i2b2_notes/110-03.txt index eae3007..575250b 100644 --- a/data/i2b2_notes/110-03.txt +++ b/data/i2b2_notes/110-03.txt @@ -100,7 +100,3 @@ GI prophylaxis: Zantac _______________________ Mike Ivan, MD, EHMS pager 84710 - - - - diff --git a/data/i2b2_notes/110-04.txt b/data/i2b2_notes/110-04.txt index dbf9bf6..1d45058 100644 --- a/data/i2b2_notes/110-04.txt +++ b/data/i2b2_notes/110-04.txt @@ -176,6 +176,3 @@ Plan ______________________________ Owen M Gallagher, M.D. - - - diff --git a/data/i2b2_notes/111-01.txt b/data/i2b2_notes/111-01.txt index 85196e1..39018f9 100644 --- a/data/i2b2_notes/111-01.txt +++ b/data/i2b2_notes/111-01.txt @@ -66,8 +66,3 @@ FILBERT BRIGHT, M.D. FB59 D:07/20/83 Dictated by: FILBERT BRIGHT, M.D. FB59 Not reviewed by Attending Physician - - - - - diff --git a/data/i2b2_xml/110-01.xml b/data/i2b2_xml/110-01.xml index c0c8326..92180df 100644 --- a/data/i2b2_xml/110-01.xml +++ b/data/i2b2_xml/110-01.xml @@ -65,4 +65,4 @@ DV: 04/07/69 - \ No newline at end of file + diff --git a/data/i2b2_xml/110-02.xml b/data/i2b2_xml/110-02.xml index a4653d7..7ed6313 100644 --- a/data/i2b2_xml/110-02.xml +++ b/data/i2b2_xml/110-02.xml @@ -83,4 +83,4 @@ GPP/church/olinger - \ No newline at end of file + diff --git a/data/i2b2_xml/110-03.xml b/data/i2b2_xml/110-03.xml index 547b70b..f40dffa 100644 --- a/data/i2b2_xml/110-03.xml +++ b/data/i2b2_xml/110-03.xml @@ -119,4 +119,4 @@ Mike Ivan, MD, EHMS pager 84710 - \ No newline at end of file + diff --git a/data/i2b2_xml/110-04.xml b/data/i2b2_xml/110-04.xml index 50c3069..0ebd892 100644 --- a/data/i2b2_xml/110-04.xml +++ b/data/i2b2_xml/110-04.xml @@ -192,4 +192,4 @@ Owen M Gallagher, M.D. - \ No newline at end of file + diff --git a/data/i2b2_xml/111-01.xml b/data/i2b2_xml/111-01.xml index 9f7470e..ea6b9f9 100644 --- a/data/i2b2_xml/111-01.xml +++ b/data/i2b2_xml/111-01.xml @@ -89,4 +89,4 @@ Dictated by: FILBERT BRIGHT, M.D. 
FB59 - \ No newline at end of file + diff --git a/data/phi/fn.txt b/data/phi/fn.txt index b979605..120c3b1 100644 --- a/data/phi/fn.txt +++ b/data/phi/fn.txt @@ -1,4 +1,4 @@ [ "SILVER", "church" -] \ No newline at end of file +] diff --git a/data/phi/fp.txt b/data/phi/fp.txt index a694fda..0dca58b 100644 --- a/data/phi/fp.txt +++ b/data/phi/fp.txt @@ -30,4 +30,4 @@ "Will", "Will", "Will" -] \ No newline at end of file +] diff --git a/data/phi/summary.json b/data/phi/summary.json index 2504d4b..5c380c2 100644 --- a/data/phi/summary.json +++ b/data/phi/summary.json @@ -74,4 +74,4 @@ "num_false_negatives": 0 } } -} \ No newline at end of file +} diff --git a/data/phi_notes_i2b2.json b/data/phi_notes_i2b2.json index 24a7dfe..c50b356 100644 --- a/data/phi_notes_i2b2.json +++ b/data/phi_notes_i2b2.json @@ -392,4 +392,4 @@ } ] } -} \ No newline at end of file +} diff --git a/philter_lite/configs/philter_delta.toml b/philter_lite/configs/philter_delta.toml index bb40411..d10707f 100644 --- a/philter_lite/configs/philter_delta.toml +++ b/philter_lite/configs/philter_delta.toml @@ -2245,4 +2245,3 @@ context = "left_or_right" context_filter = "all" notes = "" keyword = "regex_context.initials" - diff --git a/philter_lite/data/i2b2_anno/110-01.txt b/philter_lite/data/i2b2_anno/110-01.txt index 5a53ead..8538d0b 100644 --- a/philter_lite/data/i2b2_anno/110-01.txt +++ b/philter_lite/data/i2b2_anno/110-01.txt @@ -48,7 +48,3 @@ DD: **/**/** DT: **/**/** DV: **/**/** Approved but not reviewed by Attending Provider - - - - diff --git a/philter_lite/data/i2b2_anno/110-02.txt b/philter_lite/data/i2b2_anno/110-02.txt index e238bbe..17a86f1 100644 --- a/philter_lite/data/i2b2_anno/110-02.txt +++ b/philter_lite/data/i2b2_anno/110-02.txt @@ -66,8 +66,3 @@ P: Follow up with Dr. ***** in 3 months. 
GPP/******/******* - - - - - diff --git a/philter_lite/data/i2b2_anno/110-03.txt b/philter_lite/data/i2b2_anno/110-03.txt index bd7b4ff..d4fdcfa 100644 --- a/philter_lite/data/i2b2_anno/110-03.txt +++ b/philter_lite/data/i2b2_anno/110-03.txt @@ -100,7 +100,3 @@ GI prophylaxis: Zantac _______________________ **** ****, MD, EHMS pager ***** - - - - diff --git a/philter_lite/data/i2b2_anno/110-04.txt b/philter_lite/data/i2b2_anno/110-04.txt index 97408d3..5cadd64 100644 --- a/philter_lite/data/i2b2_anno/110-04.txt +++ b/philter_lite/data/i2b2_anno/110-04.txt @@ -176,6 +176,3 @@ Plan ______________________________ **** * *********, M.D. - - - diff --git a/philter_lite/data/i2b2_anno/111-01.txt b/philter_lite/data/i2b2_anno/111-01.txt index e5a2271..cdae610 100644 --- a/philter_lite/data/i2b2_anno/111-01.txt +++ b/philter_lite/data/i2b2_anno/111-01.txt @@ -66,8 +66,3 @@ ___________________________________ *****/***** Dictated by: ******* ******, M.D. **** Not reviewed by Attending Physician - - - - - diff --git a/philter_lite/data/i2b2_notes/110-01.txt b/philter_lite/data/i2b2_notes/110-01.txt index 45a6405..da5997e 100644 --- a/philter_lite/data/i2b2_notes/110-01.txt +++ b/philter_lite/data/i2b2_notes/110-01.txt @@ -48,7 +48,3 @@ DD: 04/07/69 DT: 04/15/69 DV: 04/07/69 Approved but not reviewed by Attending Provider - - - - diff --git a/philter_lite/data/i2b2_notes/110-02.txt b/philter_lite/data/i2b2_notes/110-02.txt index 0b5d6fb..f2b4f20 100644 --- a/philter_lite/data/i2b2_notes/110-02.txt +++ b/philter_lite/data/i2b2_notes/110-02.txt @@ -66,8 +66,3 @@ Gilbert P. Perez, M.D. 
GPP/church/olinger - - - - - diff --git a/philter_lite/data/i2b2_notes/110-03.txt b/philter_lite/data/i2b2_notes/110-03.txt index eae3007..575250b 100644 --- a/philter_lite/data/i2b2_notes/110-03.txt +++ b/philter_lite/data/i2b2_notes/110-03.txt @@ -100,7 +100,3 @@ GI prophylaxis: Zantac _______________________ Mike Ivan, MD, EHMS pager 84710 - - - - diff --git a/philter_lite/data/i2b2_notes/110-04.txt b/philter_lite/data/i2b2_notes/110-04.txt index dbf9bf6..1d45058 100644 --- a/philter_lite/data/i2b2_notes/110-04.txt +++ b/philter_lite/data/i2b2_notes/110-04.txt @@ -176,6 +176,3 @@ Plan ______________________________ Owen M Gallagher, M.D. - - - diff --git a/philter_lite/data/i2b2_notes/111-01.txt b/philter_lite/data/i2b2_notes/111-01.txt index 85196e1..39018f9 100644 --- a/philter_lite/data/i2b2_notes/111-01.txt +++ b/philter_lite/data/i2b2_notes/111-01.txt @@ -66,8 +66,3 @@ FILBERT BRIGHT, M.D. FB59 D:07/20/83 Dictated by: FILBERT BRIGHT, M.D. FB59 Not reviewed by Attending Physician - - - - - diff --git a/philter_lite/data/i2b2_xml/110-01.xml b/philter_lite/data/i2b2_xml/110-01.xml index c0c8326..92180df 100644 --- a/philter_lite/data/i2b2_xml/110-01.xml +++ b/philter_lite/data/i2b2_xml/110-01.xml @@ -65,4 +65,4 @@ DV: 04/07/69 - \ No newline at end of file + diff --git a/philter_lite/data/i2b2_xml/110-02.xml b/philter_lite/data/i2b2_xml/110-02.xml index a4653d7..7ed6313 100644 --- a/philter_lite/data/i2b2_xml/110-02.xml +++ b/philter_lite/data/i2b2_xml/110-02.xml @@ -83,4 +83,4 @@ GPP/church/olinger - \ No newline at end of file + diff --git a/philter_lite/data/i2b2_xml/110-03.xml b/philter_lite/data/i2b2_xml/110-03.xml index 547b70b..f40dffa 100644 --- a/philter_lite/data/i2b2_xml/110-03.xml +++ b/philter_lite/data/i2b2_xml/110-03.xml @@ -119,4 +119,4 @@ Mike Ivan, MD, EHMS pager 84710 - \ No newline at end of file + diff --git a/philter_lite/data/i2b2_xml/110-04.xml b/philter_lite/data/i2b2_xml/110-04.xml index 50c3069..0ebd892 100644 --- 
a/philter_lite/data/i2b2_xml/110-04.xml +++ b/philter_lite/data/i2b2_xml/110-04.xml @@ -192,4 +192,4 @@ Owen M Gallagher, M.D. - \ No newline at end of file + diff --git a/philter_lite/data/i2b2_xml/111-01.xml b/philter_lite/data/i2b2_xml/111-01.xml index 9f7470e..ea6b9f9 100644 --- a/philter_lite/data/i2b2_xml/111-01.xml +++ b/philter_lite/data/i2b2_xml/111-01.xml @@ -89,4 +89,4 @@ Dictated by: FILBERT BRIGHT, M.D. FB59 - \ No newline at end of file + diff --git a/philter_lite/data/phi/fn.txt b/philter_lite/data/phi/fn.txt index 946ea46..2fb7825 100644 --- a/philter_lite/data/phi/fn.txt +++ b/philter_lite/data/phi/fn.txt @@ -2,4 +2,4 @@ "SILVER", "church", "holmes" -] \ No newline at end of file +] diff --git a/philter_lite/data/phi/fp.txt b/philter_lite/data/phi/fp.txt index e8eb5a9..46ba7db 100644 --- a/philter_lite/data/phi/fp.txt +++ b/philter_lite/data/phi/fp.txt @@ -35,4 +35,4 @@ "EHMS", "New", "XGT" -] \ No newline at end of file +] diff --git a/philter_lite/data/phi/summary.json b/philter_lite/data/phi/summary.json index d298511..b2e6a06 100644 --- a/philter_lite/data/phi/summary.json +++ b/philter_lite/data/phi/summary.json @@ -80,4 +80,4 @@ "num_false_negatives": 1 } } -} \ No newline at end of file +} diff --git a/philter_lite/data/phi_notes_i2b2.json b/philter_lite/data/phi_notes_i2b2.json index 24a7dfe..c50b356 100644 --- a/philter_lite/data/phi_notes_i2b2.json +++ b/philter_lite/data/phi_notes_i2b2.json @@ -392,4 +392,4 @@ } ] } -} \ No newline at end of file +} diff --git a/philter_lite/i2b2.py b/philter_lite/i2b2.py index 40e1abf..1a04123 100644 --- a/philter_lite/i2b2.py +++ b/philter_lite/i2b2.py @@ -7,7 +7,7 @@ def save_to_i2b2(contents, output_file): def transform_text_i2b2(tagdata: DataTracker): - """creates a string in i2b2-XML format""" + """Create a string in i2b2-XML format.""" root = "Philter" contents = [ '\n', diff --git a/tests/test_package_metadata.py b/tests/test_package_metadata.py index 2d6d451..28c1d0b 100644 --- 
a/tests/test_package_metadata.py +++ b/tests/test_package_metadata.py @@ -1,6 +1,5 @@ """Confirm that project metadata is set correctly.""" import os -import re from typing import Any, Mapping import toml