From 83f0dc3d8c0e7c6312313c2832a2739b9516d60c Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sun, 28 Aug 2022 19:47:50 +0300 Subject: [PATCH] 35% faster than `pathspec` \o/. #24 cpburnz/python-path-specification#38 --- gitignorefile/__init__.py | 35 ++++++++++++++++++++++------------- tests/test_match.py | 8 ++++++++ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/gitignorefile/__init__.py b/gitignorefile/__init__.py index 2fc08a5..df2e17b 100644 --- a/gitignorefile/__init__.py +++ b/gitignorefile/__init__.py @@ -198,11 +198,7 @@ def _rule_from_pattern(pattern): pattern = pattern[:i] i -= 1 - regexp = _fnmatch_pathname_to_regexp(pattern, directory_only) - - if anchored: - regexp = f"^{regexp}" - + regexp = _fnmatch_pathname_to_regexp(pattern, anchored, directory_only) return _IgnoreRule(regexp, negation, directory_only) @@ -244,19 +240,24 @@ def __init__(self, regexp, negation, directory_only): self.__regexp = re.compile(regexp) self.__negation = negation self.__directory_only = directory_only + self.__match = self.__regexp.match + + @property + def regexp(self): + return self.__regexp @property def negation(self): return self.__negation def match(self, rel_path, is_dir): - match = self.__regexp.search(rel_path) + m = self.__match(rel_path) # If we need a directory, check there is something after slash and if there is not, target must be a directory. # If there is something after slash then it's a directory irrelevant to type of target. # `self.directory_only` implies we have group number 1. # N.B. Question mark inside a group without a name can shift indices. :( - return match and (not self.__directory_only or match.group(1) is not None or is_dir) + return m and (not self.__directory_only or m.group(1) is not None or is_dir) if os.altsep is not None: @@ -269,14 +270,22 @@ def match(self, rel_path, is_dir): # Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME # option that `.gitignore`'s behavior depends on. -def _fnmatch_pathname_to_regexp(pattern, directory_only): +def _fnmatch_pathname_to_regexp(pattern, anchored, directory_only): """ Implements `fnmatch` style-behavior, as though with `FNM_PATHNAME` flagged; the path separator will not match shell-style `*` and `.` wildcards. """ + + if not pattern: + if directory_only: # Empty name means no path fragment. + return "[^/]+(/.+)?$" + + else: + return ".*" + i, n = 0, len(pattern) - res = ["(?:^|/)"] if pattern else [] # Empty name means no path fragment. + res = ["(?:^|.+/)" if not anchored else ""] while i < n: c = pattern[i] i += 1 @@ -290,10 +299,10 @@ def _fnmatch_pathname_to_regexp(pattern, directory_only): res.append("/?") else: - res.append(f"[^/]*") + res.append("[^/]*") except IndexError: - res.append(f"[^/]*") + res.append("[^/]*") elif c == "?": res.append("[^/]") @@ -322,9 +331,9 @@ def _fnmatch_pathname_to_regexp(pattern, directory_only): res.append(re.escape(c)) if directory_only: # In this case we are interested if there is something after slash. - res.append(f"(/.+)?$") + res.append("(/.+)?$") else: - res.append(f"(?:/|$)") + res.append("(?:/.+)?$") return "".join(res) diff --git a/tests/test_match.py b/tests/test_match.py index 78e8769..a4ccbb9 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -108,6 +108,14 @@ def test_second_level_directories_unchained(self): self.assertTrue(matches("/home/michael/a/doc/frotz", is_dir=True)) self.assertFalse(matches("/home/michael/a/b/doc/frotz", is_dir=False)) self.assertTrue(matches("/home/michael/a/b/doc/frotz", is_dir=True)) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/doc/frotz/file", is_dir=False)) + self.assertTrue(matches("/home/michael/doc/frotz/file", is_dir=True)) + self.assertTrue(matches("/home/michael/a/doc/frotz/file", is_dir=False)) + self.assertTrue(matches("/home/michael/a/doc/frotz/file", is_dir=True)) + self.assertTrue(matches("/home/michael/a/b/doc/frotz/file", is_dir=False)) + self.assertTrue(matches("/home/michael/a/b/doc/frotz/file", is_dir=True)) def test_second_level_files(self): matches = self.__parse_gitignore_string(["doc/frotz"], fake_base_dir="/home/michael")