diff --git a/gitignorefile/__init__.py b/gitignorefile/__init__.py index 31bf1fd..24722dc 100644 --- a/gitignorefile/__init__.py +++ b/gitignorefile/__init__.py @@ -29,35 +29,20 @@ def ignored(path, is_dir=None): class Cache: def __init__(self): - self.__gitignores = {} - - def __get_parents(self, path, is_dir): - if not is_dir: - path = os.path.dirname(path) - yield path - - while True: - new_path = os.path.dirname(path) - if not os.path.samefile(path, new_path): - yield new_path - path = new_path - else: - break + self.__gitignores = {tuple(): []} def __call__(self, path, is_dir=None): - if is_dir is None: - is_dir = os.path.isdir(path) - + path = _Path(path) add_to_children = {} plain_paths = [] - for parent in self.__get_parents(os.path.abspath(path), is_dir=is_dir): - if parent in self.__gitignores: + for parent in path.parents(): + if parent.parts in self.__gitignores: break - parent_gitignore = os.path.join(parent, ".gitignore") - if os.path.isfile(parent_gitignore): - p = parse(parent_gitignore, base_path=parent) - add_to_children[parent] = (p, plain_paths) + parent_gitignore = parent.join(".gitignore") + if parent_gitignore.isfile(): + matches = parse(str(parent_gitignore), base_path=parent) + add_to_children[parent] = (matches, plain_paths) plain_paths = [] else: @@ -65,25 +50,82 @@ def __call__(self, path, is_dir=None): else: for plain_path in plain_paths: - self.__gitignores[plain_path] = [] + # assert plain_path.parts not in self.__gitignores + self.__gitignores[plain_path.parts] = [] + + if add_to_children: + plain_paths.clear() - if not add_to_children: + else: return False for parent, (_, parent_plain_paths) in reversed(list(add_to_children.items())): - self.__gitignores[parent] = [] + # assert parent.parts not in self.__gitignores + self.__gitignores[parent.parts] = self.__gitignores[parent.parts[:-1]].copy() for parent_to_add, (gitignore_to_add, _) in reversed(list(add_to_children.items())): - self.__gitignores[parent].append(gitignore_to_add) + self.__gitignores[parent.parts].append(gitignore_to_add) if parent_to_add == parent: break - self.__gitignores[parent].reverse() + self.__gitignores[parent.parts].reverse() + for plain_path in parent_plain_paths: - self.__gitignores[plain_path] = self.__gitignores[parent] + # assert plain_path.parts not in self.__gitignores + self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts] + + # This parent comes either from first or second loop. + for plain_path in plain_paths: + # assert plain_path.parts not in self.__gitignores + self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts] + + return any((m(path, is_dir=is_dir) for m in self.__gitignores[parent.parts])) + + +class _Path: + def __init__(self, path): + if isinstance(path, str): + abs_path = os.path.abspath(path) + self.__parts = tuple(_path_split(abs_path)) + self.__joined = abs_path + self.__is_dir = None + + else: + self.__parts = path + self.__joined = None + self.__is_dir = None + + @property + def parts(self): + return self.__parts + + def join(self, name): + return _Path(self.__parts + (name,)) + + def relpath(self, base_path): + # assert self.__parts[: len(base_path.__parts)] == base_path.__parts + return "/".join(self.__parts[len(base_path.__parts) :]) - return any( - (m(path, is_dir=is_dir) for m in self.__gitignores[parent]) - ) # This parent comes either from first or second loop. + def parents(self): + for i in range(len(self.__parts) - 1, 0, -1): + yield _Path(self.__parts[:i]) + + def isfile(self): + if self.__joined is None: + self.__joined = "/".join(self.__parts) + return os.path.isfile(self.__joined) + + def isdir(self): + if self.__is_dir is not None: + return self.__is_dir + if self.__joined is None: + self.__joined = "/".join(self.__parts) + self.__is_dir = os.path.isdir(self.__joined) + return self.__is_dir + + def __str__(self): + if self.__joined is None: + self.__joined = "/".join(self.__parts) if self.__parts != ("",) else "/" + return self.__joined def _rule_from_pattern(pattern): @@ -160,11 +202,7 @@ def _rule_from_pattern(pattern): pattern = pattern[:i] i -= 1 - regexp = _fnmatch_pathname_to_regexp(pattern, directory_only) - - if anchored: - regexp = f"^{regexp}" - + regexp = _fnmatch_pathname_to_regexp(pattern, anchored, directory_only) return _IgnoreRule(regexp, negation, directory_only) @@ -172,20 +210,21 @@ class _IgnoreRules: def __init__(self, rules, base_path): self.__rules = rules self.__can_return_immediately = not any((r.negation for r in rules)) - self.__base_path = base_path + self.__base_path = _Path(base_path) if isinstance(base_path, str) else base_path def match(self, path, is_dir=None): """ Because Git allows for nested `.gitignore` files, a `base_path` value is required for correct behavior. """ - if is_dir is None: - is_dir = os.path.isdir(path) - rel_path = os.path.relpath(path, self.__base_path) + if isinstance(path, str): + path = _Path(path) + + if is_dir is None: + is_dir = path.isdir() # TODO Pass callable here. - if rel_path.startswith(f".{os.sep}"): - rel_path = rel_path[2:] + rel_path = path.relpath(self.__base_path) if self.__can_return_immediately: return any((r.match(rel_path, is_dir) for r in self.__rules)) @@ -205,44 +244,52 @@ def __init__(self, regexp, negation, directory_only): self.__regexp = re.compile(regexp) self.__negation = negation self.__directory_only = directory_only + self.__match = self.__regexp.match + + @property + def regexp(self): + return self.__regexp @property def negation(self): return self.__negation def match(self, rel_path, is_dir): - match = self.__regexp.search(rel_path) + m = self.__match(rel_path) # If we need a directory, check there is something after slash and if there is not, target must be a directory. # If there is something after slash then it's a directory irrelevant to type of target. # `self.directory_only` implies we have group number 1. # N.B. Question mark inside a group without a name can shift indices. :( - return match and (not self.__directory_only or match.group(1) is not None or is_dir) - + return m and (not self.__directory_only or m.group(1) is not None or is_dir) -def _seps_non_sep_expr(): - if os.altsep is None: - seps = re.escape(os.sep) - non_sep = f"[^{re.escape(os.sep)}]" - else: - seps = f"[{re.escape(os.sep)}{re.escape(os.altsep)}]" - non_sep = f"[^{re.escape(os.sep)}{re.escape(os.altsep)}]" +if os.altsep is not None: + _all_seps_expr = f"[{re.escape(os.sep)}{re.escape(os.altsep)}]" + _path_split = lambda path: re.split(_all_seps_expr, path) - return seps, non_sep +else: + _path_split = lambda path: path.split(os.sep) # Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME # option that `.gitignore`'s behavior depends on. -def _fnmatch_pathname_to_regexp(pattern, directory_only): +def _fnmatch_pathname_to_regexp(pattern, anchored, directory_only): """ - Implements fnmatch style-behavior, as though with FNM_PATHNAME flagged; - the path separator will not match shell-style '*' and '.' wildcards. + Implements `fnmatch` style-behavior, as though with `FNM_PATHNAME` flagged; + the path separator will not match shell-style `*` and `.` wildcards. """ + + if not pattern: + if directory_only: + return "[^/]+(/.+)?$" # Empty name means no path fragment. + + else: + return ".*" + i, n = 0, len(pattern) - seps_group, non_sep = _seps_non_sep_expr() - res = [f"(?:^|{seps_group})"] if pattern else [] # Empty name means no path fragment. + res = ["(?:^|.+/)" if not anchored else ""] while i < n: c = pattern[i] i += 1 @@ -253,19 +300,16 @@ def _fnmatch_pathname_to_regexp(pattern, directory_only): res.append(".*") if pattern[i] == "/": i += 1 - res.append(f"{seps_group}?") + res.append("/?") else: - res.append(f"{non_sep}*") + res.append("[^/]*") except IndexError: - res.append(f"{non_sep}*") + res.append("[^/]*") elif c == "?": - res.append(non_sep) - - elif c == "/": - res.append(seps_group) + res.append("[^/]") elif c == "[": j = i @@ -291,9 +335,9 @@ def _fnmatch_pathname_to_regexp(pattern, directory_only): res.append(re.escape(c)) if directory_only: # In this case we are interested if there is something after slash. - res.append(f"({seps_group}.+)?$") + res.append("(/.+)?$") else: - res.append(f"(?:{seps_group}|$)") + res.append("(?:/.+)?$") return "".join(res) diff --git a/tests/test_cache.py b/tests/test_cache.py index 53a061b..8d55edc 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,3 +1,4 @@ +import itertools import os import stat import tempfile @@ -18,6 +19,12 @@ def __init__(self, is_file=False): self.st_dev = 0 self.st_mode = stat.S_IFREG if is_file else stat.S_IFDIR + def isdir(self): + return self.st_mode == stat.S_IFDIR + + def isfile(self): + return self.st_mode == stat.S_IFREG + class Stat: def __init__(self, directories, files): self.__filesystem = {} @@ -43,16 +50,20 @@ def __call__(self, path): "/", ], [ + "/home/vladimir/project/directory/subdirectory/subdirectory/file.txt", + "/home/vladimir/project/directory/subdirectory/subdirectory/file2.txt", + "/home/vladimir/project/directory/subdirectory/subdirectory/file3.txt", "/home/vladimir/project/directory/subdirectory/file.txt", "/home/vladimir/project/directory/subdirectory/file2.txt", "/home/vladimir/project/directory/.gitignore", + "/home/vladimir/project/directory/file.txt", + "/home/vladimir/project/directory/file2.txt", "/home/vladimir/project/file.txt", "/home/vladimir/project/.gitignore", + "/home/vladimir/file.txt", ], ) - statistics = {"open": 0, "stat": 0} - def mock_open(path): data = { normalize_path("/home/vladimir/project/directory/.gitignore"): ["file.txt"], @@ -66,23 +77,45 @@ def mock_open(path): except KeyError: raise FileNotFoundError() - def mock_stat(path): - statistics["stat"] += 1 - return my_stat(path) - - with unittest.mock.patch("builtins.open", mock_open): - with unittest.mock.patch("os.stat", mock_stat): - matches = gitignorefile.Cache() - self.assertTrue(matches("/home/vladimir/project/directory/subdirectory/file.txt")) - self.assertTrue(matches("/home/vladimir/project/directory/subdirectory/file2.txt")) - self.assertTrue(matches("/home/vladimir/project/directory/file.txt")) - self.assertTrue(matches("/home/vladimir/project/directory/file2.txt")) - self.assertFalse(matches("/home/vladimir/project/file.txt")) - - self.assertEqual(statistics["open"], 2) + def mock_isdir(path): + statistics["isdir"] += 1 + try: + return my_stat(path).isdir() + except FileNotFoundError: + return False - # On Windows and Python 3.7 `os.path.isdir()` does not use `os.stat`. See `Modules/getpath.c`. - self.assertIn(statistics["stat"], (6 * (2 + 1) + 5, 6 * (2 + 1))) + def mock_isfile(path): + statistics["isfile"] += 1 + try: + return my_stat(path).isfile() + except FileNotFoundError: + return False + + data = { + "/home/vladimir/project/directory/subdirectory/file.txt": True, + "/home/vladimir/project/directory/subdirectory/file2.txt": True, + "/home/vladimir/project/directory/subdirectory/subdirectory/file.txt": True, + "/home/vladimir/project/directory/subdirectory/subdirectory/file2.txt": True, + "/home/vladimir/project/directory/subdirectory/subdirectory/file3.txt": False, + "/home/vladimir/project/directory/file.txt": True, + "/home/vladimir/project/directory/file2.txt": True, + "/home/vladimir/project/file.txt": False, + "/home/vladimir/file.txt": False, # No rules and no `isdir` calls for this file. + } + + for permutation in itertools.islice(itertools.permutations(data.items()), 0, None, 100): + statistics = {"open": 0, "isdir": 0, "isfile": 0} + + with unittest.mock.patch("builtins.open", mock_open): + with unittest.mock.patch("os.path.isdir", mock_isdir): + with unittest.mock.patch("os.path.isfile", mock_isfile): + matches = gitignorefile.Cache() + for path, expected in permutation: + self.assertEqual(matches(path), expected) + + self.assertEqual(statistics["open"], 2) + self.assertEqual(statistics["isdir"], len(data) - 1) + self.assertEqual(statistics["isfile"], 7) # Unique path fragments. def test_wrong_symlink(self): with tempfile.TemporaryDirectory() as d: diff --git a/tests/test_match.py b/tests/test_match.py index 78e8769..0c6626f 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -108,6 +108,14 @@ def test_second_level_directories_unchained(self): self.assertTrue(matches("/home/michael/a/doc/frotz", is_dir=True)) self.assertFalse(matches("/home/michael/a/b/doc/frotz", is_dir=False)) self.assertTrue(matches("/home/michael/a/b/doc/frotz", is_dir=True)) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/doc/frotz/file", is_dir=False)) + self.assertTrue(matches("/home/michael/doc/frotz/file", is_dir=True)) + self.assertTrue(matches("/home/michael/a/doc/frotz/file", is_dir=False)) + self.assertTrue(matches("/home/michael/a/doc/frotz/file", is_dir=True)) + self.assertTrue(matches("/home/michael/a/b/doc/frotz/file", is_dir=False)) + self.assertTrue(matches("/home/michael/a/b/doc/frotz/file", is_dir=True)) def test_second_level_files(self): matches = self.__parse_gitignore_string(["doc/frotz"], fake_base_dir="/home/michael") @@ -124,6 +132,16 @@ def test_ignore_file(self): self.assertTrue(matches("/home/michael/.venv/folder", is_dir=is_dir)) self.assertTrue(matches("/home/michael/.venv/file.txt", is_dir=is_dir)) + def test_ignore_core_file(self): + matches = self.__parse_gitignore_string(["core", "!core/"], fake_base_dir="/home/michael") + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/michael/core/a", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/core", is_dir=False)) + self.assertFalse(matches("/home/michael/core", is_dir=True)) + self.assertTrue(matches("/home/michael/a/core", is_dir=False)) + self.assertFalse(matches("/home/michael/a/core", is_dir=True)) + def test_ignore_directory(self): matches = self.__parse_gitignore_string([".venv/"], fake_base_dir="/home/michael") for is_dir in (False, True):