Skip to content

Commit

Permalink
Correct and fast Cache.
Browse files Browse the repository at this point in the history
  • Loading branch information
excitoon committed Aug 28, 2022
1 parent 241a54b commit cfa6678
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 78 deletions.
151 changes: 91 additions & 60 deletions gitignorefile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,61 +29,99 @@ def ignored(path, is_dir=None):

class Cache:
def __init__(self):
self.__gitignores = {}

def __get_parents(self, path, is_dir):
if not is_dir:
path = os.path.dirname(path)
yield path

while True:
new_path = os.path.dirname(path)
if not os.path.samefile(path, new_path):
yield new_path
path = new_path
else:
break
self.__gitignores = {tuple(): []}

def __call__(self, path, is_dir=None):
if is_dir is None:
is_dir = os.path.isdir(path)

path = _Path(path)
add_to_children = {}
plain_paths = []
for parent in self.__get_parents(os.path.abspath(path), is_dir=is_dir):
if parent in self.__gitignores:
for parent in path.parents():
if parent.parts in self.__gitignores:
break

parent_gitignore = os.path.join(parent, ".gitignore")
if os.path.isfile(parent_gitignore):
p = parse(parent_gitignore, base_path=parent)
add_to_children[parent] = (p, plain_paths)
parent_gitignore = parent.join(".gitignore")
if parent_gitignore.isfile():
matches = parse(str(parent_gitignore), base_path=parent)
add_to_children[parent] = (matches, plain_paths)
plain_paths = []

else:
plain_paths.append(parent)

else:
for plain_path in plain_paths:
self.__gitignores[plain_path] = []
self.__gitignores[plain_path.parts] = []

if not add_to_children:
if add_to_children:
plain_paths.clear()

else:
return False

for parent, (_, parent_plain_paths) in reversed(list(add_to_children.items())):
self.__gitignores[parent] = []
self.__gitignores[parent.parts] = self.__gitignores[parent.parts[:-1]].copy()
for parent_to_add, (gitignore_to_add, _) in reversed(list(add_to_children.items())):
self.__gitignores[parent].append(gitignore_to_add)
self.__gitignores[parent.parts].append(gitignore_to_add)
if parent_to_add == parent:
break

self.__gitignores[parent].reverse()
self.__gitignores[parent.parts].reverse()

for plain_path in parent_plain_paths:
self.__gitignores[plain_path] = self.__gitignores[parent]
self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts]

# This parent comes either from first or second loop.
for plain_path in plain_paths:
self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts]

return any((m(path, is_dir=is_dir) for m in self.__gitignores[parent.parts]))


class _Path:
    """
    Absolute filesystem path stored as a tuple of its components.

    A `str` argument is made absolute with `os.path.abspath()` and split into
    components with `_path_split()`; any other argument is taken as ready-made
    parts. The joined string form and the `isdir()` answer are computed lazily
    and cached on the instance.
    """

    def __init__(self, path):
        if isinstance(path, str):
            abs_path = os.path.abspath(path)
            self.__parts = tuple(_path_split(abs_path))
            self.__joined = abs_path  # Already known, no need to join the parts again.

        else:
            self.__parts = path  # Must support `+ (name,)` and slicing, see `join()` and `parents()`.
            self.__joined = None  # Built on demand by `__str__()`.

        self.__is_dir = None  # Unknown until `isdir()` is called for the first time.

    @property
    def parts(self):
        """Components of the path."""
        return self.__parts

    def join(self, name):
        """Return a new `_Path` with `name` appended as the last component."""
        return _Path(self.__parts + (name,))

    def relpath(self, base_path):
        """
        Return the components of this path below `base_path`, joined with `/`.

        `base_path` is another `_Path`; its parts must be a prefix of this path's
        parts (checked with an `assert`).
        """
        assert self.__parts[: len(base_path.__parts)] == base_path.__parts
        return "/".join(self.__parts[len(base_path.__parts) :])

    def parents(self):
        """Yield every ancestor as a `_Path`, from the nearest one down to the first component alone."""
        for i in range(len(self.__parts) - 1, 0, -1):
            yield _Path(self.__parts[:i])

    def isfile(self):
        """Return `os.path.isfile()` for this path. The result is not cached."""
        # Go through `__str__()` so that the root path is checked as `/` and not as an empty string.
        return os.path.isfile(str(self))

    def isdir(self):
        """Return `os.path.isdir()` for this path. The first answer is cached."""
        if self.__is_dir is None:
            # Same as in `isfile()`, `__str__()` is the only place which joins the parts.
            self.__is_dir = os.path.isdir(str(self))
        return self.__is_dir

    def __str__(self):
        if self.__joined is None:
            # Parts `("",)` stand for the root, plain join would give an empty string for them.
            self.__joined = "/".join(self.__parts) if self.__parts != ("",) else "/"
        return self.__joined


def _rule_from_pattern(pattern):
Expand Down Expand Up @@ -172,20 +210,21 @@ class _IgnoreRules:
def __init__(self, rules, base_path):
self.__rules = rules
self.__can_return_immediately = not any((r.negation for r in rules))
self.__base_path = base_path
self.__base_path = _Path(base_path) if isinstance(base_path, str) else base_path

def match(self, path, is_dir=None):
"""
Because Git allows for nested `.gitignore` files, a `base_path` value
is required for correct behavior.
"""
if is_dir is None:
is_dir = os.path.isdir(path)

rel_path = os.path.relpath(path, self.__base_path)
if isinstance(path, str):
path = _Path(path)

if is_dir is None:
is_dir = path.isdir() # TODO Pass callable here.

if rel_path.startswith(f".{os.sep}"):
rel_path = rel_path[2:]
rel_path = path.relpath(self.__base_path)

if self.__can_return_immediately:
return any((r.match(rel_path, is_dir) for r in self.__rules))
Expand Down Expand Up @@ -220,29 +259,24 @@ def match(self, rel_path, is_dir):
return match and (not self.__directory_only or match.group(1) is not None or is_dir)


def _seps_non_sep_expr():
if os.altsep is None:
seps = re.escape(os.sep)
non_sep = f"[^{re.escape(os.sep)}]"
if os.altsep is not None:
_all_seps_expr = f"[{re.escape(os.sep)}{re.escape(os.altsep)}]"
_path_split = lambda path: re.split(_all_seps_expr, path)

else:
seps = f"[{re.escape(os.sep)}{re.escape(os.altsep)}]"
non_sep = f"[^{re.escape(os.sep)}{re.escape(os.altsep)}]"

return seps, non_sep
else:
_path_split = lambda path: path.split(os.sep)


# Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME
# option that `.gitignore`'s behavior depends on.
def _fnmatch_pathname_to_regexp(pattern, directory_only):
"""
Implements fnmatch style-behavior, as though with FNM_PATHNAME flagged;
the path separator will not match shell-style '*' and '.' wildcards.
Implements `fnmatch` style-behavior, as though with `FNM_PATHNAME` flagged;
the path separator will not match shell-style `*` and `.` wildcards.
"""
i, n = 0, len(pattern)

seps_group, non_sep = _seps_non_sep_expr()
res = [f"(?:^|{seps_group})"] if pattern else [] # Empty name means no path fragment.
res = ["(?:^|/)"] if pattern else [] # Empty name means no path fragment.
while i < n:
c = pattern[i]
i += 1
Expand All @@ -253,19 +287,16 @@ def _fnmatch_pathname_to_regexp(pattern, directory_only):
res.append(".*")
if pattern[i] == "/":
i += 1
res.append(f"{seps_group}?")
res.append("/?")

else:
res.append(f"{non_sep}*")
res.append(f"[^/]*")

except IndexError:
res.append(f"{non_sep}*")
res.append(f"[^/]*")

elif c == "?":
res.append(non_sep)

elif c == "/":
res.append(seps_group)
res.append("[^/]")

elif c == "[":
j = i
Expand All @@ -291,9 +322,9 @@ def _fnmatch_pathname_to_regexp(pattern, directory_only):
res.append(re.escape(c))

if directory_only: # In this case we are interested if there is something after slash.
res.append(f"({seps_group}.+)?$")
res.append(f"(/.+)?$")

else:
res.append(f"(?:{seps_group}|$)")
res.append(f"(?:/|$)")

return "".join(res)
69 changes: 51 additions & 18 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import itertools
import os
import stat
import tempfile
Expand All @@ -18,6 +19,12 @@ def __init__(self, is_file=False):
self.st_dev = 0
self.st_mode = stat.S_IFREG if is_file else stat.S_IFDIR

def isdir(self):
    """Return whether `st_mode` describes a directory."""
    # `stat.S_ISDIR()` looks only at the file type bits, so permission bits in `st_mode` do not break the check.
    return stat.S_ISDIR(self.st_mode)

def isfile(self):
    """Return whether `st_mode` describes a regular file."""
    # `stat.S_ISREG()` looks only at the file type bits, so permission bits in `st_mode` do not break the check.
    return stat.S_ISREG(self.st_mode)

class Stat:
def __init__(self, directories, files):
self.__filesystem = {}
Expand All @@ -43,16 +50,20 @@ def __call__(self, path):
"/",
],
[
"/home/vladimir/project/directory/subdirectory/subdirectory/file.txt",
"/home/vladimir/project/directory/subdirectory/subdirectory/file2.txt",
"/home/vladimir/project/directory/subdirectory/subdirectory/file3.txt",
"/home/vladimir/project/directory/subdirectory/file.txt",
"/home/vladimir/project/directory/subdirectory/file2.txt",
"/home/vladimir/project/directory/.gitignore",
"/home/vladimir/project/directory/file.txt",
"/home/vladimir/project/directory/file2.txt",
"/home/vladimir/project/file.txt",
"/home/vladimir/project/.gitignore",
"/home/vladimir/file.txt",
],
)

statistics = {"open": 0, "stat": 0}

def mock_open(path):
data = {
normalize_path("/home/vladimir/project/directory/.gitignore"): ["file.txt"],
Expand All @@ -66,23 +77,45 @@ def mock_open(path):
except KeyError:
raise FileNotFoundError()

def mock_stat(path):
statistics["stat"] += 1
return my_stat(path)

with unittest.mock.patch("builtins.open", mock_open):
with unittest.mock.patch("os.stat", mock_stat):
matches = gitignorefile.Cache()
self.assertTrue(matches("/home/vladimir/project/directory/subdirectory/file.txt"))
self.assertTrue(matches("/home/vladimir/project/directory/subdirectory/file2.txt"))
self.assertTrue(matches("/home/vladimir/project/directory/file.txt"))
self.assertTrue(matches("/home/vladimir/project/directory/file2.txt"))
self.assertFalse(matches("/home/vladimir/project/file.txt"))

self.assertEqual(statistics["open"], 2)
def mock_isdir(path):
    """Stand-in for `os.path.isdir()` which counts its calls and answers from the fake filesystem."""
    statistics["isdir"] += 1  # Counted even when the path does not exist.
    try:
        found = my_stat(path).isdir()
    except FileNotFoundError:
        # Missing entries are reported as "not a directory".
        found = False
    return found

# On Windows and Python 3.7 `os.path.isdir()` does not use `os.stat`. See `Modules/getpath.c`.
self.assertIn(statistics["stat"], (6 * (2 + 1) + 5, 6 * (2 + 1)))
def mock_isfile(path):
    """Stand-in for `os.path.isfile()` which counts its calls and answers from the fake filesystem."""
    statistics["isfile"] += 1  # Counted even when the path does not exist.
    try:
        found = my_stat(path).isfile()
    except FileNotFoundError:
        # Missing entries are reported as "not a file".
        found = False
    return found

data = {
"/home/vladimir/project/directory/subdirectory/file.txt": True,
"/home/vladimir/project/directory/subdirectory/file2.txt": True,
"/home/vladimir/project/directory/subdirectory/subdirectory/file.txt": True,
"/home/vladimir/project/directory/subdirectory/subdirectory/file2.txt": True,
"/home/vladimir/project/directory/subdirectory/subdirectory/file3.txt": False,
"/home/vladimir/project/directory/file.txt": True,
"/home/vladimir/project/directory/file2.txt": True,
"/home/vladimir/project/file.txt": False,
"/home/vladimir/file.txt": False, # No rules and no `isdir` calls for this file.
}

for permutation in itertools.islice(itertools.permutations(data.items()), 0, None, 100):
statistics = {"open": 0, "isdir": 0, "isfile": 0}

with unittest.mock.patch("builtins.open", mock_open):
with unittest.mock.patch("os.path.isdir", mock_isdir):
with unittest.mock.patch("os.path.isfile", mock_isfile):
matches = gitignorefile.Cache()
for path, expected in permutation:
self.assertEqual(matches(path), expected)

self.assertEqual(statistics["open"], 2)
self.assertEqual(statistics["isdir"], len(data) - 1)
self.assertEqual(statistics["isfile"], 7) # Unique path fragments.

def test_wrong_symlink(self):
with tempfile.TemporaryDirectory() as d:
Expand Down

0 comments on commit cfa6678

Please sign in to comment.