Skip to content

Commit

Permalink
Custom pathlib, 18% faster than pathspec. #24 cpburnz/python-path…
Browse files Browse the repository at this point in the history
  • Loading branch information
excitoon committed Aug 28, 2022
1 parent 241a54b commit 14f1214
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 42 deletions.
106 changes: 67 additions & 39 deletions gitignorefile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,17 @@ class Cache:
def __init__(self):
self.__gitignores = {}

def __get_parents(self, path, is_dir):
if not is_dir:
path = os.path.dirname(path)
yield path

while True:
new_path = os.path.dirname(path)
if not os.path.samefile(path, new_path):
yield new_path
path = new_path
else:
break

def __call__(self, path, is_dir=None):
if is_dir is None:
is_dir = os.path.isdir(path)

path = _Path(path)
add_to_children = {}
plain_paths = []
for parent in self.__get_parents(os.path.abspath(path), is_dir=is_dir):
if parent in self.__gitignores:
for parent in path.parents():
if parent.parts in self.__gitignores:
break

parent_gitignore = os.path.join(parent, ".gitignore")
if os.path.isfile(parent_gitignore):
p = parse(parent_gitignore, base_path=parent)
parent_gitignore = parent.join(".gitignore")
if parent_gitignore.isfile():
p = parse(str(parent_gitignore), base_path=parent)
add_to_children[parent] = (p, plain_paths)
plain_paths = []

Expand All @@ -71,21 +56,68 @@ def __call__(self, path, is_dir=None):
return False

for parent, (_, parent_plain_paths) in reversed(list(add_to_children.items())):
self.__gitignores[parent] = []
self.__gitignores[parent.parts] = []
for parent_to_add, (gitignore_to_add, _) in reversed(list(add_to_children.items())):
self.__gitignores[parent].append(gitignore_to_add)
self.__gitignores[parent.parts].append(gitignore_to_add)
if parent_to_add == parent:
break

self.__gitignores[parent].reverse()
self.__gitignores[parent.parts].reverse()
for plain_path in parent_plain_paths:
self.__gitignores[plain_path] = self.__gitignores[parent]
self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts]

return any(
(m(path, is_dir=is_dir) for m in self.__gitignores[parent])
(m(path, is_dir=is_dir) for m in self.__gitignores[parent.parts])
) # This parent comes either from first or second loop.


class _Path:
    """Lightweight absolute path kept as a tuple of its components.

    The components are available through ``parts`` as a hashable tuple.
    The joined string form and the ``isdir()`` result are computed lazily
    and cached on the instance.
    """

    def __init__(self, path):
        """Build a path from a string / path-like object or from components.

        Args:
            path: Either a ``str`` or ``os.PathLike`` (made absolute with
                ``os.path.abspath`` and split on ``os.sep``), or an iterable
                of already-split components.
        """
        if isinstance(path, (str, os.PathLike)):
            # `os.path.abspath` accepts path-like objects, so `pathlib.Path`
            # arguments are handled the same way as plain strings.
            abs_path = os.path.abspath(path)
            self.__parts = tuple(abs_path.split(os.sep))
            self.__joined = abs_path
        else:
            # `tuple()` returns the same object for tuple input, and makes
            # list input hashable so `parts` can always serve as a dict key.
            self.__parts = tuple(path)
            self.__joined = None

        self.__is_dir = None

    @property
    def parts(self):
        """Tuple of path components."""
        return self.__parts

    def join(self, name):
        """Return a new path with the single component `name` appended."""
        return _Path(self.__parts + (name,))

    def relpath(self, base_path):
        """Return this path relative to `base_path` as a "/"-joined string.

        `base_path` must be a `_Path` whose components are a prefix of this
        path's components.
        """
        base_parts = base_path.__parts
        assert self.__parts[: len(base_parts)] == base_parts
        # Always "/" here, independent of `os.sep` -- presumably because
        # gitignore patterns use "/"; confirm against the rule matcher.
        return "/".join(self.__parts[len(base_parts) :])

    def parents(self):
        """Yield ancestor paths from the immediate parent up to the first component."""
        for i in range(len(self.__parts) - 1, 0, -1):
            yield _Path(self.__parts[:i])

    def isfile(self):
        """Return whether the path is an existing regular file (not cached)."""
        return os.path.isfile(str(self))

    def isdir(self):
        """Return whether the path is an existing directory (cached after first call)."""
        if self.__is_dir is None:
            self.__is_dir = os.path.isdir(str(self))
        return self.__is_dir

    def __str__(self):
        if self.__joined is None:
            joined = "/".join(self.__parts)
            if not joined and self.__parts:
                # A lone empty component is the filesystem root; joining it
                # yields "" which would otherwise lose the leading separator.
                joined = "/"
            self.__joined = joined
        return self.__joined

    def __repr__(self):
        return f"{type(self).__name__}({self.__parts!r})"


def _rule_from_pattern(pattern):
"""
Take a `.gitignore` match pattern, such as "*.py[cod]" or "**/*.bak",
Expand Down Expand Up @@ -172,20 +204,21 @@ class _IgnoreRules:
def __init__(self, rules, base_path):
self.__rules = rules
self.__can_return_immediately = not any((r.negation for r in rules))
self.__base_path = base_path
self.__base_path = _Path(base_path) if isinstance(base_path, str) else base_path

def match(self, path, is_dir=None):
"""
Because Git allows for nested `.gitignore` files, a `base_path` value
is required for correct behavior.
"""
if is_dir is None:
is_dir = os.path.isdir(path)

rel_path = os.path.relpath(path, self.__base_path)
if isinstance(path, str):
path = _Path(path)

if is_dir is None:
is_dir = path.isdir()

if rel_path.startswith(f".{os.sep}"):
rel_path = rel_path[2:]
rel_path = path.relpath(self.__base_path)

if self.__can_return_immediately:
return any((r.match(rel_path, is_dir) for r in self.__rules))
Expand Down Expand Up @@ -221,13 +254,8 @@ def match(self, rel_path, is_dir):


def _seps_non_sep_expr():
if os.altsep is None:
seps = re.escape(os.sep)
non_sep = f"[^{re.escape(os.sep)}]"

else:
seps = f"[{re.escape(os.sep)}{re.escape(os.altsep)}]"
non_sep = f"[^{re.escape(os.sep)}{re.escape(os.altsep)}]"
seps = re.escape(os.sep)
non_sep = f"[^{re.escape(os.sep)}]"

return seps, non_sep

Expand Down
4 changes: 1 addition & 3 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ def mock_stat(path):
self.assertFalse(matches("/home/vladimir/project/file.txt"))

self.assertEqual(statistics["open"], 2)

# On Windows and Python 3.7 `os.path.isdir()` does not use `os.stat`. See `Modules/getpath.c`.
self.assertIn(statistics["stat"], (6 * (2 + 1) + 5, 6 * (2 + 1)))
self.assertEqual(statistics["stat"], 6 + 5)

def test_wrong_symlink(self):
with tempfile.TemporaryDirectory() as d:
Expand Down

0 comments on commit 14f1214

Please sign in to comment.