From 92a9066197f26f95a0438a70b2c8f0373c37125c Mon Sep 17 00:00:00 2001 From: cpburnz <2126043+cpburnz@users.noreply.github.com> Date: Sat, 9 Dec 2023 15:25:09 -0500 Subject: [PATCH] Improve debugging --- CHANGES.rst | 18 ++- DEV.md | 2 +- pathspec/_meta.py | 2 +- pathspec/gitignore.py | 75 ++++++++---- pathspec/pathspec.py | 158 ++++++++++++++++++------ pathspec/pattern.py | 111 +++++++++-------- pathspec/util.py | 120 +++++++++++++++---- tests/test_04_gitignore.py | 240 ++++++++++++++++++++++++------------- tests/util.py | 69 ++++++++++- 9 files changed, 570 insertions(+), 225 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 16cf8d8..9afe2d2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,13 +2,29 @@ Change History ============== -0.11.3 (TDB) +0.12.0 (TDB) ------------ +API changes: + +- Signature of protected method `pathspec.pathspec.PathSpec._match_file()` has been changed from `def _match_file(patterns: Iterable[Pattern], file: str) -> bool` to `def _match_file(patterns: Iterable[Tuple[int, Pattern]], file: str) -> Tuple[Optional[bool], Optional[int]]`. + +New features: + +- Added `pathspec.pathspec.PathSpec.check_*()` methods. These methods behave similarly to `.match_*()` but return additional information in the `pathspec.util.CheckResult` objects (e.g., `CheckResult.index` indicates the index of the last pattern that matched the file). +- Added `pathspec.pattern.RegexPattern.pattern` attribute which stores the original, uncompiled pattern. + + Bug fixes: - `Pull #83`_: Fix ReadTheDocs builds. +Improvements: + +- Improve test debugging. +- Improve type hint on *on_error* parameter on `pathspec.pathspec.PathSpec.match_tree_entries()`. +- Improve type hint on *on_error* parameter on `pathspec.util.iter_tree_entries()`. + .. _`Pull #83`: https://github.com/cpburnz/python-pathspec/pull/83 diff --git a/DEV.md b/DEV.md index ee4d876..365a37f 100644 --- a/DEV.md +++ b/DEV.md @@ -96,7 +96,7 @@ Review the following Linux distributions. 
- [Releases](https://wiki.ubuntu.com/Releases) - Package: [python3](https://packages.ubuntu.com/focal/python3) (focal) - Package: [python3](https://packages.ubuntu.com/jammy/python3) (jammy) - - Package: [python3-pathspec](https://packages.ubuntu.com/focal/python3-pathspec) (flocal) + - Package: [python3-pathspec](https://packages.ubuntu.com/focal/python3-pathspec) (focal) - Package: [python3-pathspec](https://packages.ubuntu.com/jammy/python3-pathspec) (jammy) diff --git a/pathspec/_meta.py b/pathspec/_meta.py index ab5405a..193fa9d 100644 --- a/pathspec/_meta.py +++ b/pathspec/_meta.py @@ -55,4 +55,4 @@ "kurtmckee ", ] __license__ = "MPL 2.0" -__version__ = "0.11.3.dev1" +__version__ = "0.12.0.dev1" diff --git a/pathspec/gitignore.py b/pathspec/gitignore.py index e5f7d48..d5eac8e 100644 --- a/pathspec/gitignore.py +++ b/pathspec/gitignore.py @@ -5,12 +5,15 @@ from typing import ( AnyStr, - Callable, - Collection, - Iterable, - Type, + Callable, # Replaced by `collections.abc.Callable` in 3.9. + Iterable, # Replaced by `collections.abc.Iterable` in 3.9. + Optional, # Replaced by `X | None` in 3.10. + Tuple, # Replaced by `tuple` in 3.9. + Type, # Replaced by `type` in 3.9. TypeVar, - Union) + Union, # Replaced by `X | Y` in 3.10. + cast, + overload) from .pathspec import ( PathSpec) @@ -48,6 +51,25 @@ def __eq__(self, other: object) -> bool: else: return NotImplemented + # Support reversed order of arguments from PathSpec. + @overload + @classmethod + def from_lines( + cls: Type[Self], + pattern_factory: Union[str, Callable[[AnyStr], Pattern]], + lines: Iterable[AnyStr], + ) -> Self: + ... + + @overload + @classmethod + def from_lines( + cls: Type[Self], + lines: Iterable[AnyStr], + pattern_factory: Union[str, Callable[[AnyStr], Pattern], None] = None, + ) -> Self: + ... 
+ @classmethod def from_lines( cls: Type[Self], @@ -74,36 +96,40 @@ def from_lines( if pattern_factory is None: pattern_factory = GitWildMatchPattern - elif (isinstance(lines, str) or callable(lines)) and _is_iterable(pattern_factory): + elif (isinstance(lines, (str, bytes)) or callable(lines)) and _is_iterable(pattern_factory): # Support reversed order of arguments from PathSpec. pattern_factory, lines = lines, pattern_factory self = super().from_lines(pattern_factory, lines) - return self # type: ignore + return cast(Self, self) @staticmethod def _match_file( - patterns: Collection[GitWildMatchPattern], + patterns: Iterable[Tuple[int, GitWildMatchPattern]], file: str, - ) -> bool: + ) -> Tuple[Optional[bool], Optional[int]]: """ - Matches the file to the patterns. + Check the file against the patterns. - .. NOTE:: Subclasses of :class:`.PathSpec` may override this - method as an instance method. It does not have to be a static - method. + .. NOTE:: Subclasses of :class:`~pathspec.pathspec.PathSpec` may override + this method as an instance method. It does not have to be a static + method. The signature for this method is subject to change. - *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`) - contains the patterns to use. + *patterns* (:class:`~collections.abc.Iterable`) yields each indexed pattern + (:class:`tuple`) which contains the pattern index (:class:`int`) and actual + pattern (:class:`~pathspec.pattern.Pattern`). - *file* (:class:`str`) is the normalized file path to be matched - against *patterns*. + *file* (:class:`str`) is the normalized file path to be matched against + *patterns*. - Returns :data:`True` if *file* matched; otherwise, :data:`False`. + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). 
""" - out_matched = False + out_include: Optional[bool] = None + out_index: Optional[int] = None out_priority = 0 - for pattern in patterns: + for index, pattern in patterns: if pattern.include is not None: match = pattern.match_file(file) if match is not None: @@ -112,6 +138,9 @@ def _match_file( # Check for directory marker. dir_mark = match.match.groupdict().get(_DIR_MARK) + # TODO: A exclude (whitelist) dir pattern here needs to deprioritize + # for 81-c. + if dir_mark: # Pattern matched by a directory pattern. priority = 1 @@ -120,10 +149,10 @@ def _match_file( priority = 2 if pattern.include and dir_mark: - out_matched = pattern.include + out_include = pattern.include out_priority = priority elif priority >= out_priority: - out_matched = pattern.include + out_include = pattern.include out_priority = priority - return out_matched + return out_include, out_index diff --git a/pathspec/pathspec.py b/pathspec/pathspec.py index 9153baa..ebffede 100644 --- a/pathspec/pathspec.py +++ b/pathspec/pathspec.py @@ -8,24 +8,25 @@ zip_longest) from typing import ( AnyStr, - Callable, - Collection, - Iterable, - Iterator, - Optional, - Type, + Callable, # Replaced by `collections.abc.Callable` in 3.9. + Collection, # Replaced by `collections.abc.Collection` in 3.9. + Iterable, # Replaced by `collections.abc.Iterable` in 3.9. + Iterator, # Replaced by `collections.abc.Iterator` in 3.9. + Optional, # Replaced by `X | None` in 3.10. + Type, # Replaced by `type` in 3.9. TypeVar, - Union) + Union) # Replaced by `X | Y` in 3.10. from . 
import util from .pattern import ( Pattern) from .util import ( + CheckResult, StrPath, + TStrPath, TreeEntry, - _filter_patterns, + _filter_check_patterns, _is_iterable, - match_file, normalize_file) Self = TypeVar("Self", bound="PathSpec") @@ -48,8 +49,10 @@ def __init__(self, patterns: Iterable[Pattern]) -> None: *patterns* (:class:`~collections.abc.Collection` or :class:`~collections.abc.Iterable`) yields each compiled pattern (:class:`.Pattern`). """ + if not isinstance(patterns, CollectionType): + patterns = list(patterns) - self.patterns = patterns if isinstance(patterns, CollectionType) else list(patterns) + self.patterns: Collection[Pattern] = patterns """ *patterns* (:class:`~collections.abc.Collection` of :class:`.Pattern`) contains the compiled patterns. @@ -94,6 +97,88 @@ def __iadd__(self: Self, other: "PathSpec") -> Self: else: return NotImplemented + def check_file( + self, + file: TStrPath, + separators: Optional[Collection[str]] = None, + ) -> CheckResult[TStrPath]: + """ + Check the file against this path-spec. + + *file* (:class:`str` or :class:`os.PathLike`) is the file path to be + matched against :attr:`self.patterns <PathSpec.patterns>`. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`; or + :data:`None`) optionally contains the path separators to normalize. See + :func:`~pathspec.util.normalize_file` for more information. + + Returns the file check result (:class:`CheckResult`). + """ + norm_file = normalize_file(file, separators) + include, index = self._match_file(enumerate(self.patterns), norm_file) + return CheckResult(file, include, index) + + def check_files( + self, + files: Iterable[TStrPath], + separators: Optional[Collection[str]] = None, + ) -> Iterator[CheckResult[TStrPath]]: + """ + Check the files against this path-spec. + + *files* (:class:`~collections.abc.Iterable` of :class:`str` or + :class:`os.PathLike`) contains the file paths to be checked against + :attr:`self.patterns <PathSpec.patterns>`. 
+ + *separators* (:class:`~collections.abc.Collection` of :class:`str`; or + :data:`None`) optionally contains the path separators to normalize. See + :func:`~pathspec.util.normalize_file` for more information. + + Returns an :class:`~collections.abc.Iterator` yielding each file check + result (:class:`CheckResult`). + """ + if not _is_iterable(files): + raise TypeError(f"files:{files!r} is not an iterable.") + + use_patterns = _filter_check_patterns(self.patterns) + for orig_file in files: + norm_file = normalize_file(orig_file, separators) + include, index = self._match_file(use_patterns, norm_file) + yield CheckResult(orig_file, include, index) + + def check_tree_files( + self, + root: StrPath, + on_error: Optional[Callable[[OSError], None]] = None, + follow_links: Optional[bool] = None, + ) -> Iterator[CheckResult[str]]: + """ + Walks the specified root path for all files and checks them against this + path-spec. + + *root* (:class:`str` or :class:`os.PathLike`) is the root directory to + search for files. + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally + is the error handler for file-system exceptions. It will be called with the + exception (:exc:`OSError`). Reraise the exception to abort the walk. Default + is :data:`None` to ignore file-system exceptions. + + *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk + symbolic links that resolve to directories. Default is :data:`None` for + :data:`True`. + + *negate* (:class:`bool` or :data:`None`) is whether to negate the match + results of the patterns. If :data:`True`, a pattern matching a file will + exclude the file rather than include it. Default is :data:`None` for + :data:`False`. + + Returns an :class:`~collections.abc.Iterator` yielding each file check + result (:class:`CheckResult`). 
+ """ + files = util.iter_tree_files(root, on_error=on_error, follow_links=follow_links) + yield from self.check_files(files) + @classmethod def from_lines( cls: Type[Self], @@ -155,21 +240,23 @@ def match_entries( if not _is_iterable(entries): raise TypeError(f"entries:{entries!r} is not an iterable.") - use_patterns = _filter_patterns(self.patterns) + use_patterns = _filter_check_patterns(self.patterns) for entry in entries: norm_file = normalize_file(entry.path, separators) - is_match = self._match_file(use_patterns, norm_file) + include, _index = self._match_file(use_patterns, norm_file) if negate: - is_match = not is_match + include = not include - if is_match: + if include: yield entry - # Match files using the `match_file()` utility function. Subclasses may - # override this method as an instance method. It does not have to be a static - # method. - _match_file = staticmethod(match_file) + _match_file = staticmethod(util.check_match_file) + """ + Match files using the `check_match_file()` utility function. Subclasses may + override this method as an instance method. It does not have to be a static + method. The signature for this method is subject to change. + """ def match_file( self, @@ -188,8 +275,9 @@ def match_file( Returns :data:`True` if *file* matched; otherwise, :data:`False`. 
""" - norm_file = util.normalize_file(file, separators=separators) - return self._match_file(self.patterns, norm_file) + norm_file = normalize_file(file, separators) + include, _index = self._match_file(enumerate(self.patterns), norm_file) + return include def match_files( self, @@ -220,21 +308,21 @@ def match_files( if not _is_iterable(files): raise TypeError(f"files:{files!r} is not an iterable.") - use_patterns = _filter_patterns(self.patterns) + use_patterns = _filter_check_patterns(self.patterns) for orig_file in files: norm_file = normalize_file(orig_file, separators) - is_match = self._match_file(use_patterns, norm_file) + include, _index = self._match_file(use_patterns, norm_file) if negate: - is_match = not is_match + include = not include - if is_match: + if include: yield orig_file def match_tree_entries( self, root: StrPath, - on_error: Optional[Callable] = None, + on_error: Optional[Callable[[OSError], None]] = None, follow_links: Optional[bool] = None, *, negate: Optional[bool] = None, @@ -247,12 +335,13 @@ def match_tree_entries( search. *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally - is the error handler for file-system exceptions. See - :func:`~pathspec.util.iter_tree_entries` for more information. + is the error handler for file-system exceptions. It will be called with the + exception (:exc:`OSError`). Reraise the exception to abort the walk. Default + is :data:`None` to ignore file-system exceptions. *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk - symbolic links that resolve to directories. See - :func:`~pathspec.util.iter_tree_files` for more information. + symbolic links that resolve to directories. Default is :data:`None` for + :data:`True`. *negate* (:class:`bool` or :data:`None`) is whether to negate the match results of the patterns. 
If :data:`True`, a pattern matching a file will @@ -268,7 +357,7 @@ def match_tree_entries( def match_tree_files( self, root: StrPath, - on_error: Optional[Callable] = None, + on_error: Optional[Callable[[OSError], None]] = None, follow_links: Optional[bool] = None, *, negate: Optional[bool] = None, @@ -281,12 +370,13 @@ def match_tree_files( search for files. *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally - is the error handler for file-system exceptions. See - :func:`~pathspec.util.iter_tree_files` for more information. + is the error handler for file-system exceptions. It will be called with the + exception (:exc:`OSError`). Reraise the exception to abort the walk. Default + is :data:`None` to ignore file-system exceptions. *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk - symbolic links that resolve to directories. See - :func:`~pathspec.util.iter_tree_files` for more information. + symbolic links that resolve to directories. Default is :data:`None` for + :data:`True`. *negate* (:class:`bool` or :data:`None`) is whether to negate the match results of the patterns. If :data:`True`, a pattern matching a file will diff --git a/pathspec/pattern.py b/pathspec/pattern.py index 8f20e73..d081557 100644 --- a/pathspec/pattern.py +++ b/pathspec/pattern.py @@ -8,13 +8,13 @@ from typing import ( Any, AnyStr, - Iterable, - Iterator, - Match as MatchHint, - Optional, - Pattern as PatternHint, - Tuple, - Union) + Iterable, # Replaced by `collections.abc.Iterable` in 3.9. + Iterator, # Replaced by `collections.abc.Iterator` in 3.9. + Match as MatchHint, # Replaced by `re.Match` in 3.9. + Optional, # Replaced by `X | None` in 3.10. + Pattern as PatternHint, # Replaced by `re.Pattern` in 3.9. + Tuple, # Replaced by `tuple` in 3.9. + Union) # Replaced by `X | Y` in 3.10. class Pattern(object): @@ -23,44 +23,45 @@ class Pattern(object): """ # Make the class dict-less. 
- __slots__ = ('include',) + __slots__ = ( + 'include', + ) def __init__(self, include: Optional[bool]) -> None: """ Initializes the :class:`Pattern` instance. - *include* (:class:`bool` or :data:`None`) is whether the matched - files should be included (:data:`True`), excluded (:data:`False`), - or is a null-operation (:data:`None`). + *include* (:class:`bool` or :data:`None`) is whether the matched files + should be included (:data:`True`), excluded (:data:`False`), or is a + null-operation (:data:`None`). """ self.include = include """ - *include* (:class:`bool` or :data:`None`) is whether the matched - files should be included (:data:`True`), excluded (:data:`False`), - or is a null-operation (:data:`None`). + *include* (:class:`bool` or :data:`None`) is whether the matched files + should be included (:data:`True`), excluded (:data:`False`), or is a + null-operation (:data:`None`). """ def match(self, files: Iterable[str]) -> Iterator[str]: """ DEPRECATED: This method is no longer used and has been replaced by - :meth:`.match_file`. Use the :meth:`.match_file` method with a loop - for similar results. + :meth:`.match_file`. Use the :meth:`.match_file` method with a loop for + similar results. Matches this pattern against the specified files. - *files* (:class:`~collections.abc.Iterable` of :class:`str`) - contains each file relative to the root directory (e.g., - ``"relative/path/to/file"``). + *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains each + file relative to the root directory (e.g., ``"relative/path/to/file"``). - Returns an :class:`~collections.abc.Iterable` yielding each matched - file path (:class:`str`). + Returns an :class:`~collections.abc.Iterable` yielding each matched file + path (:class:`str`). """ warnings.warn(( - "{0.__module__}.{0.__qualname__}.match() is deprecated. Use " - "{0.__module__}.{0.__qualname__}.match_file() with a loop for " + "{cls.__module__}.{cls.__qualname__}.match() is deprecated. 
Use " + "{cls.__module__}.{cls.__qualname__}.match_file() with a loop for " "similar results." - ).format(self.__class__), DeprecationWarning, stacklevel=2) + ).format(cls=self.__class__), DeprecationWarning, stacklevel=2) for file in files: if self.match_file(file) is not None: @@ -75,22 +76,25 @@ def match_file(self, file: str) -> Optional[Any]: Returns the match result if *file* matched; otherwise, :data:`None`. """ raise NotImplementedError(( - "{0.__module__}.{0.__qualname__} must override match_file()." - ).format(self.__class__)) + "{cls.__module__}.{cls.__qualname__} must override match_file()." + ).format(cls=self.__class__)) class RegexPattern(Pattern): """ - The :class:`RegexPattern` class is an implementation of a pattern - using regular expressions. + The :class:`RegexPattern` class is an implementation of a pattern using + regular expressions. """ # Keep the class dict-less. - __slots__ = ('regex',) + __slots__ = ( + 'pattern', + 'regex', + ) def __init__( self, - pattern: Union[AnyStr, PatternHint], + pattern: Union[AnyStr, PatternHint, None], include: Optional[bool] = None, ) -> None: """ @@ -99,20 +103,18 @@ def __init__( *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or :data:`None`) is the pattern to compile into a regular expression. - *include* (:class:`bool` or :data:`None`) must be :data:`None` - unless *pattern* is a precompiled regular expression (:class:`re.Pattern`) - in which case it is whether matched files should be included - (:data:`True`), excluded (:data:`False`), or is a null operation - (:data:`None`). + *include* (:class:`bool` or :data:`None`) must be :data:`None` unless + *pattern* is a precompiled regular expression (:class:`re.Pattern`) in which + case it is whether matched files should be included (:data:`True`), excluded + (:data:`False`), or is a null operation (:data:`None`). - .. NOTE:: Subclasses do not need to support the *include* - parameter. + .. 
NOTE:: Subclasses do not need to support the *include* parameter. """ if isinstance(pattern, (str, bytes)): assert include is None, ( - "include:{!r} must be null when pattern:{!r} is a string." - ).format(include, pattern) + f"include:{include!r} must be null when pattern:{pattern!r} is a string." + ) regex, include = self.pattern_to_regex(pattern) # NOTE: Make sure to allow a null regular expression to be # returned for a null-operation. @@ -128,18 +130,23 @@ def __init__( # NOTE: Make sure to allow a null pattern to be passed for a # null-operation. assert include is None, ( - "include:{!r} must be null when pattern:{!r} is null." - ).format(include, pattern) + f"include:{include!r} must be null when pattern:{pattern!r} is null." + ) else: - raise TypeError("pattern:{!r} is not a string, re.Pattern, or None.".format(pattern)) + raise TypeError(f"pattern:{pattern!r} is not a string, re.Pattern, or None.") super(RegexPattern, self).__init__(include) + self.pattern: Union[AnyStr, PatternHint, None] = pattern + """ + *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or + :data:`None`) is the uncompiled, input pattern. This is for reference. + """ + self.regex: PatternHint = regex """ - *regex* (:class:`re.Pattern`) is the regular expression for the - pattern. + *regex* (:class:`re.Pattern`) is the regular expression for the pattern. """ def __eq__(self, other: 'RegexPattern') -> bool: @@ -157,11 +164,11 @@ def match_file(self, file: str) -> Optional['RegexMatchResult']: """ Matches this pattern against the specified file. - *file* (:class:`str`) - contains each file relative to the root directory (e.g., "relative/path/to/file"). + *file* (:class:`str`) contains each file relative to the root directory + (e.g., "relative/path/to/file"). - Returns the match result (:class:`.RegexMatchResult`) if *file* - matched; otherwise, :data:`None`. + Returns the match result (:class:`.RegexMatchResult`) if *file* matched; + otherwise, :data:`None`. 
""" if self.include is not None: match = self.regex.match(file) @@ -179,8 +186,8 @@ def pattern_to_regex(cls, pattern: str) -> Tuple[str, bool]: expression. Returns the uncompiled regular expression (:class:`str` or :data:`None`), - and whether matched files should be included (:data:`True`), - excluded (:data:`False`), or is a null-operation (:data:`None`). + and whether matched files should be included (:data:`True`), excluded + (:data:`False`), or is a null-operation (:data:`None`). .. NOTE:: The default implementation simply returns *pattern* and :data:`True`. @@ -191,8 +198,8 @@ def pattern_to_regex(cls, pattern: str) -> Tuple[str, bool]: @dataclasses.dataclass() class RegexMatchResult(object): """ - The :class:`RegexMatchResult` data class is used to return information - about the matched regular expression. + The :class:`RegexMatchResult` data class is used to return information about + the matched regular expression. """ # Keep the class dict-less. diff --git a/pathspec/util.py b/pathspec/util.py index 9408f25..54e1b2c 100644 --- a/pathspec/util.py +++ b/pathspec/util.py @@ -12,21 +12,26 @@ from collections.abc import ( Collection as CollectionType, Iterable as IterableType) +from dataclasses import ( + dataclass) from os import ( PathLike) from typing import ( Any, AnyStr, - Callable, - Collection, - Dict, - Iterable, - Iterator, - List, - Optional, - Sequence, - Set, - Union) + Callable, # Replaced by `collections.abc.Callable` in 3.9. + Collection, # Replaced by `collections.abc.Collection` in 3.9. + Dict, # Replaced by `dict` in 3.9. + Generic, + Iterable, # Replaced by `collections.abc.Iterable` in 3.9. + Iterator, # Replaced by `collections.abc.Iterator` in 3.9. + List, # Replaced by `list` in 3.9. + Optional, # Replaced by `X | None` in 3.10. + Sequence, # Replaced by `collections.abc.Sequence` in 3.9. + Set, # Replaced by `set` in 3.9. + Tuple, # Replaced by `tuple` in 3.9. + TypeVar, + Union) # Replaced by `X | Y` in 3.10. 
from .pattern import ( Pattern) @@ -36,6 +41,8 @@ else: StrPath = Union[str, PathLike] +TStrPath = TypeVar("TStrPath", bound=StrPath) + NORMALIZE_PATH_SEPS = [ __sep for __sep in [os.sep, os.altsep] @@ -73,6 +80,34 @@ def append_dir_sep(path: pathlib.Path) -> str: return str_path +def check_match_file( + patterns: Iterable[Tuple[int, Pattern]], + file: str, +) -> Tuple[Optional[bool], Optional[int]]: + """ + Check the file against the patterns. + + *patterns* (:class:`~collections.abc.Iterable`) yields each indexed pattern + (:class:`tuple`) which contains the pattern index (:class:`int`) and actual + pattern (:class:`~pathspec.pattern.Pattern`). + + *file* (:class:`str`) is the normalized file path to be matched + against *patterns*. + + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). + """ + out_include: Optional[bool] = None + out_index: Optional[int] = None + for index, pattern in patterns: + if pattern.include is not None and pattern.match_file(file) is not None: + out_include = pattern.include + out_index = index + + return out_include, out_index + + def detailed_match_files( patterns: Iterable[Pattern], files: Iterable[str], @@ -119,18 +154,22 @@ def detailed_match_files( return return_files -def _filter_patterns(patterns: Iterable[Pattern]) -> List[Pattern]: +def _filter_check_patterns( + patterns: Iterable[Pattern], +) -> List[Tuple[int, Pattern]]: """ Filters out null-patterns. *patterns* (:class:`Iterable` of :class:`.Pattern`) contains the patterns. - Returns the patterns (:class:`list` of :class:`.Pattern`). + Returns a :class:`list` containing each indexed pattern (:class:`tuple`) which + contains the pattern index (:class:`int`) and the actual pattern + (:class:`~pathspec.pattern.Pattern`). 
""" return [ - __pat - for __pat in patterns + (__index, __pat) + for __index, __pat in enumerate(patterns) if __pat.include is not None ] @@ -148,7 +187,7 @@ def _is_iterable(value: Any) -> bool: def iter_tree_entries( root: StrPath, - on_error: Optional[Callable] = None, + on_error: Optional[Callable[[OSError], None]] = None, follow_links: Optional[bool] = None, ) -> Iterator['TreeEntry']: """ @@ -185,7 +224,7 @@ def _iter_tree_entries_next( root_full: str, dir_rel: str, memo: Dict[str, str], - on_error: Callable, + on_error: Callable[[OSError], None], follow_links: bool, ) -> Iterator['TreeEntry']: """ @@ -264,7 +303,7 @@ def _iter_tree_entries_next( def iter_tree_files( root: StrPath, - on_error: Optional[Callable] = None, + on_error: Optional[Callable[[OSError], None]] = None, follow_links: Optional[bool] = None, ) -> Iterator[str]: """ @@ -330,9 +369,8 @@ def match_file(patterns: Iterable[Pattern], file: str) -> bool: """ matched = False for pattern in patterns: - if pattern.include is not None: - if pattern.match_file(file) is not None: - matched = pattern.include + if pattern.include is not None and pattern.match_file(file) is not None: + matched = pattern.include return matched @@ -342,8 +380,8 @@ def match_files( files: Iterable[str], ) -> Set[str]: """ - DEPRECATED: This is an old function no longer used. Use the :func:`.match_file` - function with a loop for better results. + DEPRECATED: This is an old function no longer used. Use the + :func:`~pathspec.util.match_file` function with a loop for better results. Matches the files to the patterns. @@ -356,11 +394,11 @@ def match_files( Returns the matched files (:class:`set` of :class:`str`). """ warnings.warn(( - "util.match_files() is deprecated. Use util.match_file() with a " - "loop for better results." + f"{__name__}.match_files() is deprecated. Use {__name__}.match_file() with " + f"a loop for better results." 
), DeprecationWarning, stacklevel=2) - use_patterns = _filter_patterns(patterns) + use_patterns = [__pat for __pat in patterns if __pat.include is not None] return_files = set() for file in files: @@ -588,6 +626,38 @@ def second_path(self) -> str: return self.args[2] +@dataclass(frozen=True) +class CheckResult(Generic[TStrPath]): + """ + The :class:`CheckResult` class contains information about the file and which + pattern matched it. + """ + + # Make the class dict-less. + __slots__ = ( + 'file', + 'include', + 'index', + ) + + file: TStrPath + """ + *file* (:class:`str` or :class:`os.PathLike`) is the file path. + """ + + include: Optional[bool] + """ + *include* (:class:`bool` or :data:`None`) is whether to include or exclude the + file. If :data:`None`, no pattern matched. + """ + + index: Optional[int] + """ + *index* (:class:`int` or :data:`None`) is the index of the last pattern that + matched. If :data:`None`, no pattern matched. + """ + + class MatchDetail(object): """ The :class:`.MatchDetail` class contains information about diff --git a/tests/test_04_gitignore.py b/tests/test_04_gitignore.py index aa3c231..479bd75 100644 --- a/tests/test_04_gitignore.py +++ b/tests/test_04_gitignore.py @@ -7,11 +7,14 @@ from pathspec.gitignore import ( GitIgnoreSpec) +from .util import ( + debug_results, + get_includes) + class GitIgnoreSpecTest(unittest.TestCase): """ - The :class:`GitIgnoreSpecTest` class tests the :class:`.GitIgnoreSpec` - class. + The :class:`GitIgnoreSpecTest` class tests the :class:`.GitIgnoreSpec` class. """ def test_01_reversed_args(self): @@ -19,13 +22,18 @@ def test_01_reversed_args(self): Test reversed args for `.from_lines()`. 
""" spec = GitIgnoreSpec.from_lines('gitwildmatch', ['*.txt']) - results = set(spec.match_files([ + files = { 'a.txt', 'b.bin', - ])) - self.assertEqual(results, { + } + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + + self.assertEqual(ignores, { 'a.txt', - }) + }, debug) def test_02_dir_exclusions(self): """ @@ -43,17 +51,21 @@ def test_02_dir_exclusions(self): 'test2/b.bin', 'test2/c/c.txt', } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'test1/a.txt', 'test1/c/c.txt', 'test2/a.txt', 'test2/c/c.txt', - }) + }, debug) self.assertEqual(files - ignores, { 'test1/b.bin', 'test2/b.bin', - }) + }, debug) def test_02_file_exclusions(self): """ @@ -71,22 +83,26 @@ def test_02_file_exclusions(self): 'Y/b.txt', 'Y/Z/c.txt', } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'X/a.txt', 'X/Z/c.txt', 'Y/a.txt', 'Y/Z/c.txt', - }) + }, debug) self.assertEqual(files - ignores, { 'X/b.txt', 'Y/b.txt', - }) + }, debug) def test_02_issue_41_a(self): """ - Test including a file and excluding a directory with the same name - pattern, scenario A. + Test including a file and excluding a directory with the same name pattern, + scenario A. """ # Confirmed results with git (v2.42.0). 
spec = GitIgnoreSpec.from_lines([ @@ -94,31 +110,35 @@ def test_02_issue_41_a(self): '!*.yaml/', ]) files = { - 'dir.yaml/file.sql', # - + 'dir.yaml/file.sql', # - 'dir.yaml/file.yaml', # 1:*.yaml 'dir.yaml/index.txt', # - - 'dir/file.sql', # - - 'dir/file.yaml', # 1:*.yaml - 'dir/index.txt', # - - 'file.yaml', # 1:*.yaml + 'dir/file.sql', # - + 'dir/file.yaml', # 1:*.yaml + 'dir/index.txt', # - + 'file.yaml', # 1:*.yaml } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'dir.yaml/file.yaml', 'dir/file.yaml', 'file.yaml', - }) + }, debug) self.assertEqual(files - ignores, { 'dir.yaml/file.sql', 'dir.yaml/index.txt', 'dir/file.sql', 'dir/index.txt', - }) + }, debug) def test_02_issue_41_b(self): """ - Test including a file and excluding a directory with the same name - pattern, scenario B. + Test including a file and excluding a directory with the same name pattern, + scenario B. """ # Confirmed results with git (v2.42.0). 
spec = GitIgnoreSpec.from_lines([ @@ -126,31 +146,35 @@ def test_02_issue_41_b(self): '*.yaml', ]) files = { - 'dir.yaml/file.sql', # 2:*.yaml + 'dir.yaml/file.sql', # 2:*.yaml 'dir.yaml/file.yaml', # 2:*.yaml 'dir.yaml/index.txt', # 2:*.yaml - 'dir/file.sql', # - - 'dir/file.yaml', # 2:*.yaml - 'dir/index.txt', # - - 'file.yaml', # 2:*.yaml + 'dir/file.sql', # - + 'dir/file.yaml', # 2:*.yaml + 'dir/index.txt', # - + 'file.yaml', # 2:*.yaml } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'dir.yaml/file.sql', 'dir.yaml/file.yaml', 'dir.yaml/index.txt', 'dir/file.yaml', 'file.yaml', - }) + }, debug) self.assertEqual(files - ignores, { 'dir/file.sql', 'dir/index.txt', - }) + }, debug) def test_02_issue_41_c(self): """ - Test including a file and excluding a directory with the same name - pattern, scenario C. + Test including a file and excluding a directory with the same name pattern, + scenario C. """ # Confirmed results with git (v2.42.0). 
spec = GitIgnoreSpec.from_lines([ @@ -158,26 +182,30 @@ def test_02_issue_41_c(self): '!dir.yaml', ]) files = { - 'dir.yaml/file.sql', # - + 'dir.yaml/file.sql', # - 'dir.yaml/file.yaml', # 1:*.yaml 'dir.yaml/index.txt', # - - 'dir/file.sql', # - - 'dir/file.yaml', # 1:*.yaml - 'dir/index.txt', # - - 'file.yaml', # 1:*.yaml + 'dir/file.sql', # - + 'dir/file.yaml', # 1:*.yaml + 'dir/index.txt', # - + 'file.yaml', # 1:*.yaml } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'dir.yaml/file.yaml', 'dir/file.yaml', 'file.yaml', - }) + }, debug) self.assertEqual(files - ignores, { 'dir.yaml/file.sql', 'dir.yaml/index.txt', 'dir/file.sql', 'dir/index.txt', - }) + }, debug) def test_03_subdir(self): """ @@ -195,23 +223,26 @@ def test_03_subdir(self): 'dirG/dirH/fileJ', 'dirG/fileO', } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'dirG/dirH/fileI', 'dirG/dirH/fileJ', 'dirG/fileO', - }) + }, debug) self.assertEqual(files - ignores, { 'fileA', 'fileB', 'dirD/fileE', 'dirD/fileF', - }) + }, debug) def test_03_issue_19_a(self): """ - Test matching files in a subdirectory of an included directory, - scenario A. + Test matching files in a subdirectory of an included directory, scenario A. 
""" spec = GitIgnoreSpec.from_lines([ "dirG/", @@ -225,23 +256,26 @@ def test_03_issue_19_a(self): 'dirG/dirH/fileJ', 'dirG/fileO', } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'dirG/dirH/fileI', 'dirG/dirH/fileJ', 'dirG/fileO', - }) + }, debug) self.assertEqual(files - ignores, { 'fileA', 'fileB', 'dirD/fileE', 'dirD/fileF', - }) + }, debug) def test_03_issue_19_b(self): """ - Test matching files in a subdirectory of an included directory, - scenario B. + Test matching files in a subdirectory of an included directory, scenario B. """ spec = GitIgnoreSpec.from_lines([ "dirG/*", @@ -255,23 +289,26 @@ def test_03_issue_19_b(self): 'dirG/dirH/fileJ', 'dirG/fileO', } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'dirG/dirH/fileI', 'dirG/dirH/fileJ', 'dirG/fileO', - }) + }, debug) self.assertEqual(files - ignores, { 'fileA', 'fileB', 'dirD/fileE', 'dirD/fileF', - }) + }, debug) def test_03_issue_19_c(self): """ - Test matching files in a subdirectory of an included directory, - scenario C. + Test matching files in a subdirectory of an included directory, scenario C. 
""" spec = GitIgnoreSpec.from_lines([ "dirG/**", @@ -285,18 +322,22 @@ def test_03_issue_19_c(self): 'dirG/dirH/fileJ', 'dirG/fileO', } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'dirG/dirH/fileI', 'dirG/dirH/fileJ', 'dirG/fileO', - }) + }, debug) self.assertEqual(files - ignores, { 'fileA', 'fileB', 'dirD/fileE', 'dirD/fileF', - }) + }, debug) def test_04_issue_62(self): """ @@ -306,14 +347,19 @@ def test_04_issue_62(self): '*', '!product_dir/', ]) - results = set(spec.match_files([ + files = { 'anydir/file.txt', 'product_dir/file.txt', - ])) - self.assertEqual(results, { + } + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + + self.assertEqual(ignores, { 'anydir/file.txt', 'product_dir/file.txt', - }) + }, debug) def test_05_issue_39(self): """ @@ -331,16 +377,20 @@ def test_05_issue_39(self): 'important/e.txt', 'trace.c', } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { 'a.log', 'trace.c', - }) + }, debug) self.assertEqual(files - ignores, { 'b.txt', 'important/d.log', 'important/e.txt', - }) + }, debug) def test_06_issue_64(self): """ @@ -359,8 +409,12 @@ def test_06_issue_64(self): 'A/B/C/x', 'A/B/C/y.py', } - ignores = set(spec.match_files(files)) - self.assertEqual(ignores, files) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + + self.assertEqual(ignores, files, debug) def test_07_issue_74(self): """ @@ -380,21 +434,25 @@ def test_07_issue_74(self): 'test2/b.bin', 'test2/c/c.txt', } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + 
self.assertEqual(ignores, { 'test1/b.bin', 'test1/a.txt', 'test1/c/c.txt', 'test2/b.bin', - }) + }, debug) self.assertEqual(files - ignores, { 'test2/a.txt', 'test2/c/c.txt', - }) + }, debug) def test_08_issue_81_a(self): """ - Test issue 81, scenario A. + Test issue 81 whitelist, scenario A. """ # Confirmed results with git (v2.42.0). spec = GitIgnoreSpec.from_lines([ @@ -403,20 +461,24 @@ def test_08_issue_81_a(self): "!libfoo/**", ]) files = { - "ignore.txt", # 1:* + "ignore.txt", # 1:* "libfoo/__init__.py", # 3:!libfoo/** } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { "ignore.txt", - }) + }, debug) self.assertEqual(files - ignores, { "libfoo/__init__.py", - }) + }, debug) def test_08_issue_81_b(self): """ - Test issue 81, scenario B. + Test issue 81 whitelist, scenario B. """ # Confirmed results with git (v2.42.0). spec = GitIgnoreSpec.from_lines([ @@ -425,20 +487,24 @@ def test_08_issue_81_b(self): "!libfoo/*", ]) files = { - "ignore.txt", # 1:* + "ignore.txt", # 1:* "libfoo/__init__.py", # 3:!libfoo/* } - ignores = set(spec.match_files(files)) + + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) + self.assertEqual(ignores, { "ignore.txt", - }) + }, debug) self.assertEqual(files - ignores, { "libfoo/__init__.py", - }) + }, debug) def test_08_issue_81_c(self): """ - Test issue 81, scenario C. + Test issue 81 whitelist, scenario C. """ # Confirmed results with git (v2.42.0). 
spec = GitIgnoreSpec.from_lines([ @@ -447,12 +513,14 @@ def test_08_issue_81_c(self): "!libfoo/", ]) files = { - "ignore.txt", # 1:* + "ignore.txt", # 1:* "libfoo/__init__.py", # 1:* } - ignores = set(spec.match_files(files)) + results = list(spec.check_files(files)) + ignores = get_includes(results) + debug = debug_results(spec, results) self.assertEqual(ignores, { "ignore.txt", "libfoo/__init__.py", - }) + }, debug) self.assertEqual(files - ignores, set()) diff --git a/tests/util.py b/tests/util.py index 301427b..fa22870 100644 --- a/tests/util.py +++ b/tests/util.py @@ -7,8 +7,73 @@ import pathlib from typing import ( - Iterable, - Tuple) + Iterable, # Replaced by `collections.abc.Iterable` in 3.9. + Tuple, # Replaced by `tuple` in 3.9. + cast) + +from pathspec import ( + PathSpec, + RegexPattern) +from pathspec.util import ( + CheckResult, + TStrPath) + + +def debug_results(spec: PathSpec, results: Iterable[CheckResult[str]]) -> str: + """ + Format the check results message. + + *spec* (:class:`~pathspec.PathSpec`) is the path-spec. + + *results* (:class:`~collections.abc.Iterable` of :class:`~pathspec.util.CheckResult`) + yields each file check result. + + Returns the message (:class:`str`).
+ """ + patterns = cast(list[RegexPattern], spec.patterns) + + result_table = [] + for result in results: + if result.index is not None: + pattern = patterns[result.index] + result_table.append((f"{result.index + 1}:{pattern.pattern}", result.file)) + else: + result_table.append(("-", result.file)) + + result_table.sort(key=lambda r: r[1]) + + first_max_len = max((len(__row[0]) for __row in result_table), default=0) + first_width = min(first_max_len, 20) + + result_lines = [] + for row in result_table: + result_lines.append(f" {row[0]:<{first_width}} {row[1]}") + + pattern_lines = [] + for index, pattern in enumerate(patterns, 1): + first_col = f"{index}:{pattern.pattern}" + pattern_lines.append(f" {first_col:<{first_width}} {pattern.regex.pattern!r}") + + return "\n".join([ + "\n", + " DEBUG ".center(32, "-"), + *pattern_lines, + "-"*32, + *result_lines, + "-"*32, + ]) + + +def get_includes(results: Iterable[CheckResult[TStrPath]]) -> set[TStrPath]: + """ + Get the included files from the check results. + + *results* (:class:`~collections.abc.Iterable` of :class:`~pathspec.util.CheckResult`) + yields each file check result. + + Returns the included files (:class:`set` of :class:`str`). + """ + return {__res.file for __res in results if __res.include} def make_dirs(temp_dir: pathlib.Path, dirs: Iterable[str]) -> None: