From 9b408bc68074a20da825ced4a40a1baf3b2a7d8f Mon Sep 17 00:00:00 2001
From: devinj
Date: Wed, 26 Aug 2020 14:29:28 -0700
Subject: [PATCH] Implement a matcher for pattern matching with globs.

This is the first step to implementing wildcard `$foo...` support in --mode=py.expr/py.stmt ([#6](https://github.com/ssbr/refex/issues/6)).

I actually wanted to go the rest of the way with it, but may as well start sending out PRs for intermediate steps.

PiperOrigin-RevId: 328608581
---
 refex/python/matchers/base_matchers.py      | 128 +++++++++++++++++++-
 refex/python/matchers/test_base_matchers.py | 104 ++++++++++++++++
 2 files changed, 231 insertions(+), 1 deletion(-)

diff --git a/refex/python/matchers/base_matchers.py b/refex/python/matchers/base_matchers.py
index c6eb51c..bfd3308 100644
--- a/refex/python/matchers/base_matchers.py
+++ b/refex/python/matchers/base_matchers.py
@@ -67,10 +67,12 @@
 
 from __future__ import absolute_import
 from __future__ import division
+# from __future__ import google_type_annotations
 from __future__ import print_function
 
+import collections.abc
 import re
-from typing import Container, List
+from typing import Container, Iterable, List, Union
 import weakref
 
 import attr
@@ -563,3 +565,127 @@ def _match(self, context, candidate):
           matcher.create_match(context.parsed_file, candidate))
     else:
       return None
+
+
+# TODO: make this public after glob support is implemented, and it's determined
+# this does the right thing.
+# In particular, at time of writing, it does completely the wrong thing with
+# bindings -- you can't add a bound GlobStar() :(
+# @matcher.safe_to_eval
+@attr.s(frozen=True)
+class GlobStar(matcher.Matcher):
+  """Matches any sequence of items in a sequence.
+
+  Only valid within :class:`Glob`.
+  """
+  def _match(self, context, candidate):
+    del context, candidate  # unused
+    # _match isn't called by Glob; it instead specially recognizes GlobStar
+    # inside its own search algorithm. A GlobStar is a bug when present in
+    # any other context.
+    raise matcher.MatchError("GlobStar used outside of Glob")
+
+
+def _blockify_glob_matchers(
+    matchers: Iterable[matcher.Matcher]
+) -> List[Union[GlobStar, List[matcher.Matcher]]]:
+  """Splits matchers into GlobStar()s and blocks of consecutive non-* matchers.
+
+  Each run of adjacent non-GlobStar matchers becomes one list, in order, and
+  each GlobStar is kept as its own entry. If ``matchers`` is empty, or ends in
+  a non-GlobStar matcher, the final entry is a list (``[[]]`` when empty).
+  """
+  blocks = []
+  current = []
+  for m in matchers:
+    if isinstance(m, GlobStar):
+      if current:
+        blocks.append(current)
+      current = m
+    else:
+      if isinstance(current, GlobStar):
+        blocks.append(current)
+        current = []
+      current.append(m)
+  blocks.append(current)
+
+  return blocks
+
+
+# TODO: make this public after glob support is implemented (see GlobStar)
+@attr.s(frozen=True)
+class Glob(matcher.Matcher):
+  """Matches a sequence, with :class:`GlobStar` wildcards.
+
+  For example, ``Glob(['a', GlobStar(), 'b'])`` matches any sequence which
+  starts with ``'a'`` and ends with ``'b'``.
+
+  :class:`GlobStar` is only valid directly within the body of a :class:`Glob`.
+
+  Candidates which are not sequences do not match.
+  """
+  _matchers = matcher.submatcher_list_attrib()
+
+  @cached_property.cached_property
+  def _blocked_matchers(self):
+    return _blockify_glob_matchers(self._matchers)
+
+  def _match(self, context, candidate):
+    if not isinstance(candidate, collections.abc.Sequence):
+      return None
+
+    # https://research.swtch.com/glob
+    #
+    # The following algorithm is courtesy of the insight (from Russ Cox and
+    # others): you can do a backtracking search to find the substrings
+    # ("blocks" here), but not backtrack past the most recent GlobStar.
+    #
+    # (The algorithm looks a bit different because we're borrowing the idea,
+    # not the code.)
+
+    # TODO: allow for ``Bind('name', GlobStar())``
+
+    blocks = self._blocked_matchers
+    is_search = False
+    pos = 0
+    bindings = {}
+    for i, block in enumerate(blocks):
+      if isinstance(block, GlobStar):
+        is_search = True
+        continue
+
+      if not is_search:
+        starts = (pos,)  # only one candidate to search.
+      elif i == len(blocks) - 1:
+        # A trailing block must line up with the end of the sequence, so the
+        # only viable candidate is the last len(block) items. Taking the
+        # first hit instead would reject e.g. 'aba' for [GlobStar(), 'a'].
+        last_start = len(candidate) - len(block)
+        starts = (last_start,) if last_start >= pos else ()
+      else:
+        starts = range(pos, len(candidate))
+
+      is_search = False
+      result = None
+      for match_start in starts:
+        match_end = match_start + len(block)
+        result = ItemsAre(block).match(context,
+                                       candidate[match_start:match_end])
+        if result is not None:
+          pos = match_end
+          break
+
+      if result is None:
+        return None
+      bindings = matcher.merge_bindings(bindings, result.bindings)
+      if bindings is None:
+        return None
+
+    if pos != len(candidate) and not is_search:
+      return None
+
+    return matcher.MatchInfo(
+        matcher.create_match(context.parsed_file, candidate),
+        bindings,
+    )
+
diff --git a/refex/python/matchers/test_base_matchers.py b/refex/python/matchers/test_base_matchers.py
index 1fdc2f7..79db86c 100644
--- a/refex/python/matchers/test_base_matchers.py
+++ b/refex/python/matchers/test_base_matchers.py
@@ -633,5 +633,109 @@ def test_match_lines(self):
             base_matchers.InLines(lines=[2, 4]), source), ['c = d', 'g = h'])
 
 
+class GlobTest(parameterized.TestCase):
+
+  @parameterized.parameters(['abc'], [['a', 'b', 'c']])
+  def test_sequence(self, abc_seq):
+    self.assertIsNotNone(
+        base_matchers.Glob(['a', 'b', 'c']).match(_FAKE_CONTEXT, abc_seq))
+
+  @parameterized.parameters(
+      'prefix_abc',
+      'abc_suffix',
+      '',
+      'axc',
+  )
+  def test_sequence_nomatch(self, not_abc):
+    self.assertIsNone(
+        base_matchers.Glob(['a', 'b', 'c']).match(_FAKE_CONTEXT, not_abc))
+
+  def test_empty(self):
+    empty_glob = base_matchers.Glob([])
+    self.assertIsNotNone(empty_glob.match(_FAKE_CONTEXT, ''))
+    self.assertIsNone(empty_glob.match(_FAKE_CONTEXT, 'x'))
+
+  @parameterized.parameters(
+      '',
+      'x',
+  )
+  def test_star(self, seq):
+    self.assertIsNotNone(
+        base_matchers.Glob([base_matchers.GlobStar()
+                           ]).match(_FAKE_CONTEXT, seq))
+    self.assertIsNotNone(
+        base_matchers.Glob([base_matchers.GlobStar(),
+                            base_matchers.GlobStar()
+                           ]).match(_FAKE_CONTEXT, seq))
+
+  @parameterized.parameters(
+      'a',
+      'ab',
+  )
+  def test_prefix_star(self, seq):
+    self.assertIsNotNone(
+        base_matchers.Glob(['a', base_matchers.GlobStar()
+                           ]).match(_FAKE_CONTEXT, seq))
+
+  @parameterized.parameters(
+      '',
+      'ba',
+  )
+  def test_prefix_star_nomatch(self, seq):
+    self.assertIsNone(
+        base_matchers.Glob(['a', base_matchers.GlobStar()
+                           ]).match(_FAKE_CONTEXT, seq))
+
+  @parameterized.parameters(
+      'a',
+      'ba',
+      'aba',
+  )
+  def test_star_suffix(self, seq):
+    self.assertIsNotNone(
+        base_matchers.Glob([base_matchers.GlobStar(),
+                            'a']).match(_FAKE_CONTEXT, seq))
+
+  @parameterized.parameters(
+      '',
+      'ab',
+  )
+  def test_star_suffix_nomatch(self, seq):
+    self.assertIsNone(
+        base_matchers.Glob([base_matchers.GlobStar(),
+                            'a']).match(_FAKE_CONTEXT, seq))
+
+  @parameterized.parameters(
+      'abcd',
+      'a bcd',
+      'abc d',
+      'a bc d',
+      'abcdd',
+  )
+  def test_sandwich(self, seq):
+    glob = base_matchers.Glob([
+        'a',
+        base_matchers.GlobStar(),
+        'b',
+        'c',
+        base_matchers.GlobStar(),
+        'd',
+    ])
+    self.assertIsNotNone(glob.match(_FAKE_CONTEXT, seq))
+
+  @parameterized.parameters(
+      'a',
+      'ab',
+  )
+  def test_sandwich_no_overlap(self, seq):
+    self.assertIsNone(
+        base_matchers.Glob(['a', base_matchers.GlobStar(),
+                            'a']).match(_FAKE_CONTEXT, seq))
+
+  def test_non_sequence(self):
+    star_glob = base_matchers.Glob([base_matchers.GlobStar()])
+    self.assertIsNone(star_glob.match(_FAKE_CONTEXT, 5))
+
+
 if __name__ == '__main__':
   absltest.main()