Skip to content

Commit

Permalink
Implement a matcher for pattern matching with globs.
Browse files Browse the repository at this point in the history
This is the first step to implementing wildcard `$foo...` support in --mode=py.expr/py.stmt ([#6](#6)).

I actually wanted to go the rest of the way with it, but may as well start sending out PRs for intermediate steps.

PiperOrigin-RevId: 328608581
  • Loading branch information
devinj authored and copybara-github committed Oct 12, 2020
1 parent f55bd49 commit 9b408bc
Show file tree
Hide file tree
Showing 2 changed files with 206 additions and 1 deletion.
118 changes: 117 additions & 1 deletion refex/python/matchers/base_matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,12 @@

from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function

import collections
import re
from typing import Container, List
from typing import Container, Iterable, List, Union
import weakref

import attr
Expand Down Expand Up @@ -563,3 +565,117 @@ def _match(self, context, candidate):
matcher.create_match(context.parsed_file, candidate))
else:
return None


# TODO: make this public after glob support is implemented, and it's determined
# this does the right thing.
# In particular, at time of writing, it does completely the wrong thing with
# bindings -- you can't add a bound GlobStar() :(
# @matcher.safe_to_eval
@attr.s(frozen=True)
class GlobStar(matcher.Matcher):
  """Wildcard standing for any run of items in a sequence.

  Only valid within :class:`Glob`.
  """

  def _match(self, context, candidate):
    """Always raises: a ``GlobStar`` cannot be matched on its own.

    Raises:
      matcher.MatchError: unconditionally.
    """
    # Glob does not call this method; it recognizes GlobStar instances with an
    # isinstance check inside its own search loop. Reaching this point means a
    # GlobStar was placed somewhere other than directly inside a Glob, which
    # is a bug in the caller.
    del candidate, context  # unused
    raise matcher.MatchError("GlobStar used outside of Glob")


def _blockify_glob_matchers(
    matchers: Iterable[matcher.Matcher]
) -> List[Union[GlobStar, List[matcher.Matcher]]]:
  """Groups matchers into GlobStar() items and runs of non-star matchers.

  Every ``GlobStar`` in ``matchers`` becomes its own entry in the result, and
  each maximal run of consecutive non-``GlobStar`` matchers becomes one list
  entry, in the original order.

  Args:
    matchers: the matchers to group.

  Returns:
    The list of blocks. When ``matchers`` is empty the result is ``[[]]``: a
    single block containing no matchers.
  """
  blocks = []
  run = []  # non-star matchers collected since the last GlobStar
  for m in matchers:
    if not isinstance(m, GlobStar):
      run.append(m)
      continue
    # A star terminates the run in progress (if any) and stands alone.
    if run:
      blocks.append(run)
      run = []
    blocks.append(m)

  # Flush the trailing run. With no matchers at all, this emits the single
  # empty block.
  if run or not blocks:
    blocks.append(run)

  return blocks


# TODO: make this public after glob support is implemented (see GlobStar)
@attr.s(frozen=True)
class Glob(matcher.Matcher):
  """Matches a sequence, with :class:`GlobStar` wildcards.

  For example, ``Glob(['a', GlobStar(), 'b'])`` matches any sequence which
  starts with ``'a'`` and ends with ``'b'``.

  :class:`GlobStar` is only valid directly within the body of a ``Glob``.
  """
  _matchers = matcher.submatcher_list_attrib()

  @cached_property.cached_property
  def _blocked_matchers(self):
    """The submatchers as grouped by ``_blockify_glob_matchers``."""
    return _blockify_glob_matchers(self._matchers)

  def _match(self, context, candidate):
    """Matches ``candidate`` against the glob.

    Args:
      context: the match context, passed through to the block matchers.
      candidate: the object to match. Anything that is not a sequence does not
        match.

    Returns:
      A ``matcher.MatchInfo`` carrying the merged bindings of every block, or
      ``None`` if the candidate does not match.
    """
    # The ``collections.Sequence`` alias was removed in Python 3.10; the ABC
    # lives in ``collections.abc``. Imported here so that this class does not
    # depend on the module-level import list.
    from collections import abc as collections_abc

    if not isinstance(candidate, collections_abc.Sequence):
      # Consistent with every other failure path: "no match" is None.
      return None

    # https://research.swtch.com/glob
    #
    # The following algorithm is courtesy of the insight (from Russ Cox and
    # others): you can do a backtracking search to find the substrings
    # ("blocks" here), but not backtrack past the most recent GlobStar.
    #
    # (The algorithm looks a bit different because we're borrowing the idea,
    # not the code.)

    # TODO: allow for ``Bind('name', GlobStar())``

    blocks = self._blocked_matchers
    final_index = len(blocks) - 1
    total = len(candidate)

    after_star = False  # True when the previous block was a GlobStar.
    pos = 0  # Everything in candidate[:pos] has been consumed.
    bindings = {}
    for index, block in enumerate(blocks):
      if isinstance(block, GlobStar):
        after_star = True
        continue

      if not after_star:
        # No wildcard in front of this block: it must start exactly at pos.
        starts = range(pos, pos + 1)
      elif index == final_index:
        # ``..., GlobStar(), [block of size k]``: the block has to consume the
        # end of the candidate, so the only viable placement is the last k
        # items. Taking the leftmost match instead would wrongly reject
        # e.g. ``[GlobStar(), 'a']`` against ``'aba'``.
        anchored = total - len(block)
        starts = range(anchored, anchored + 1) if anchored >= pos else range(0)
      else:
        # Leftmost placement is enough for a block that is followed by another
        # GlobStar.
        starts = range(pos, total)
      after_star = False

      result = None
      for match_start in starts:
        match_end = match_start + len(block)
        result = ItemsAre(block).match(context,
                                       candidate[match_start:match_end])
        if result is not None:
          pos = match_end
          break

      if result is None:
        return None
      bindings = matcher.merge_bindings(bindings, result.bindings)
      if bindings is None:
        return None

    # Leftover items are only acceptable when a trailing GlobStar absorbs them.
    if pos != total and not after_star:
      return None

    return matcher.MatchInfo(
        matcher.create_match(context.parsed_file, candidate),
        bindings,
    )

89 changes: 89 additions & 0 deletions refex/python/matchers/test_base_matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,5 +633,94 @@ def test_match_lines(self):
base_matchers.InLines(lines=[2, 4]), source), ['c = d', 'g = h'])


class GlobTest(parameterized.TestCase):
  """Tests for ``base_matchers.Glob`` with and without ``GlobStar``."""

  # A glob without wildcards matches a sequence with exactly those items,
  # whether it is a string or a list.
  @parameterized.parameters(['abc'], [['a', 'b', 'c']])
  def test_sequence(self, abc_seq):
    glob = base_matchers.Glob(['a', 'b', 'c'])
    self.assertIsNotNone(glob.match(_FAKE_CONTEXT, abc_seq))

  # Without wildcards, extra, missing, or different items are all rejected.
  @parameterized.parameters(
      'prefix_abc',
      'abc_suffix',
      '',
      'axc',
  )
  def test_sequence_nomatch(self, not_abc):
    glob = base_matchers.Glob(['a', 'b', 'c'])
    self.assertIsNone(glob.match(_FAKE_CONTEXT, not_abc))

  # The empty glob matches only the empty sequence.
  def test_empty(self):
    empty_glob = base_matchers.Glob([])
    matched = empty_glob.match(_FAKE_CONTEXT, '')
    self.assertIsNotNone(matched)
    unmatched = empty_glob.match(_FAKE_CONTEXT, 'x')
    self.assertIsNone(unmatched)

  # One star, or two stars in a row, match anything.
  @parameterized.parameters(
      '',
      'x',
  )
  def test_star(self, seq):
    one_star = base_matchers.Glob([base_matchers.GlobStar()])
    self.assertIsNotNone(one_star.match(_FAKE_CONTEXT, seq))

    two_stars = base_matchers.Glob(
        [base_matchers.GlobStar(),
         base_matchers.GlobStar()])
    self.assertIsNotNone(two_stars.match(_FAKE_CONTEXT, seq))

  # 'a' followed by a star: the sequence must start with 'a'.
  @parameterized.parameters(
      'a',
      'ab',
  )
  def test_prefix_star(self, seq):
    glob = base_matchers.Glob(['a', base_matchers.GlobStar()])
    self.assertIsNotNone(glob.match(_FAKE_CONTEXT, seq))

  @parameterized.parameters(
      '',
      'ba',
  )
  def test_prefix_star_nomatch(self, seq):
    glob = base_matchers.Glob(['a', base_matchers.GlobStar()])
    self.assertIsNone(glob.match(_FAKE_CONTEXT, seq))

  # A star followed by 'a': the sequence must end with 'a'.
  @parameterized.parameters(
      'a',
      'ba',
  )
  def test_star_suffix(self, seq):
    glob = base_matchers.Glob([base_matchers.GlobStar(), 'a'])
    self.assertIsNotNone(glob.match(_FAKE_CONTEXT, seq))

  @parameterized.parameters(
      '',
      'ab',
  )
  def test_star_suffix_nomatch(self, seq):
    glob = base_matchers.Glob([base_matchers.GlobStar(), 'a'])
    self.assertIsNone(glob.match(_FAKE_CONTEXT, seq))

  # Fixed blocks at both ends and in the middle, separated by stars.
  @parameterized.parameters(
      'abcd',
      'a bcd',
      'abc d',
      'a bc d',
  )
  def test_sandich(self, seq):
    star = base_matchers.GlobStar
    glob = base_matchers.Glob(['a', star(), 'b', 'c', star(), 'd'])
    self.assertIsNotNone(glob.match(_FAKE_CONTEXT, seq))


if __name__ == '__main__':
absltest.main()

0 comments on commit 9b408bc

Please sign in to comment.