From 5623a87cfbb2fb19eb00d2334b51bcd3e59f8425 Mon Sep 17 00:00:00 2001
From: Devin Jeanpierre
Date: Mon, 25 Sep 2023 13:57:48 -0700
Subject: [PATCH] Support `**$...` for keyword arguments.

This pretty much just works out of the box, although it is ordering-dependent.
For instance, to match any call to `foo.Bar` which does _not_ include
`dry_run=True`, you can use:

```python
AllOf(
    ExprPattern("foo.Bar(*$..., **$...)"),
    Unless(ExprPattern("$_(*$..., **$..., dry_run=True, **$...)"))
)
```

Or similar. A future change to refex (which I do want to make eventually...)
could make keyword argument and dict ordering ignored by default, so that we
could remove one of the `**$...`.
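The same ordering trick lets you bind a specific keyword argument while
ignoring all the others. A sketch, mirroring the new `test_call_glob` added
below (`foo` and `kw` are just the example names used in the tests):

```python
from refex.python import matcher
from refex.python.matchers import syntax_matchers

parsed = matcher.parse_ast('foo(1, 2, a=3, kw=42, *4, **5)', '')
expr = parsed.tree.body[0].value

# `*$...` absorbs any positional arguments; the two `**$...` absorb any
# keyword arguments before and after `kw`.
pattern = syntax_matchers.ExprPattern('foo(*$..., **$..., kw=$kw, **$...)')
m = pattern.match(matcher.MatchContext(parsed), expr)
assert m is not None
assert m.bindings['kw'].value.string == '42'
```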
PiperOrigin-RevId: 568317030
---
 refex/python/matcher.py                       | 178 +++++++++++-------
 refex/python/matchers/ast_matchers.py         |  30 +++
 refex/python/matchers/base_matchers.py        |  25 ++-
 refex/python/matchers/syntax_matchers.py      |  73 ++++---
 refex/python/matchers/test_base_matchers.py   |  20 +-
 refex/python/matchers/test_syntax_matchers.py |  57 +++++-
 refex/python/python_pattern.py                | 114 +++++++----
 refex/python/syntactic_template.py            |   6 +-
 refex/python/test_python_pattern.py           |  36 +++-
 refex/test_cli.py                             |  27 ++-
 10 files changed, 402 insertions(+), 164 deletions(-)

diff --git a/refex/python/matcher.py b/refex/python/matcher.py
index d17c47a..6bf2e0d 100644
--- a/refex/python/matcher.py
+++ b/refex/python/matcher.py
@@ -203,74 +203,6 @@ def register_constant(name: str, constant: Any):
   registered_constants[name] = constant
 
 
-def coerce(value):  # Nobody uses coerce. pylint: disable=redefined-builtin
-  """Returns the 'intended' matcher given by `value`.
-
-  If `value` is already a matcher, then this is what is returned.
-
-  If `value` is anything else, then coerce returns `ImplicitEquals(value)`.
-
-  Args:
-    value: Either a Matcher, or a value to compare for equality.
-  """
-  if isinstance(value, Matcher):
-    return value
-  else:
-    return ImplicitEquals(value)
-
-
-def _coerce_list(values):
-  return [coerce(v) for v in values]
-
-
-# TODO(b/199577701): drop the **kwargs: Any in the *_attrib functions.
-
-_IS_SUBMATCHER_ATTRIB = __name__ + '._IS_SUBMATCHER_ATTRIB'
-_IS_SUBMATCHER_LIST_ATTRIB = __name__ + '._IS_SUBMATCHER_LIST_ATTRIB'
-
-
-def submatcher_attrib(*args, walk: bool = True, **kwargs: Any):
-  """Creates an attr.ib that is marked as a submatcher.
-
-  This will cause the matcher to be automatically walked as part of the
-  computation of .bind_variables. Any submatcher that can introduce a binding
-  must be listed as a submatcher_attrib or submatcher_list_attrib.
-
-  Args:
-    *args: Forwarded to attr.ib.
-    walk: Whether or not to walk to accumulate .bind_variables.
-    **kwargs: Forwarded to attr.ib.
-
-  Returns:
-    An attr.ib()
-  """
-  if walk:
-    kwargs.setdefault('metadata', {})[_IS_SUBMATCHER_ATTRIB] = True
-  kwargs.setdefault('converter', coerce)
-  return attr.ib(*args, **kwargs)
-
-
-def submatcher_list_attrib(*args, walk: bool = True, **kwargs: Any):
-  """Creates an attr.ib that is marked as an iterable of submatchers.
-
-  This will cause the matcher to be automatically walked as part of the
-  computation of .bind_variables. Any submatcher that can introduce a binding
-  must be listed as a submatcher_attrib or submatcher_list_attrib.
-
-  Args:
-    *args: Forwarded to attr.ib.
-    walk: Whether or not to walk to accumulate .bind_variables.
-    **kwargs: Forwarded to attr.ib.
-
-  Returns:
-    An attr.ib()
-  """
-  if walk:
-    kwargs.setdefault('metadata', {})[_IS_SUBMATCHER_LIST_ATTRIB] = True
-  kwargs.setdefault('converter', _coerce_list)
-  return attr.ib(*args, **kwargs)
-
-
 # TODO: make MatchObject, MatchInfo, and Matcher generic, parameterized
 # by match type. Since pytype doesn't support generics yet, that's not an
 # option, but it'd greatly clarify the API by allowing us to classify matchers
@@ -921,6 +853,16 @@ def bind_variables(self):
   type_filter = None
 
 
+class ContextualMatcher(Matcher):
+  """A matcher which requires special understanding in context.
+
+  By default, contextual matchers are not allowed inside a submatcher
+  attribute.
+  To allow one, specify, for instance,
+  ``submatcher_attrib(contextual=MyContextualMatcher)``.
+  """
+
+  pass
 
 
 def accumulating_matcher(f):
@@ -1026,6 +968,106 @@ class ParseError(Exception):
   """
 
 
+def coerce(value):  # Nobody uses coerce. pylint: disable=redefined-builtin
+  """Returns the 'intended' matcher given by `value`.
+
+  If `value` is already a matcher, then this is what is returned.
+
+  If `value` is anything else, then coerce returns `ImplicitEquals(value)`.
+
+  Args:
+    value: Either a Matcher, or a value to compare for equality.
+  """
+  if isinstance(value, Matcher):
+    return value
+  else:
+    return ImplicitEquals(value)
+
+
+def _coerce_list(values):
+  return [coerce(v) for v in values]
+
+
+# TODO(b/199577701): drop the **kwargs: Any in the *_attrib functions.
+
+_IS_SUBMATCHER_ATTRIB = __name__ + '._IS_SUBMATCHER_ATTRIB'
+_IS_SUBMATCHER_LIST_ATTRIB = __name__ + '._IS_SUBMATCHER_LIST_ATTRIB'
+
+
+def _submatcher_validator(old_validator, contextual):
+  def validator(o: object, attribute: attr.Attribute, m: Matcher):
+    if isinstance(m, ContextualMatcher) and not isinstance(m, contextual):
+      raise TypeError(
+          f'Cannot use a `{m}` in `{type(o).__name__}.{attribute.name}`.'
+      )
+    if old_validator is not None:
+      old_validator(o, attribute, m)
+
+  return validator
+
+
+def submatcher_attrib(
+    *args,
+    walk: bool = True,
+    contextual: type[ContextualMatcher]
+    | tuple[type[ContextualMatcher], ...] = (),
+    **kwargs: Any,
+):
+  """Creates an attr.ib that is marked as a submatcher.
+
+  This will cause the matcher to be automatically walked as part of the
+  computation of .bind_variables. Any submatcher that can introduce a binding
+  must be listed as a submatcher_attrib or submatcher_list_attrib.
+
+  Args:
+    *args: Forwarded to attr.ib.
+    walk: Whether or not to walk to accumulate .bind_variables.
+    contextual: The contextual matcher classes to allow, if any.
+    **kwargs: Forwarded to attr.ib.
+
+  Returns:
+    An attr.ib()
+  """
+  if walk:
+    kwargs.setdefault('metadata', {})[_IS_SUBMATCHER_ATTRIB] = True
+  kwargs.setdefault('converter', coerce)
+  kwargs['validator'] = _submatcher_validator(
+      kwargs.get('validator'), contextual
+  )
+  return attr.ib(*args, **kwargs)
+
+
+def submatcher_list_attrib(
+    *args,
+    walk: bool = True,
+    contextual: type[ContextualMatcher]
+    | tuple[type[ContextualMatcher], ...] = (),
+    **kwargs: Any,
+):
+  """Creates an attr.ib that is marked as an iterable of submatchers.
+
+  This will cause the matcher to be automatically walked as part of the
+  computation of .bind_variables. Any submatcher that can introduce a binding
+  must be listed as a submatcher_attrib or submatcher_list_attrib.
+
+  Args:
+    *args: Forwarded to attr.ib.
+    walk: Whether or not to walk to accumulate .bind_variables.
+    contextual: The contextual matcher classes to allow, if any.
+    **kwargs: Forwarded to attr.ib.
+
+  Returns:
+    An attr.ib()
+  """
+  if walk:
+    kwargs.setdefault('metadata', {})[_IS_SUBMATCHER_LIST_ATTRIB] = True
+  kwargs.setdefault('converter', _coerce_list)
+  kwargs['validator'] = attr.validators.deep_iterable(
+      _submatcher_validator(kwargs.get('validator'), contextual)
+  )
+  return attr.ib(*args, **kwargs)
+
+
 @attr.s(frozen=True)
 class _CompareById:
   """Wrapper object to compare things by identity."""
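For orientation: a matcher class opts in to a contextual matcher via the new
`contextual=` parameter, mirroring what `Glob` does in base_matchers below.
An illustrative sketch only; `_PairGlob` is a hypothetical name, and `_match`
is omitted for brevity:

```python
import attr

from refex.python import matcher
from refex.python.matchers import base_matchers


@attr.s(frozen=True)
class _PairGlob(matcher.Matcher):  # hypothetical example class
  # GlobStar is rejected by the new validator everywhere except in
  # attributes that explicitly allow it, like this one:
  _matchers = matcher.submatcher_list_attrib(
      contextual=base_matchers.GlobStar
  )
```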
diff --git a/refex/python/matchers/ast_matchers.py b/refex/python/matchers/ast_matchers.py
index ede7ad0..3fb047e 100644
--- a/refex/python/matchers/ast_matchers.py
+++ b/refex/python/matchers/ast_matchers.py
@@ -206,3 +206,33 @@ def _match(self, context, candidate):
                        (type(...),))
 
   type_filter = frozenset({ast.Constant})
+
+
+#####################################
+# High level AST matching overrides #
+#####################################
+
+# Firstly, a `*$...` is always equivalent to a `$...`.
+
+_old_starred = Starred  # pylint: disable=undefined-variable
+
+
+def Starred(**kw):  # pylint: disable=invalid-name
+  value = kw.get('value')
+  if isinstance(value, base_matchers.GlobStar):
+    return value
+  return _old_starred(**kw)
+
+
+# Similarly, a `**$...` is always morally equivalent to a `$...=$...` in a
+# call. (But the latter isn't valid syntax at the moment, so this is the only
+# way to spell it.)
+
+_old_keyword = keyword  # pylint: disable=undefined-variable
+
+
+def keyword(**kw):
+  value = kw.get('value')
+  arg = kw.get('arg')
+  if arg == base_matchers.Equals(None) and isinstance(value, base_matchers.GlobStar):
+    return value
+  return _old_keyword(**kw)
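The practical effect of these overrides, sketched using only the classes
shown above:

```python
from refex.python.matchers import ast_matchers, base_matchers

# `*$...` builds Starred(value=GlobStar()), which collapses to the GlobStar
# itself, so a surrounding Glob over the argument list can recognize it:
star = ast_matchers.Starred(value=base_matchers.GlobStar())
assert isinstance(star, base_matchers.GlobStar)

# `**$...` builds keyword(arg=Equals(None), value=GlobStar()) and collapses
# the same way:
kw = ast_matchers.keyword(
    arg=base_matchers.Equals(None), value=base_matchers.GlobStar()
)
assert isinstance(kw, base_matchers.GlobStar)
```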
diff --git a/refex/python/matchers/base_matchers.py b/refex/python/matchers/base_matchers.py
index ba83d96..273e1a8 100644
--- a/refex/python/matchers/base_matchers.py
+++ b/refex/python/matchers/base_matchers.py
@@ -125,15 +125,17 @@ def _match(self, context, candidate):
     raise TestOnlyRaisedError(self.message)
 
 
-@attr.s(init=False, frozen=True)
+@attr.s(frozen=True)
 class _NAryMatcher(matcher.Matcher):
   """Base class for matchers which take arbitrarily many submatchers in init."""
 
   _matchers = matcher.submatcher_list_attrib()
 
+
+# override __init__ to take *args
+class _NAryMatcher(_NAryMatcher):
   def __init__(self, *matchers):
-    super(_NAryMatcher, self).__init__()
-    self.__dict__['_matchers'] = matchers
+    super().__init__(matchers)
 
 
 @matcher.safe_to_eval
@@ -275,7 +277,6 @@ class Bind(matcher.Matcher):
     `, or None for the default strategy (``KEEP_LAST``).
   """
 
-  _NAME_REGEX = re.compile(r'\A(?!__)[a-zA-Z_]\w*\Z')
   name = attr.ib()
   _submatcher = matcher.submatcher_attrib(default=Anything())
@@ -284,7 +285,12 @@
       validator=attr.validators.in_(frozenset(matcher.BindConflict) | {None}))
   _on_merge = attr.ib(
       default=None,
-      validator=attr.validators.in_(frozenset(matcher.BindMerge) | {None}))
+      validator=attr.validators.in_(frozenset(matcher.BindMerge) | {None}),
+  )
+
+  # Constants go after attrs fields to work around static analysis tooling:
+  # b/301979723
+  _NAME_REGEX = re.compile(r'\A(?!__)[a-zA-Z_]\w*\Z')
 
   @name.validator
   def _name_validator(self, attribute, value):
@@ -781,12 +787,15 @@ def _match(self, context, candidate):
 # bindings -- you can't add a bound GlobStar() :(
 # @matcher.safe_to_eval
 @attr.s(frozen=True)
-class GlobStar(matcher.Matcher):
+class GlobStar(matcher.ContextualMatcher):
   """Matches any sequence of items in a sequence.
 
-  Only valid within :class:`Glob`.
+  Only valid within special matchers like :class:`Glob`.
   """
 
+  def __str__(self):
+    return '$...'
+
   def _match(self, context, candidate):
     del context, candidate  # unused
     # _match isn't called by GlobMatcher; it instead specially recognizes it
@@ -827,7 +836,7 @@ class Glob(matcher.Matcher):
   class:`GlobStar()` is only valid directly within the body of a `Glob`.
   """
 
-  _matchers = matcher.submatcher_list_attrib()
+  _matchers = matcher.submatcher_list_attrib(contextual=GlobStar)
 
   @cached_property.cached_property
   def _blocked_matchers(self):
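Spelled out as a sketch (the error text matches the new test below):

```python
from refex.python.matchers import base_matchers

# Fine: Glob opts in via submatcher_list_attrib(contextual=GlobStar).
base_matchers.Glob([base_matchers.GlobStar(), base_matchers.Equals('x')])

# TypeError at construction time: AllOf does not opt in, and GlobStar now
# renders as `$...` in the error message.
try:
  base_matchers.AllOf(base_matchers.GlobStar())
except TypeError as e:
  assert 'Cannot use a `$...` in `AllOf._matchers`.' in str(e)
```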
diff --git a/refex/python/matchers/syntax_matchers.py b/refex/python/matchers/syntax_matchers.py
index 5f3b3a1..03d81f3 100644
--- a/refex/python/matchers/syntax_matchers.py
+++ b/refex/python/matchers/syntax_matchers.py
@@ -103,10 +103,6 @@
 
 # pylint: disable=g-classes-have-attributes
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import abc
 import ast
 import inspect
@@ -125,34 +121,38 @@
 from refex.python.matchers import base_matchers
 
 
-def _remap_macro_variables(pattern: str) -> tuple[str, dict[str, str], set[str]]:
+def _remap_macro_variables(pattern: str) -> tuple[str, dict[str, str], set[str], set[str],
+]:
   """Renames the variables from the source pattern to give valid Python.
 
   Args:
-    pattern: A source pattern containing metavariables like "$foo".
+    pattern: A source pattern containing metavariables like ``$foo``, or
+      repeating metavariables like ``$foo...``.
 
   Returns:
-    (remapped_source, variables, anonymous_variables)
+    (remapped_source, variables, anonymous_variables, repeating)
 
     * remapped_source is the pattern, but with all dollar-prefixed variables
       replaced with unique non-dollar-prefixed versions.
     * variables is the mapping of the original name to the remapped name.
     * anonymous_variables is a set of remapped names that came from `_`.
+    * repeating is the set of remapped names which are defined to repeat many
+      times.
 
   Raises:
     SyntaxError: The pattern can't be parsed.
   """
-  remapped_tokens, metavar_indices = python_pattern.token_pattern(pattern)
+  remapped_tokens, metavar_indices, repeating_metavar_indices = python_pattern.token_pattern(pattern)
   taken_tokens = {
-      token[1]
+      token.string
       for i, token in enumerate(remapped_tokens)
       if i not in metavar_indices
   }
   original_to_unique = {}
   anonymous_unique = set()
+  repeating_unique = set()
 
-  for metavar_index in metavar_indices:
-    metavar_token = list(remapped_tokens[metavar_index])
-    variable = metavar_token[1]
+  for metavar_index in itertools.chain(metavar_indices, repeating_metavar_indices):
+    metavar_token = remapped_tokens[metavar_index]
+    variable = metavar_token.string
 
     if variable in original_to_unique:
       remapped_name = original_to_unique[variable]
@@ -175,23 +175,26 @@ def _remap_macro_variables(pattern: str) -> tuple[str, dict[str, str], set[str]]
     else:
       original_to_unique[variable] = remapped_name
       break
-    metavar_token[1] = remapped_name
-    remapped_tokens[metavar_index] = tuple(metavar_token)
+    metavar_token = metavar_token._replace(string=remapped_name)
+    remapped_tokens[metavar_index] = metavar_token
+    if metavar_index in repeating_metavar_indices:
+      repeating_unique.add(remapped_name)
 
   return (
       tokenize.untokenize(remapped_tokens),
       original_to_unique,
       anonymous_unique,
+      repeating_unique,
   )
 
 
-def _rewrite_submatchers(pattern, restrictions):
+def _rewrite_submatchers(pattern: str, restrictions: dict[str, matcher.Matcher]):
   """Rewrites pattern/restrictions to erase metasyntactic variables.
 
   Args:
     pattern: a pattern containing $variables.
     restrictions: a dictionary of variables to submatchers. If a variable is
-      missing, Anything() is used instead.
+      not specified, Anything() is used instead.
 
   Returns:
     (remapped_pattern, variables, new_submatchers)
@@ -200,22 +203,33 @@ def _rewrite_submatchers(pattern, restrictions):
     * remapped_pattern is the pattern, but with all dollar-prefixed variables
       replaced with unique non-dollar-prefixed versions.
     * variables is the mapping of the original name to the remapped name.
    * new_submatchers is a dict from remapped names to submatchers. Every
       non-anonymous variable is put in a Bind() node, which has a submatcher
-      taken from `restrictions`.
+      taken from ``restrictions``.
+      Repeated anonymous wildcards use ``GlobStar()``.
 
   Raises:
     KeyError: if restrictions has a key that isn't a variable name.
   """
-  pattern, variables, anonymous_remapped = _remap_macro_variables(pattern)
+  pattern, variables, anonymous_remapped, repeating_remapped = _remap_macro_variables(pattern)
   incorrect_variables = set(restrictions) - set(variables)
   if incorrect_variables:
     raise KeyError('Some variables specified in restrictions were missing. '
                    'Did you misplace a "$"? Missing variables: %r' %
                    incorrect_variables)
-  submatchers = {
-      new_name: base_matchers.Anything() for new_name in anonymous_remapped
-  }
+  submatchers = {}
+  for new_name in anonymous_remapped:
+    if new_name in repeating_remapped:
+      m = base_matchers.GlobStar()
+    else:
+      m = base_matchers.Anything()
+    submatchers[new_name] = m
+
   for old_name, new_name in variables.items():
+    if new_name in repeating_remapped:
+      raise ValueError(
+          'Repeated variables are not supported:'
+          ' use `$...` (unnamed repeated wildcard)'
+          f' instead of named `${old_name}...`.')
     submatchers[new_name] = base_matchers.Bind(
         old_name,
         restrictions.get(old_name, base_matchers.Anything()),
@@ -286,21 +300,20 @@ def _ast_pattern(tree, variables):
   # but does that even happen IRL?
   # TODO: use a stack.
   if isinstance(tree, list):
-    return base_matchers.ItemsAre([_ast_pattern(e, variables) for e in tree])
+    return base_matchers.Glob([_ast_pattern(e, variables) for e in tree])
   if not isinstance(tree, ast.AST):
     # e.g. the identifier for an ast.Name.
     return base_matchers.Equals(tree)
   if isinstance(tree, ast.Name):
     if tree.id in variables:
       return variables[tree.id]
-  return getattr(ast_matchers,
-                 type(tree).__name__)(
-                     **{
-                         field: _ast_pattern(getattr(tree, field), variables)
-                         for field in type(tree)._fields
-                         # Filter out variable ctx.
-                         if field != 'ctx' or not isinstance(tree, ast.Name)
-                     })
+  kwargs = {
+      field: _ast_pattern(getattr(tree, field), variables)
+      for field in type(tree)._fields
+      # Filter out variable ctx.
+      if field != 'ctx' or not isinstance(tree, ast.Name)
+  }
+  return getattr(ast_matchers, type(tree).__name__)(**kwargs)
 
 
 def _verify_variables(tree, variables):
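One consequence worth noting (a sketch; the message text comes from
`_rewrite_submatchers` above): only the anonymous `$...` may repeat, so a
named repeated variable fails fast.

```python
from refex.python.matchers import syntax_matchers

try:
  syntax_matchers.ExprPattern('[$xs...]')
except ValueError as e:
  print(e)  # Repeated variables are not supported: use `$...` ...
```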
diff --git a/refex/python/matchers/test_base_matchers.py b/refex/python/matchers/test_base_matchers.py
index 2815485..2978625 100644
--- a/refex/python/matchers/test_base_matchers.py
+++ b/refex/python/matchers/test_base_matchers.py
@@ -685,10 +685,15 @@ def test_repr(self):
     self.assertEqual(
         repr(
             base_matchers.RecursivelyWrapped(
-                ast_matchers.Num(), lambda i: ast_matchers.UnaryOp(
-                    op=ast_matchers.Invert(), operand=i))),
-        'RecursivelyWrapped(_matchers=(Num(n=Anything()),'
-        ' UnaryOp(op=Invert(), operand=_Recurse(...))))')
+                ast_matchers.Num(),
+                lambda i: ast_matchers.UnaryOp(
+                    op=ast_matchers.Invert(), operand=i
+                ),
+            )
+        ),
+        'RecursivelyWrapped(_matchers=[Num(n=Anything()),'
+        ' UnaryOp(op=Invert(), operand=_Recurse(...))])',
+    )
 
   def test_recursive_bindings(self):
     """Recursive matchers cover both recursive/base cases in .bind_variables.
@@ -756,6 +761,13 @@ def test_match_lines(self):
 
 class GlobTest(parameterized.TestCase):
 
+  def test_globstar_in_bad_location(self):
+    with self.assertRaises(TypeError) as cm:
+      base_matchers.AllOf(base_matchers.GlobStar())
+    self.assertIn(
+        'Cannot use a `$...` in `AllOf._matchers`.', str(cm.exception)
+    )
+
   @parameterized.parameters(['abc'], [['a', 'b', 'c']])
   def test_sequence(self, abc_seq):
     self.assertIsNotNone(
+ """ + + parsed = matcher.parse_ast('{key1: value1, key2: value2}', '') + expr = parsed.tree.body[0].value + m = syntax_matchers.ExprPattern('{$...:$..., key1:value2, $...:$...}') + # TODO(b/301637225): This shouldn't match. + self.assertIsNotNone(m.match(_FAKE_CONTEXT, expr)) + + @parameterized.parameters( + 'foo(kw=42)', + 'foo(1, 2, kw=42, *3)', + 'foo(1, 2, a=3, kw=42, *4, **5)', + ) + def test_call_glob(self, s): + """Tests pulling a keyword argument `kw` out of a call.""" + + parsed = matcher.parse_ast(s, '') + expr = parsed.tree.body[0].value + call_matcher = syntax_matchers.ExprPattern( + 'foo(*$..., **$..., kw=$kw, **$...)' + ) + call_match = call_matcher.match(matcher.MatchContext(parsed), expr) + self.assertIsNotNone(call_match) + result = call_match.bindings['kw'].value.string + self.assertEqual(result, '42') + class StmtPatternTest(matcher_test_util.MatcherTestCase): diff --git a/refex/python/python_pattern.py b/refex/python/python_pattern.py index 3afd714..7811cec 100644 --- a/refex/python/python_pattern.py +++ b/refex/python/python_pattern.py @@ -25,32 +25,52 @@ _VARIABLE_REGEX = re.compile(r'\A[a-zA-Z_][a-zA-Z0-9_]*\Z') -# TODO: replace tuple token manipulation with namedtuple manipulation, -# when we can be Python3-only, in this and its callers. -# For example: -# Py2: print(tok[1]) -# tok = list(tok); tok[1] = ''; tok = tuple(tok) -# Py3: print(tok.string) -# tok = tok._replace(string='') - -def token_pattern(pattern): +def token_pattern( + pattern: str, +) -> tuple[list[tokenize.TokenInfo], set[int], set[int]]: """Tokenizes a source pattern containing metavariables like "$foo". + The following special forms are accepted: + + * Matches anything once, binding to the name ``foo``: ``$foo``. + * Matches anything ``n`` times, binding to the name ``bars``: ``$bars...``. + * Matches anything ``n`` times, binding to the name ``_``: ``$...``. + + (The repeated matching syntax makes the assumption that there's no useful + place in _Python_ for an identifier to be followed directly by a ``...``. If, + one day, Python allows an identifier directly followed by a ``...``, this is + possible to handle by extending the pattern syntax to match this via e.g. + ``${x}...`` vs ``${x...}``. However, this would not be a desirable state of + affairs.) + Args: pattern: A Python source pattern. Returns: - (tokenized, metavar_indices). + (tokenized, nonrepeating_metavar_indices, repeating_metavar_indices). tokenized: A list of source tokens, omitting the metavariable marker ($). - metavar_indices: - A set of token indexes. tokenized[i] is a metavariable token if and only - if i is in metavar_indices. + nonrepeating_metavar_indices: + A set of token indexes. tokenized[i] is a nonrepeating metavariable token + if and only if i is in nonrepeating_metavar_indices. + repeating_metavar_indices: + A set of token indexes. tokenized[i] is a repeating metavariable token + if and only if i is in repeating_metavar_indices. Raises: SyntaxError: The pattern can't be parsed. """ + # A note on column spans: + # + # untokenize() uses the gap between the end_col of the last token and the + # start_col of the next token to decide how many spaces to put -- there is no + # "space token". As a result, if we do nothing, in the expression `$x...`, + # the `$` and the `...` will each be filled with spaces when the `$` and `...` + # tokens are removed, even if `x` is replaced with a long token string such + # as `foooooo`. 
diff --git a/refex/python/python_pattern.py b/refex/python/python_pattern.py
index 3afd714..7811cec 100644
--- a/refex/python/python_pattern.py
+++ b/refex/python/python_pattern.py
@@ -25,32 +25,52 @@
 
 _VARIABLE_REGEX = re.compile(r'\A[a-zA-Z_][a-zA-Z0-9_]*\Z')
 
-# TODO: replace tuple token manipulation with namedtuple manipulation,
-# when we can be Python3-only, in this and its callers.
-# For example:
-#  Py2: print(tok[1])
-#       tok = list(tok); tok[1] = ''; tok = tuple(tok)
-#  Py3: print(tok.string)
-#       tok = tok._replace(string='')
-
-def token_pattern(pattern):
+def token_pattern(
+    pattern: str,
+) -> tuple[list[tokenize.TokenInfo], set[int], set[int]]:
   """Tokenizes a source pattern containing metavariables like "$foo".
 
+  The following special forms are accepted:
+
+  * Matches anything once, binding to the name ``foo``: ``$foo``.
+  * Matches anything ``n`` times, binding to the name ``bars``: ``$bars...``.
+  * Matches anything ``n`` times, binding to the name ``_``: ``$...``.
+
+  (The repeated matching syntax makes the assumption that there's no useful
+  place in _Python_ for an identifier to be followed directly by a ``...``.
+  If, one day, Python allows an identifier directly followed by a ``...``,
+  this is possible to handle by extending the pattern syntax to match this
+  via e.g. ``${x}...`` vs ``${x...}``. However, this would not be a desirable
+  state of affairs.)
+
   Args:
     pattern: A Python source pattern.
 
   Returns:
-    (tokenized, metavar_indices).
+    (tokenized, nonrepeating_metavar_indices, repeating_metavar_indices).
 
     tokenized:
      A list of source tokens, omitting the metavariable marker ($).
-    metavar_indices:
-      A set of token indexes. tokenized[i] is a metavariable token if and only
-      if i is in metavar_indices.
+    nonrepeating_metavar_indices:
+      A set of token indexes. tokenized[i] is a nonrepeating metavariable
+      token if and only if i is in nonrepeating_metavar_indices.
+    repeating_metavar_indices:
+      A set of token indexes. tokenized[i] is a repeating metavariable token
+      if and only if i is in repeating_metavar_indices.
 
   Raises:
     SyntaxError: The pattern can't be parsed.
   """
+  # A note on column spans:
+  #
+  # untokenize() uses the gap between the end_col of the last token and the
+  # start_col of the next token to decide how many spaces to put -- there is
+  # no "space token". As a result, if we do nothing, in the expression
+  # `$x...`, the `$` and the `...` will each be filled with spaces when the
+  # `$` and `...` tokens are removed, even if `x` is replaced with a long
+  # token string such as `foooooo`. To prevent this, we must mutate the
+  # start/end cols, resulting in tokens with a string value shorter than the
+  # col span.
+
   # Work around Python 3.6.7's newline requirement. See b/118359498.
   if pattern.endswith('\n'):
     added_newline = False
@@ -61,14 +81,30 @@ def token_pattern(pattern):
   try:
     tokens = list(tokenize.generate_tokens(io.StringIO(pattern).readline))
   except tokenize.TokenError as e:
-    raise SyntaxError("Couldn't tokenize %r: %s" % (pattern, e))
+    raise SyntaxError("Couldn't tokenize %r: %s" % (pattern, e)) from None
   retokenized = []
-  metavar_indices = set()
+  nonrepeating_metavar_indices = set()
+  repeating_metavar_indices = set()
 
   tokens_it = iter(tokens)
   for tok in tokens_it:
-    if tok[1] != '$':
+    if tok[1] == '...':
+      # If the last token was a nonrepeating variable, upgrade it to
+      # repeating. Otherwise, add `...` as-is.
+      last_token_index = len(retokenized) - 1
+      if last_token_index in nonrepeating_metavar_indices:
+        last_token = retokenized[last_token_index]
+        if last_token.end != tok.start:
+          raise SyntaxError(
+              f'No spaces allowed between metavariable and `...`: {pattern!r}'
+          )
+        retokenized[last_token_index] = last_token._replace(end=tok.end)
+        nonrepeating_metavar_indices.remove(last_token_index)
+        repeating_metavar_indices.add(last_token_index)
+      else:
+        retokenized.append(tok)
+    elif tok[1] != '$':
       # Just a note: in the presence of errors, even whitespace gets added as
       # error tokens, so we're including that here on purpose.
       retokenized.append(tok)
@@ -79,32 +115,32 @@ def token_pattern(pattern):
     except StopIteration:
       # This should never happen, because we get an ENDMARKER token.
      # But who knows, the token stream may change in the future.
-      raise SyntaxError('Expected variable after $, got EOF')
-    variable = variable_token[1]
-    if not _VARIABLE_REGEX.match(variable):
+      raise SyntaxError('Expected variable after $, got EOF') from None
+    if variable_token.string == '...':
+      is_repeated = True
+      variable_token = variable_token._replace(string='_')
+    elif _VARIABLE_REGEX.match(variable_token.string):
+      is_repeated = False
+    else:
+      raise SyntaxError(
+          'Expected variable name or ``...`` after $, but next token'
+          f" {variable_token.string!r} wasn't /{_VARIABLE_REGEX.pattern}/"
+          ' or ...'
+      )
+
+    if tok.end != variable_token.start:
       raise SyntaxError(
-          "Expected variable after $, but next token (%r) didn't match %s" %
-          (variable, _VARIABLE_REGEX.pattern))
-
-    start_row, start_col = variable_token[2]
-    # untokenize() uses the gap between the end_col of the last token and the
-    # start_col of this token to decide how many spaces to put -- there is no
-    # "space token". As a result, if we do nothing, the place where the "$"
-    # was will become a space. This is usually fine, but causes phantom
-    # indents and syntax errors if the $ was the first character on the line.
-    # e.g. it could not even parse the simple expression "$foo"
-    # To avoid this, we must remove 1 from start_col to make up for it.
-    if tok[2][1] != start_col - 1:
-      # newlines get a NL token, so we only need to worry about columns.
-      raise SyntaxError('No spaces allowed between $ and variable name: %r' %
-                        pattern)
+          f'No spaces allowed between $ and next token: {pattern!r}'
+      )
+    variable_token = variable_token._replace(start=tok.start)
+    if is_repeated:
+      repeating_metavar_indices.add(len(retokenized))
+    else:
+      nonrepeating_metavar_indices.add(len(retokenized))
+    retokenized.append(variable_token)
 
   # Undo damage required to work around Python 3.6.7's newline requirement
   # See b/118359498 for details.
   if added_newline and len(retokenized) >= 2 and retokenized[-2][1] == '\n':
     del retokenized[-2]
-  return retokenized, metavar_indices
+  return retokenized, nonrepeating_metavar_indices, repeating_metavar_indices
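A sketch of the new return shape (mirroring the updated tests below):

```python
import tokenize

from refex.python import python_pattern

tokens, nonrepeating, repeating = python_pattern.token_pattern('foo + $x...')
assert not nonrepeating
[i] = repeating  # exactly one repeating metavariable
assert tokens[i].string == 'x'
# Round-trips with the `$` and `...` stripped:
assert tokenize.untokenize(tokens) == 'foo + x'
```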
diff --git a/refex/python/syntactic_template.py b/refex/python/syntactic_template.py
index 603cad9..1cbcc21 100644
--- a/refex/python/syntactic_template.py
+++ b/refex/python/syntactic_template.py
@@ -69,7 +69,11 @@ class _LexicalTemplate:
   def __attrs_post_init__(self):
     # Because we have frozen=True, creating values for _tokens and _var_to_i
     # is a little complex, and requires invoking object.__setattr__.
-    tokenized, metavar_indices = python_pattern.token_pattern(self.template)
+    tokenized, metavar_indices, repeating_indices = (
+        python_pattern.token_pattern(self.template)
+    )
+    if repeating_indices:
+      raise ValueError('Repeated substitutions are not yet supported')
     var_to_i = {}
     for i in metavar_indices:
       var = tokenized[i][1]
diff --git a/refex/python/test_python_pattern.py b/refex/python/test_python_pattern.py
index 29ec6c9..0fac579 100644
--- a/refex/python/test_python_pattern.py
+++ b/refex/python/test_python_pattern.py
@@ -26,18 +26,46 @@ class PythonPatternTest(parameterized.TestCase):
 
   @parameterized.parameters('', 'x', 'x y')
   def test_simple_nonpattern(self, pattern):
-    tokenized, _ = python_pattern.token_pattern(pattern)
+    tokenized, nonrepeating, repeating = python_pattern.token_pattern(pattern)
+    self.assertEmpty(nonrepeating)
+    self.assertEmpty(repeating)
     self.assertEqual(tokenize.untokenize(tokenized), pattern)
 
   @parameterized.parameters('$x', 'foo + $x', 'import $x', '$x "$y"', '$x = 0')
-  def test_simple_pattern(self, pattern):
-    tokenized, [metavar_i] = python_pattern.token_pattern(pattern)
+  def test_nonrepeating_pattern(self, pattern):
+    tokenized, [metavar_i], [] = python_pattern.token_pattern(pattern)
     # token text is 'x' -- that's the only variable in the pattern.
     self.assertEqual(tokenized[metavar_i][1], 'x')
     # it round trips to the same string except $x -> x
     self.assertEqual(tokenize.untokenize(tokenized), pattern.replace('$x', 'x'))
 
+  @parameterized.parameters(
+      '$x...', 'foo + $x...', 'import $x...', '$x... "$y..."', '$x... = 0'
+  )
+  def test_repeating_pattern(self, pattern):
+    tokenized, [], [metavar_i] = python_pattern.token_pattern(pattern)
+    # token text is 'x' -- that's the only variable in the pattern.
+    self.assertEqual(tokenized[metavar_i][1], 'x')
+    # it round trips to the same string except $x... -> x
+    self.assertEqual(
+        tokenize.untokenize(tokenized), pattern.replace('$x...', 'x')
+    )
+
+  @parameterized.parameters(
+      '$...', 'foo + $...', 'import $...', '$... "$..."', '$... = 0'
+  )
+  def test_anonymous_repeating_pattern(self, pattern):
+    tokenized, [], [metavar_i] = python_pattern.token_pattern(pattern)
+    # token text is '_' -- that's the only variable in the pattern.
+    self.assertEqual(tokenized[metavar_i][1], '_')
+    # it round trips to the same string except $... -> _
+    self.assertEqual(
+        tokenize.untokenize(tokenized), pattern.replace('$...', '_', 1)
+    )
+
-  @parameterized.parameters('$1', '$', '$\n', '$[', '$""', '$ x', '$\nx')
+  @parameterized.parameters(
+      '$1', '$', '$\n', '$[', '$""', '$ x', '$\nx', '$ ...'
+  )
   def test_syntax_error(self, pattern):
     with self.assertRaises(SyntaxError):
       python_pattern.token_pattern(pattern)
diff --git a/refex/test_cli.py b/refex/test_cli.py
index d935cf4..6087cfe 100644
--- a/refex/test_cli.py
+++ b/refex/test_cli.py
@@ -768,9 +768,31 @@ def test_py_ez_sub_extra_variables(self):
         ['--mode=%s' % mode, '$x', '--sub=$y+$z', os.devnull])
     self.assertEqual(
         message,
-        'The substitution template(s) referenced variables not matched in the Python matcher: `y`, `z`'
+        'The substitution template(s) referenced variables not matched in'
+        ' the Python matcher: `y`, `z`',
     )
 
+  @parameterized.parameters(
+      '[$..., 3, $...]',
+      '[*$..., 3, *$...]',
+      '[$..., 5]',
+      '[1, $...]',
+      '[$..., 1, $..., 2, $..., 3, $..., 4, $..., 5, $...]',
+  )
+  def test_py_ez_sub_glob_list(self, pattern):
+    for mode in ['py.expr', 'py.stmt']:
+      with self.subTest(mode=mode):
+        f = self.create_tempfile(content='[1, 2, 3, 4, 5]\n')
+
+        self.main([
+            '--mode=%s' % mode,
+            pattern,
+            '--sub=good',
+            '-i',
+            f.full_path,
+        ])
+        self.assertEqual(f.read_text(), 'good\n')
+
   @parameterized.parameters(
       # Statement fragments can't be reparsed.
       'for a in foo: pass',
@@ -782,7 +804,8 @@ def test_py_ez_sub_extra_variables(self):
       # and 'bar', two valid lines.
      # But '(foo\n,bar)' and '(foo,\n bar)' can't parse as two expressions,
       # only as one expression.
-      '(foo,\n bar)')
+      '(foo,\n bar)',
+  )
   def test_py_ez_sub_bad_reparse(self, source):
     """Substitution works for suite statements and other parser edge cases."""
     f = self.create_tempfile(content=source)
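Finally, the commit-message example end to end: a sketch using only the APIs
exercised above (`foo.Bar`, `dry_run`, and the source string are illustrative):

```python
from refex.python import matcher
from refex.python.matchers import base_matchers, syntax_matchers

# Match calls to foo.Bar that do not pass dry_run=True.
no_dry_run = base_matchers.AllOf(
    syntax_matchers.ExprPattern('foo.Bar(*$..., **$...)'),
    base_matchers.Unless(
        syntax_matchers.ExprPattern('$_(*$..., **$..., dry_run=True, **$...)')
    ),
)

parsed = matcher.parse_ast('foo.Bar(1, verbose=True)', '')
expr = parsed.tree.body[0].value
assert no_dry_run.match(matcher.MatchContext(parsed), expr) is not None
```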