Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Wikicode.matches() handle namespaces. #335

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ htmlcov/
compile_commands.json
.idea/
.pytest_cache/
.venv/
32 changes: 28 additions & 4 deletions src/mwparserfromhell/wikicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,19 +516,43 @@ def matches(self, other):
adjusted. Specifically, whitespace and markup are stripped and the first
letter's case is normalized. Typical usage is
``if template.name.matches("stub"): ...``.

If either side has any colons, everything before the last colon is taken to be
a namespace and/or interwiki prefix. The parts before and after the colon are
normalized and compared separately; both must match for the result to be True.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not make sense - if something is not a valid namespace/interwiki prefix, it should not be normalized.

"""
normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s
this = normalize(self.strip_code().strip())
this = self.strip_code().strip()
this_prefix, this_postfix = self._split_and_normalize(this)

if isinstance(other, (str, bytes, Wikicode, Node)):
that = parse_anything(other).strip_code().strip()
return this == normalize(that)
that_prefix, that_postfix = self._split_and_normalize(that)
return (this_prefix, this_postfix) == (that_prefix, that_postfix)

for obj in other:
that = parse_anything(obj).strip_code().strip()
if this == normalize(that):
that_prefix, that_postfix = self._split_and_normalize(that)
if (this_prefix, this_postfix) == (that_prefix, that_postfix):
return True
return False

def _split_and_normalize(self, s):
"""Split a page title into a prefix (everything before the last colon)
and a postfix (everything after the last colon). Both parts are normalized
according to the rules specific to that part (the prefix is case-insensitive,
while the postfix is only case insensitive in the first character) before being
returned.

If there is no prefix, the returned prefix is an empty string.
"""
normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s
m = re.match(r'(.*):(.*)', s)
if m:
return normalize(m[1]).lower(), normalize(m[2])
else:
return "", normalize(s)



def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None):
"""Iterate over nodes in our list matching certain conditions.
Expand Down
9 changes: 9 additions & 0 deletions tests/test_wikicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,8 @@ def test_matches():
code3 = parse("Hello world!")
code4 = parse("World,_hello?")
code5 = parse("")
code6 = parse("File:Foo")
code7 = parse("Talk:foo")
assert code1.matches("Cleanup") is True
assert code1.matches("cleanup") is True
assert code1.matches(" cleanup\n") is True
Expand All @@ -386,6 +388,13 @@ def test_matches():
assert code5.matches("") is True
assert code5.matches("<!-- nothing -->") is True
assert code5.matches(("a", "b", "")) is True
assert code6.matches("File:Foo") is True
assert code6.matches("File:foo") is True
assert code6.matches("FILE:FOO") is False
assert code6.matches("file:foo") is True
assert code6.matches("FiLe:foo") is True
assert code6.matches("FiLE:Foo") is True
assert code7.matches("Talk:Foo") is True


def test_filter_family():
Expand Down