Skip to content

Commit

Permalink
chore: migrate to own glob parser (#2230)
Browse files Browse the repository at this point in the history
  • Loading branch information
mxschmitt authored Jan 9, 2024
1 parent 73616f4 commit 72de5b3
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 14 deletions.
68 changes: 68 additions & 0 deletions playwright/_impl/_glob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (c) Microsoft Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping
escaped_chars = {"$", "^", "+", ".", "*", "(", ")", "|", "\\", "?", "{", "}", "[", "]"}


def glob_to_regex(glob: str) -> "re.Pattern[str]":
    """Translate a glob expression into an anchored, compiled regular expression.

    Supported glob syntax:

    * A backslash followed by any character yields that character literally
      (regex-escaped when it is a regex metacharacter). A trailing lone
      backslash is matched literally.
    * ``*`` matches any run of characters other than ``/`` and is captured.
    * Two or more consecutive stars that are bounded on both sides by ``/`` or
      by the start/end of the glob match any number of ``/``-separated
      segments; the ``/`` following the stars is consumed as part of the
      wildcard. Any other run of stars behaves like a single ``*``.
    * ``?`` matches any single character (regex ``.``).
    * ``[`` and ``]`` are passed through so character classes such as
      ``[a-z]`` keep working.
    * ``{a,b}`` becomes the alternation ``(a|b)``. Groups may be nested; a
      ``,`` outside of any group is a literal comma.
    * Every other character matches itself.

    Args:
        glob: The glob expression to translate.

    Returns:
        The compiled pattern, anchored with ``^`` and ``$``.

    Raises:
        re.error: If the generated expression is not a valid regular
            expression, e.g. because braces or brackets are unbalanced.
    """
    tokens = ["^"]
    # Number of currently open "{...}" groups. A counter (not a flag) is
    # required so that "," keeps acting as an alternation separator in the
    # outer group after a nested group has been closed.
    group_depth = 0

    i = 0
    while i < len(glob):
        c = glob[i]
        if c == "\\" and i + 1 < len(glob):
            # Escape sequence: emit the next character literally and skip it.
            char = glob[i + 1]
            tokens.append("\\" + char if char in escaped_chars else char)
            i += 1
        elif c == "*":
            before_deep = glob[i - 1] if i > 0 else None
            # Collapse the whole run of stars; ``i`` ends on the last star.
            star_count = 1
            while i + 1 < len(glob) and glob[i + 1] == "*":
                star_count += 1
                i += 1
            after_deep = glob[i + 1] if i + 1 < len(glob) else None
            is_deep = (
                star_count > 1
                and (before_deep == "/" or before_deep is None)
                and (after_deep == "/" or after_deep is None)
            )
            if is_deep:
                tokens.append("((?:[^/]*(?:/|$))*)")
                # The deep wildcard already accounts for the "/" that follows
                # it, so skip that separator.
                i += 1
            else:
                tokens.append("([^/]*)")
        elif c == "?":
            tokens.append(".")
        elif c == "[":
            tokens.append("[")
        elif c == "]":
            tokens.append("]")
        elif c == "{":
            group_depth += 1
            tokens.append("(")
        elif c == "}":
            # A stray "}" still emits ")" so that re.compile reports the
            # malformed glob instead of it being silently accepted.
            if group_depth > 0:
                group_depth -= 1
            tokens.append(")")
        elif c == "," and group_depth > 0:
            tokens.append("|")
        else:
            tokens.append("\\" + c if c in escaped_chars else c)
        i += 1

    tokens.append("$")
    return re.compile("".join(tokens))
4 changes: 2 additions & 2 deletions playwright/_impl/_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import fnmatch
import inspect
import math
import os
Expand Down Expand Up @@ -41,6 +40,7 @@

from playwright._impl._api_structures import NameValue
from playwright._impl._errors import Error, TargetClosedError, TimeoutError
from playwright._impl._glob import glob_to_regex
from playwright._impl._str_utils import escape_regex_flags

if sys.version_info >= (3, 8): # pragma: no cover
Expand Down Expand Up @@ -149,7 +149,7 @@ def __init__(self, base_url: Union[str, None], match: URLMatch) -> None:
if isinstance(match, str):
if base_url and not match.startswith("*"):
match = urljoin(base_url, match)
regex = fnmatch.translate(match)
regex = glob_to_regex(match)
self._regex_obj = re.compile(regex)
elif isinstance(match, Pattern):
self._regex_obj = match
Expand Down
5 changes: 2 additions & 3 deletions tests/async/test_browsercontext_request_fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,9 @@ async def handler_with_header_mods(route: Route) -> None:
await context.route("**/*", handler_with_header_mods)

await page.goto(server.EMPTY_PAGE)
async with page.expect_request("/sleep.zzz") as request_info:
with server.expect_request("/sleep.zzz") as server_request_info:
await page.evaluate("() => fetch('/sleep.zzz')")
request = await request_info.value
values.append(request.headers.get("foo"))
values.append(server_request_info.value.getHeader("foo"))
assert values == ["bar", "bar", "bar"]


Expand Down
45 changes: 45 additions & 0 deletions tests/async/test_interception.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import pytest

from playwright._impl._glob import glob_to_regex
from playwright.async_api import (
Browser,
BrowserContext,
Expand Down Expand Up @@ -1041,3 +1042,47 @@ async def handle_request(route: Route) -> None:
assert response
assert response.status == 200
assert await response.json() == {"foo": "bar"}


async def test_glob_to_regex() -> None:
    """Check glob_to_regex against matching URLs, non-matching URLs and exact patterns."""
    image_glob = "**/*.{png,jpg,jpeg}"
    oidc_glob = "http://localhost:3000/signin-oidc*"

    # (glob, url) pairs where the translated pattern must match the URL.
    matching_cases = [
        ("**/*.js", "https://localhost:8080/foo.js"),
        ("https://**/*.js", "https://localhost:8080/foo.js"),
        (
            "http://localhost:8080/simple/path.js",
            "http://localhost:8080/simple/path.js",
        ),
        (
            "http://localhost:8080/?imple/path.js",
            "http://localhost:8080/Simple/path.js",
        ),
        ("**/{a,b}.js", "https://localhost:8080/a.js"),
        ("**/{a,b}.js", "https://localhost:8080/b.js"),
        (image_glob, "https://localhost:8080/c.jpg"),
        (image_glob, "https://localhost:8080/c.jpeg"),
        (image_glob, "https://localhost:8080/c.png"),
        ("foo*", "foo.js"),
        (oidc_glob, "http://localhost:3000/signin-oidcnice"),
        (
            "**/three-columns/settings.html?**id=[a-z]**",
            "http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah",
        ),
    ]
    for pattern, url in matching_cases:
        assert glob_to_regex(pattern).match(url)

    # (glob, url) pairs where the translated pattern must NOT match the URL.
    non_matching_cases = [
        ("**/*.css", "https://localhost:8080/foo.js"),
        ("*.js", "https://localhost:8080/foo.js"),
        ("**/{a,b}.js", "https://localhost:8080/c.js"),
        (image_glob, "https://localhost:8080/c.css"),
        ("foo*", "foo/bar.js"),
        (oidc_glob, "http://localhost:3000/signin-oidc/foo"),
    ]
    for pattern, url in non_matching_cases:
        assert not glob_to_regex(pattern).match(url)

    # (glob, regex source) pairs pinning the exact generated expression.
    expected_patterns = [
        ("\\?", r"^\?$"),
        ("\\", r"^\\$"),
        ("\\\\", r"^\\$"),
        ("\\[", r"^\[$"),
        ("[a-z]", r"^[a-z]$"),
        ("$^+.\\*()|\\?\\{\\}\\[\\]", r"^\$\^\+\.\*\(\)\|\?\{\}\[\]$"),
    ]
    for pattern, expected_source in expected_patterns:
        assert glob_to_regex(pattern) == re.compile(expected_source)
5 changes: 2 additions & 3 deletions tests/async/test_page_request_fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,9 @@ async def handler_with_header_mods(route: Route) -> None:
await page.route("**/*", handler_with_header_mods)

await page.goto(server.EMPTY_PAGE)
async with page.expect_request("/sleep.zzz") as request_info:
with server.expect_request("/sleep.zzz") as server_request_info:
await page.evaluate("() => fetch('/sleep.zzz')")
request = await request_info.value
values.append(request.headers.get("foo"))
values.append(server_request_info.value.getHeader("foo"))
assert values == ["bar", "bar", "bar"]


Expand Down
5 changes: 2 additions & 3 deletions tests/sync/test_browsercontext_request_fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,9 @@ def handler_with_header_mods(route: Route) -> None:
context.route("**/*", handler_with_header_mods)

page.goto(server.EMPTY_PAGE)
with page.expect_request("/sleep.zzz") as request_info:
with server.expect_request("/sleep.zzz") as server_request_info:
page.evaluate("() => fetch('/sleep.zzz')")
request = request_info.value
values.append(request.headers.get("foo"))
values.append(server_request_info.value.getHeader("foo"))
assert values == ["bar", "bar", "bar"]


Expand Down
5 changes: 2 additions & 3 deletions tests/sync/test_page_request_fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,9 @@ def handler_with_header_mods(route: Route) -> None:
page.route("**/*", handler_with_header_mods)

page.goto(server.EMPTY_PAGE)
with page.expect_request("/sleep.zzz") as request_info:
with server.expect_request("/sleep.zzz") as server_request_info:
page.evaluate("() => fetch('/sleep.zzz')")
request = request_info.value
_append_with_return_value(values, request.headers.get("foo"))
_append_with_return_value(values, server_request_info.value.getHeader("foo"))
assert values == ["bar", "bar", "bar"]


Expand Down

0 comments on commit 72de5b3

Please sign in to comment.