From 9bb986cd4d38dc9df94cc97b5addfdc1e2a41c54 Mon Sep 17 00:00:00 2001 From: Max Schmitt Date: Tue, 9 Jan 2024 11:38:39 +0100 Subject: [PATCH] chore: migrate to own glob parser --- playwright/_impl/_glob.py | 68 +++++++++++++++++++ playwright/_impl/_helper.py | 4 +- .../test_browsercontext_request_fallback.py | 5 +- tests/async/test_interception.py | 45 ++++++++++++ tests/async/test_page_request_fallback.py | 5 +- .../test_browsercontext_request_fallback.py | 5 +- tests/sync/test_page_request_fallback.py | 5 +- 7 files changed, 123 insertions(+), 14 deletions(-) create mode 100644 playwright/_impl/_glob.py diff --git a/playwright/_impl/_glob.py b/playwright/_impl/_glob.py new file mode 100644 index 000000000..2d899a789 --- /dev/null +++ b/playwright/_impl/_glob.py @@ -0,0 +1,68 @@ +# Copyright (c) Microsoft Corporation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import re

# Characters that must be backslash-escaped to be matched literally in a regex.
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping
escaped_chars = {"$", "^", "+", ".", "*", "(", ")", "|", "\\", "?", "{", "}", "[", "]"}


def glob_to_regex(glob: str) -> "re.Pattern[str]":
    """Translate a glob into a compiled regular expression anchored at both ends.

    Translation rules, applied left to right:

    * A backslash followed by a character yields that character as a literal
      (regex-escaped when it is one of ``escaped_chars``). A lone trailing
      backslash is a literal backslash.
    * A run of two or more ``*`` that is bounded on each side by ``/`` or by the
      start/end of the glob matches any number of path segments; the ``/`` that
      follows the run, if any, is consumed as part of it.
    * Any other run of ``*`` matches any sequence of characters except ``/``.
    * ``?`` matches any single character.
    * ``[`` and ``]`` are passed through unchanged, i.e. as a regex character
      class.
    * ``{a,b}`` becomes the alternation ``(a|b)``. Groups may be nested; a comma
      outside of any group is a literal comma.
    * Every other character matches itself.

    Args:
        glob: The glob pattern to translate.

    Returns:
        The compiled pattern, wrapped in ``^`` ... ``$``.

    Raises:
        re.error: If the assembled pattern is not a valid regular expression,
            for example when a ``{`` or ``[`` is never closed.
    """
    tokens = ["^"]
    # Number of "{" groups currently open. Tracked as a depth rather than a flag
    # so that closing an inner group does not end the enclosing one, which would
    # turn the outer group's remaining commas into literals.
    group_depth = 0
    length = len(glob)

    i = 0
    while i < length:
        c = glob[i]
        if c == "\\" and i + 1 < length:
            # Escaped character: emit it literally and skip over it.
            char = glob[i + 1]
            tokens.append("\\" + char if char in escaped_chars else char)
            i += 1
        elif c == "*":
            before_deep = glob[i - 1] if i > 0 else None
            star_count = 1
            # Collapse the whole run of consecutive stars into one token.
            while i + 1 < length and glob[i + 1] == "*":
                star_count += 1
                i += 1
            after_deep = glob[i + 1] if i + 1 < length else None
            is_deep = (
                star_count > 1
                and (before_deep == "/" or before_deep is None)
                and (after_deep == "/" or after_deep is None)
            )
            if is_deep:
                # Zero or more "segment/" pieces (the last may end the string).
                tokens.append("((?:[^/]*(?:/|$))*)")
                # The separator after the run is already part of the token.
                i += 1
            else:
                tokens.append("([^/]*)")
        elif c == "?":
            tokens.append(".")
        elif c == "[" or c == "]":
            tokens.append(c)
        elif c == "{":
            group_depth += 1
            tokens.append("(")
        elif c == "}":
            # A stray "}" still emits ")" so the invalid glob is reported by
            # re.compile below instead of being silently accepted.
            group_depth = max(group_depth - 1, 0)
            tokens.append(")")
        elif c == "," and group_depth > 0:
            tokens.append("|")
        else:
            tokens.append("\\" + c if c in escaped_chars else c)
        i += 1

    tokens.append("$")
    return re.compile("".join(tokens))
import asyncio -import fnmatch import inspect import math import os @@ -41,6 +40,7 @@ from playwright._impl._api_structures import NameValue from playwright._impl._errors import Error, TargetClosedError, TimeoutError +from playwright._impl._glob import glob_to_regex from playwright._impl._str_utils import escape_regex_flags if sys.version_info >= (3, 8): # pragma: no cover @@ -149,7 +149,7 @@ def __init__(self, base_url: Union[str, None], match: URLMatch) -> None: if isinstance(match, str): if base_url and not match.startswith("*"): match = urljoin(base_url, match) - regex = fnmatch.translate(match) + regex = glob_to_regex(match) self._regex_obj = re.compile(regex) elif isinstance(match, Pattern): self._regex_obj = match diff --git a/tests/async/test_browsercontext_request_fallback.py b/tests/async/test_browsercontext_request_fallback.py index f3959490b..b198a4ebd 100644 --- a/tests/async/test_browsercontext_request_fallback.py +++ b/tests/async/test_browsercontext_request_fallback.py @@ -185,10 +185,9 @@ async def handler_with_header_mods(route: Route) -> None: await context.route("**/*", handler_with_header_mods) await page.goto(server.EMPTY_PAGE) - async with page.expect_request("/sleep.zzz") as request_info: + with server.expect_request("/sleep.zzz") as server_request_info: await page.evaluate("() => fetch('/sleep.zzz')") - request = await request_info.value - values.append(request.headers.get("foo")) + values.append(server_request_info.value.getHeader("foo")) assert values == ["bar", "bar", "bar"] diff --git a/tests/async/test_interception.py b/tests/async/test_interception.py index 911d7ddd8..01f932360 100644 --- a/tests/async/test_interception.py +++ b/tests/async/test_interception.py @@ -20,6 +20,7 @@ import pytest +from playwright._impl._glob import glob_to_regex from playwright.async_api import ( Browser, BrowserContext, @@ -1041,3 +1042,47 @@ async def handle_request(route: Route) -> None: assert response assert response.status == 200 assert await 
response.json() == {"foo": "bar"} + + +async def test_glob_to_regex() -> None: + assert glob_to_regex("**/*.js").match("https://localhost:8080/foo.js") + assert not glob_to_regex("**/*.css").match("https://localhost:8080/foo.js") + assert not glob_to_regex("*.js").match("https://localhost:8080/foo.js") + assert glob_to_regex("https://**/*.js").match("https://localhost:8080/foo.js") + assert glob_to_regex("http://localhost:8080/simple/path.js").match( + "http://localhost:8080/simple/path.js" + ) + assert glob_to_regex("http://localhost:8080/?imple/path.js").match( + "http://localhost:8080/Simple/path.js" + ) + assert glob_to_regex("**/{a,b}.js").match("https://localhost:8080/a.js") + assert glob_to_regex("**/{a,b}.js").match("https://localhost:8080/b.js") + assert not glob_to_regex("**/{a,b}.js").match("https://localhost:8080/c.js") + + assert glob_to_regex("**/*.{png,jpg,jpeg}").match("https://localhost:8080/c.jpg") + assert glob_to_regex("**/*.{png,jpg,jpeg}").match("https://localhost:8080/c.jpeg") + assert glob_to_regex("**/*.{png,jpg,jpeg}").match("https://localhost:8080/c.png") + assert not glob_to_regex("**/*.{png,jpg,jpeg}").match( + "https://localhost:8080/c.css" + ) + assert glob_to_regex("foo*").match("foo.js") + assert not glob_to_regex("foo*").match("foo/bar.js") + assert not glob_to_regex("http://localhost:3000/signin-oidc*").match( + "http://localhost:3000/signin-oidc/foo" + ) + assert glob_to_regex("http://localhost:3000/signin-oidc*").match( + "http://localhost:3000/signin-oidcnice" + ) + + assert glob_to_regex("**/three-columns/settings.html?**id=[a-z]**").match( + "http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah" + ) + + assert glob_to_regex("\\?") == re.compile(r"^\?$") + assert glob_to_regex("\\") == re.compile(r"^\\$") + assert glob_to_regex("\\\\") == re.compile(r"^\\$") + assert glob_to_regex("\\[") == re.compile(r"^\[$") + assert glob_to_regex("[a-z]") == re.compile(r"^[a-z]$") 
+ assert glob_to_regex("$^+.\\*()|\\?\\{\\}\\[\\]") == re.compile( + r"^\$\^\+\.\*\(\)\|\?\{\}\[\]$" + ) diff --git a/tests/async/test_page_request_fallback.py b/tests/async/test_page_request_fallback.py index 456c911a3..1cea1204a 100644 --- a/tests/async/test_page_request_fallback.py +++ b/tests/async/test_page_request_fallback.py @@ -164,10 +164,9 @@ async def handler_with_header_mods(route: Route) -> None: await page.route("**/*", handler_with_header_mods) await page.goto(server.EMPTY_PAGE) - async with page.expect_request("/sleep.zzz") as request_info: + with server.expect_request("/sleep.zzz") as server_request_info: await page.evaluate("() => fetch('/sleep.zzz')") - request = await request_info.value - values.append(request.headers.get("foo")) + values.append(server_request_info.value.getHeader("foo")) assert values == ["bar", "bar", "bar"] diff --git a/tests/sync/test_browsercontext_request_fallback.py b/tests/sync/test_browsercontext_request_fallback.py index e653800d7..6feb19942 100644 --- a/tests/sync/test_browsercontext_request_fallback.py +++ b/tests/sync/test_browsercontext_request_fallback.py @@ -174,10 +174,9 @@ def handler_with_header_mods(route: Route) -> None: context.route("**/*", handler_with_header_mods) page.goto(server.EMPTY_PAGE) - with page.expect_request("/sleep.zzz") as request_info: + with server.expect_request("/sleep.zzz") as server_request_info: page.evaluate("() => fetch('/sleep.zzz')") - request = request_info.value - values.append(request.headers.get("foo")) + values.append(server_request_info.value.getHeader("foo")) assert values == ["bar", "bar", "bar"] diff --git a/tests/sync/test_page_request_fallback.py b/tests/sync/test_page_request_fallback.py index 09a3c9845..53570960c 100644 --- a/tests/sync/test_page_request_fallback.py +++ b/tests/sync/test_page_request_fallback.py @@ -162,10 +162,9 @@ def handler_with_header_mods(route: Route) -> None: page.route("**/*", handler_with_header_mods) page.goto(server.EMPTY_PAGE) - with 
page.expect_request("/sleep.zzz") as request_info: + with server.expect_request("/sleep.zzz") as server_request_info: page.evaluate("() => fetch('/sleep.zzz')") - request = request_info.value - _append_with_return_value(values, request.headers.get("foo")) + _append_with_return_value(values, server_request_info.value.getHeader("foo")) assert values == ["bar", "bar", "bar"]