Skip to content

Commit

Permalink
Merge pull request #279 from openzim/bare_fragments
Browse files Browse the repository at this point in the history
Do not rewrite URLs composed of just a fragment
  • Loading branch information
benoit74 authored May 24, 2024
2 parents e9ab5e3 + 8a43297 commit 08f713f
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/warc2zim/url_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ def __call__(
try:
item_url = item_url.strip()

# A URL made of just a fragment (e.g. "#section") is returned unchanged
# instead of going through the rewriting logic below
if item_url.startswith("#"):
return item_url

item_scheme = urlsplit(item_url).scheme
if item_scheme and item_scheme not in ("http", "https"):
return item_url
Expand Down
63 changes: 63 additions & 0 deletions test-website/content/href-to-folder/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<!DOCTYPE html>
<html lang="en">

<!--
  Test page: links whose href targets a folder (URL ending in "/") instead of
  a document, plus links that carry a fragment.
-->
<head>
<meta charset="utf-8">
<title>Test website</title>
<link rel="apple-touch-icon" sizes="180x180" href="./icons/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="./icons/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="./icons/favicon-16x16.png">
<link rel="manifest" href="./icons/site.webmanifest">
<link rel="shortcut icon" href="./icons/favicon.ico">
</head>

<body>

<h2>Tests of links to folder instead of document</h2>

<!-- Folder links: the current folder, then the same folder reached through its parent -->
<p><a href="./">./</a></p>
<p><a href="../href-to-folder/">../href-to-folder/</a></p>
<!-- Climbs two levels before descending; presumably this goes above the site root, hence "too deep" (confirm) -->
<p><a href="../../href-to-folder/">../../href-to-folder/</a> (too deep)</p>
<!-- Fragment-only link, targeting the id="section1" paragraph below -->
<p><a href="#section1">#section1</a></p>
<!-- Fragment appended to a folder URL, targeting the id="section2" paragraph below -->
<!-- NOTE(review): the visible label omits the "./" prefix present in the href; confirm this is intentional -->
<p><a href="./#section2">#section2</a></p>
<!-- Filler paragraphs; presumably they make the page tall enough for anchor jumps to scroll visibly -->
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<!-- Anchor target of the "#section1" link -->
<p id="section1">Section1</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<!-- Anchor target of the "./#section2" link -->
<p id="section2">Section2</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>
<p>...</p>

</body>

</html>
1 change: 1 addition & 0 deletions test-website/content/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
<li><a href="./http-return-codes.html">HTTP return codes</a></li>
<li><a href="./base-href.html">Base href</a></li>
<li><a href="./onxxx.html">onxxx HTML events</a></li>
<li><a href="./href-to-folder/">links to folder instead of file</a></li>
</ul>
</body>

Expand Down
21 changes: 21 additions & 0 deletions tests/test_url_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,27 @@
["kiwix.org/foo.html"],
False,
),
(
"https://kiwix.org/a/article/document.html",
"#anchor1",
"#anchor1",
["kiwix.org/a/article/document.html"],
False,
),
(
"https://kiwix.org/a/article/",
"#anchor1",
"#anchor1",
["kiwix.org/a/article/"],
False,
),
(
"https://kiwix.org/a/article/",
"../article/",
"./",
["kiwix.org/a/article/"],
False,
),
],
)
def test_relative_url(
Expand Down

0 comments on commit 08f713f

Please sign in to comment.