Skip to content

Commit

Permalink
Merge pull request #310 from openzim/upgrade_deps
Browse files Browse the repository at this point in the history
Upgrade deps + fix rewrite mode
  • Loading branch information
benoit74 authored Jun 13, 2024
2 parents 8bae7be + 3a6f84e commit 523665a
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ repos:
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.3
rev: v0.4.8
hooks:
- id: ruff
- repo: https://github.com/RobertCraigie/pyright-python
rev: v1.1.361
rev: v1.1.367
hooks:
- id: pyright
name: pyright (system)
Expand Down
2 changes: 1 addition & 1 deletion openzim.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ execute_after=[

[files.assets.actions."wombat.js"]
action="get_file"
source="https://cdn.jsdelivr.net/npm/@webrecorder/wombat@3.7.0/dist/wombat.js"
source="https://cdn.jsdelivr.net/npm/@webrecorder/wombat@3.7.5/dist/wombat.js"
target_file="wombat.js"

[files.assets.actions."wombatSetup.js"] # fallback if this script has not been properly built (should happen only in dev)
Expand Down
18 changes: 9 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[build-system]
# jinja2 is required to generate JS and Python rules at build time
requires = ["hatchling", "hatch-openzim==0.2.1", "jinja2==3.1.3"]
requires = ["hatchling", "hatch-openzim==0.2.1", "jinja2==3.1.4"]
build-backend = "hatchling.build"

[project]
Expand All @@ -10,16 +10,16 @@ description = "Convert WARC to ZIM"
readme = "README.md"
dependencies = [
"warcio==1.7.4",
"requests==2.31.0",
"requests==2.32.3",
"zimscraperlib==3.3.2",
"jinja2==3.1.3",
"jinja2==3.1.4",
"chardet==5.2.0",
# to support possible brotli content in warcs, must be added separately
"brotlipy==0.7.0",
"cdxj_indexer==1.4.5",
"tinycss2==1.3.0",
"beautifulsoup4==4.12.3", # used to parse base href
"lxml==5.2.1", # used by beautifulsoup4 for parsing html
"lxml==5.2.2", # used by beautifulsoup4 for parsing html
]
dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]

Expand All @@ -39,17 +39,17 @@ scripts = [
]
lint = [
"black==24.4.2",
"ruff==0.4.3",
"ruff==0.4.8",
]
check = [
"pyright==1.1.361",
"pyright==1.1.367",
]
test = [
"pytest==8.2.0",
"coverage==7.5.0",
"pytest==8.2.2",
"coverage==7.5.3",
]
dev = [
"pre-commit==3.6.2",
"pre-commit==3.7.1",
"debugpy==1.8.1",
"warc2zim[scripts]",
"warc2zim[lint]",
Expand Down
6 changes: 4 additions & 2 deletions src/warc2zim/content_rewriting/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def get_rewrite_mode(self, record, mimetype):
f"mimetype: {mimetype}, resourcetype: {resourcetype})"
)

return resourcetype_rewrite_mode

def get_resourcetype_rewrite_mode(self, record, resourcetype, mimetype):
"""Get current record rewrite mode based on WARC-Resource-Type and mimetype"""

Expand All @@ -160,12 +162,12 @@ def get_resourcetype_rewrite_mode(self, record, resourcetype, mimetype):
if resourcetype == "stylesheet":
return "css"

if resourcetype in ["script", "fetch"] and (
if resourcetype in ["script", "fetch", "xhr"] and (
mimetype == "application/json" or self.path.value.endswith(".json")
):
return "json"

if resourcetype == "script" and mimetype in [
if resourcetype in ["script", "xhr"] and mimetype in [
"text/javascript",
"application/javascript",
"application/x-javascript",
Expand Down
28 changes: 28 additions & 0 deletions tests/test_warc_to_zim.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,3 +796,31 @@ def test_redirection_loops(self, tmp_path):
self.assert_item_does_not_exist(
zim_output, f"website.test.openzim.org/{ignored_website_items}"
)

def test_content_resource_types(self, tmp_path):
    """Convert the content-resource-types WARC and check its entries were rewritten.

    Runs ``main`` on ``content-resource-types.warc.gz`` with ``tmp_path`` as
    the output directory, then reads entries back from the produced ZIM:

    * the HTML index must contain the ``<!-- WB Insert -->`` marker;
    * both JavaScript entries must contain the ``wombat`` token.

    These are deliberately simple smoke checks that rewriting took place,
    not a full validation of the rewritten content.
    """
    zim_filename = "tests_en_content-resource-types.zim"
    warc_path = os.path.join(TEST_DATA_DIR, "content-resource-types.warc.gz")

    cli_args = [
        warc_path,
        "--output",
        str(tmp_path),
        "--zim-file",
        zim_filename,
        "--name",
        "tests_en_content-resource-types",
    ]
    main(cli_args)

    zim_path = tmp_path / zim_filename

    # HTML entry: the inserted marker shows the document went through rewriting.
    index_html = self.get_article(
        zim_path, "website.test.openzim.org/content-types/index.html"
    )
    assert b"<!-- WB Insert -->" in index_html

    # JS entries: each script is expected to mention wombat once rewritten.
    script_entries = (
        "website.test.openzim.org/content-types/script1.js",
        "website.test.openzim.org/content-types/script2.js",
    )
    for script_entry in script_entries:
        script_body = self.get_article(zim_path, script_entry)
        assert b"wombat" in script_body

0 comments on commit 523665a

Please sign in to comment.