From 4749161272e7ff4987606cd9322846f2d6e25980 Mon Sep 17 00:00:00 2001
From: benoit74 <benoit74@users.noreply.github.com>
Date: Tue, 8 Oct 2024 09:14:26 +0000
Subject: [PATCH] Apply proper CSS for proper page display - step 1

This first step takes care of CSS stylesheets which are in external
files (two of them: one for screen and one for print).

It does not consider inline CSS, which is also needed and will be handled
in step 2.
---
 CONTRIBUTING.md                          |   4 +-
 scraper/pyproject.toml                   |   1 +
 scraper/src/libretexts2zim/client.py     |  34 ++-
 scraper/src/libretexts2zim/css.py        | 137 +++++++++++
 scraper/src/libretexts2zim/processor.py  |  36 +++
 scraper/src/libretexts2zim/utils.py      |  30 +++
 scraper/tests-integration/test_client.py |  16 ++
 scraper/tests/test_css.py                | 294 +++++++++++++++++++++++
 scraper/tests/test_utils.py              |  59 +++++
 zimui/index.html                         |  10 +-
 10 files changed, 614 insertions(+), 7 deletions(-)
 create mode 100644 scraper/src/libretexts2zim/css.py
 create mode 100644 scraper/src/libretexts2zim/utils.py
 create mode 100644 scraper/tests/test_css.py
 create mode 100644 scraper/tests/test_utils.py

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4205369..d4cefca 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -24,10 +24,10 @@ To achieve this, first build the Docker image based on current code base.
 docker build -t local-libretexts2zim .
 ```
 
-Scrape a library (here we use the [Geosciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments).
+Scrape a library (here we use the [Geosciences](https://geo.libretexts.org) library, restricted to page id 28207 and its children, but you could use any other one of interest for your UI developments).
 
 ```
-docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo" --overwrite
+docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo" --root-page-id 28207 --overwrite
 ```
 
 Extract interesting ZIM content and move it to `public` folder.
diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml
index fdbd0de..97b2ec8 100644
--- a/scraper/pyproject.toml
+++ b/scraper/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
   "beautifulsoup4==4.12.3",
   "types-beautifulsoup4==4.12.0.20240907",
   "lxml==5.3.0",
+  "tinycss2==1.3.0",
 ]
 dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]
 
diff --git a/scraper/src/libretexts2zim/client.py b/scraper/src/libretexts2zim/client.py
index a08952d..7d74402 100644
--- a/scraper/src/libretexts2zim/client.py
+++ b/scraper/src/libretexts2zim/client.py
@@ -22,6 +22,8 @@ class LibreTextsParsingError(Exception):
 class LibreTextsHome(BaseModel):
     welcome_text_paragraphs: list[str]
     welcome_image_url: str
+    screen_css_url: str
+    print_css_url: str
 
 
 LibraryPageId = str
@@ -206,6 +208,8 @@ def get_home(self) -> LibreTextsHome:
         return LibreTextsHome(
             welcome_text_paragraphs=_get_welcome_text_from_home(soup),
             welcome_image_url=_get_welcome_image_url_from_home(soup),
+            screen_css_url=_get_screen_css_url_from_home(soup),
+            print_css_url=_get_print_css_url_from_home(soup),
         )
 
     def get_deki_token(self) -> str:
@@ -308,7 +312,7 @@ def get_page_content(self, page: LibraryPage) -> LibraryPageContent:
         if tree["body"][1]["@target"] != "toc":
             raise LibreTextsParsingError(
                 f"Unexpected second body element of /pages/{page.id}/contents, "
-                f"@target property is '{tree["body"][1]["@target"]}' while only 'toc' "
+                f"@target property is '{tree['body'][1]['@target']}' while only 'toc' "
                 "is expected"
             )
         return LibraryPageContent(html_body=tree["body"][0])
@@ -373,3 +377,31 @@ def _get_deki_token_from_home(soup: BeautifulSoup) -> str:
             "Failed to retrieve API token to query website API, missing apiToken."
         )
     return x_deki_token
+
+
+def _get_any_css_url_from_home(soup: BeautifulSoup, media: str) -> str:
+    """Returns the URL of the stylesheet for given media found on home page
+
+    This function expects exactly one <link rel="stylesheet" /> tag with the given
+    media attribute and returns its href, raising LibreTextsParsingError otherwise.
+    This is the case on libretexts.org as of October 2024, might be a bit fragile.
+    """
+    links = soup.find_all("link", {"rel": "stylesheet", "media": media})
+    if len(links) != 1:
+        raise LibreTextsParsingError(
+            f"Failed to find {media} CSS URL in home page, {len(links)} link(s) found"
+        )
+    css_url = links[0].get("href", None)
+    if not css_url:
+        raise LibreTextsParsingError(f"{media} CSS link has no href")
+    return css_url
+
+
+def _get_screen_css_url_from_home(soup: BeautifulSoup) -> str:
+    """Returns the URL of screen CSS found on home page"""
+    return _get_any_css_url_from_home(soup, "screen")
+
+
+def _get_print_css_url_from_home(soup: BeautifulSoup) -> str:
+    """Returns the URL of print CSS found on home page"""
+    return _get_any_css_url_from_home(soup, "print")
diff --git a/scraper/src/libretexts2zim/css.py b/scraper/src/libretexts2zim/css.py
new file mode 100644
index 0000000..64e075d
--- /dev/null
+++ b/scraper/src/libretexts2zim/css.py
@@ -0,0 +1,137 @@
+from collections.abc import Iterable
+from pathlib import Path
+from urllib.parse import urljoin, urlparse
+
+from tinycss2 import ast, parse_stylesheet_bytes, serialize  # pyright: ignore
+from tinycss2.serializer import serialize_url  # pyright: ignore
+
+from libretexts2zim.utils import get_asset_path_from_url
+
+OriginalUrl = str
+FullZimPath = Path
+RelativeCssPath = Path
+
+
+class CssProcessor:
+    """Utility to process CSS, extract assets and rewrite URLs
+
+    This utility can process multiple CSS documents that will be stored in a ZIM.
+    It extracts the list of assets (images, fonts) that are used in the CSS documents
+    and computes appropriate ZIM paths for each of them.
+
+    Arguments:
+      css_target_path: "folder" where the CSS documents that will be processed will be
+        stored in the ZIM
+      css_assets_root_path: "folder" where the CSS assets referenced in the CSS
+        documents will be stored in the ZIM
+    """
+
+    def __init__(
+        self,
+        css_target_path: Path = Path("/content"),
+        css_assets_root_path: Path = Path("/content/css_assets"),
+    ) -> None:
+        self.css_target_path = css_target_path
+        self.css_assets_root_path = css_assets_root_path
+        self.css_assets: dict[OriginalUrl, FullZimPath] = {}
+        self.used_paths: list[RelativeCssPath] = []
+
+    def process(self, css_original_url: str, css_content: bytes) -> str:
+        """Rewrite CSS rules and update list of assets to fetch
+
+        This function updates the CSS rules to target asset paths inside the ZIM.
+        It also updates `css_assets`, the mapping of online resources referenced
+        in the CSS documents to the ZIM paths where they should be fetched and
+        stored for proper CSS operation.
+        """
+        rules, _ = parse_stylesheet_bytes(  # pyright: ignore[reportUnknownVariableType]
+            css_content
+        )
+        self._process_list(
+            css_original_url,
+            rules,  # pyright: ignore[reportUnknownArgumentType]
+        )
+        return serialize(rules)
+
+    def _process_url(
+        self, css_original_url: str, css_url: str
+    ) -> RelativeCssPath | None:
+        """Process a URL which has been found in CSS rules
+
+        - Transforms the URL into a ZIM path
+        - Updates the list of assets to retrieve
+        """
+        original_url = urljoin(css_original_url, css_url)
+        original_url_parsed = urlparse(original_url)
+        if original_url_parsed.scheme.lower() not in ["http", "https"]:
+            return None
+        if original_url in self.css_assets:
+            return self.css_assets[original_url].relative_to(self.css_target_path)
+        relative_path = get_asset_path_from_url(original_url, self.used_paths)
+        self.used_paths.append(relative_path)
+        target_path = self.css_assets_root_path / relative_path
+        self.css_assets[original_url] = target_path
+        return target_path.relative_to(self.css_target_path)
+
+    def _process_node(self, css_original_url: str, node: ast.Node):
+        """Process one single CSS node"""
+        if isinstance(
+            node,
+            ast.QualifiedRule
+            | ast.SquareBracketsBlock
+            | ast.ParenthesesBlock
+            | ast.CurlyBracketsBlock,
+        ):
+            self._process_list(
+                css_original_url,
+                node.content,  # pyright: ignore[reportUnknownArgumentType, reportUnknownMemberType]
+            )
+        elif isinstance(node, ast.FunctionBlock):
+            if node.lower_name == "url":  # pyright: ignore[reportUnknownMemberType]
+                url_node: ast.Node = node.arguments[0]  # pyright: ignore
+                relative_css_path = self._process_url(
+                    css_original_url,
+                    url_node.value,  # pyright: ignore
+                )
+                if not relative_css_path:
+                    return
+                url_node.value = str(relative_css_path)  # pyright: ignore
+                url_node.representation = (  # pyright: ignore
+                    f'"{serialize_url(str(relative_css_path))}"'
+                )
+
+            else:
+                self._process_list(
+                    css_original_url,
+                    node.arguments,  # pyright: ignore
+                )
+        elif isinstance(node, ast.AtRule):
+            self._process_list(
+                css_original_url,
+                node.prelude,  # pyright: ignore
+            )
+            self._process_list(
+                css_original_url,
+                node.content,  # pyright: ignore
+            )
+        elif isinstance(node, ast.Declaration):
+            self._process_list(
+                css_original_url,
+                node.value,  # pyright: ignore
+            )
+        elif isinstance(node, ast.URLToken):
+            relative_css_path = self._process_url(
+                css_original_url,
+                node.value,  # pyright: ignore
+            )
+            if not relative_css_path:
+                return
+            node.value = str(relative_css_path)
+            node.representation = f"url({serialize_url(str(relative_css_path))})"
+
+    def _process_list(self, css_original_url: str, nodes: Iterable[ast.Node] | None):
+        """Process a list of CSS nodes"""
+        if not nodes:
+            return
+        for node in nodes:
+            self._process_node(css_original_url, node)
diff --git a/scraper/src/libretexts2zim/processor.py b/scraper/src/libretexts2zim/processor.py
index 93a6bad..eafcf26 100644
--- a/scraper/src/libretexts2zim/processor.py
+++ b/scraper/src/libretexts2zim/processor.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 
 from pydantic import BaseModel
+from requests.exceptions import HTTPError
 from zimscraperlib.download import (
     stream_file,  # pyright: ignore[reportUnknownVariableType]
 )
@@ -21,6 +22,7 @@
     LibreTextsMetadata,
 )
 from libretexts2zim.constants import LANGUAGE_ISO_639_3, NAME, ROOT_DIR, VERSION, logger
+from libretexts2zim.css import CssProcessor
 from libretexts2zim.ui import (
     ConfigModel,
     PageContentModel,
@@ -261,11 +263,45 @@ def run(self) -> Path:
 
             logger.info("  Fetching and storing home page...")
             home = self.libretexts_client.get_home()
+
             welcome_image = BytesIO()
             stream_file(home.welcome_image_url, byte_stream=welcome_image)
             add_item_for(creator, "content/logo.png", content=welcome_image.getvalue())
             del welcome_image
 
+            css_processor = CssProcessor()
+            screen_css = BytesIO()
+            stream_file(home.screen_css_url, byte_stream=screen_css)
+            result = css_processor.process(
+                css_original_url=home.screen_css_url, css_content=screen_css.getvalue()
+            )
+            add_item_for(creator, "content/screen.css", content=result)
+            del screen_css
+
+            print_css = BytesIO()
+            stream_file(home.print_css_url, byte_stream=print_css)
+            result = css_processor.process(
+                css_original_url=home.print_css_url, css_content=print_css.getvalue()
+            )
+            add_item_for(creator, "content/print.css", content=result)
+            del print_css
+
+            logger.info(f"  Retrieving {len(css_processor.css_assets)} CSS assets...")
+            for asset_url, asset_path in css_processor.css_assets.items():
+                try:
+                    css_asset = BytesIO()
+                    stream_file(asset_url, byte_stream=css_asset)
+                    add_item_for(
+                        creator, str(asset_path)[1:], content=css_asset.getvalue()
+                    )
+                    logger.debug(f"Adding {asset_url} to {asset_path} in the ZIM")
+                    del css_asset
+                except HTTPError as exc:
+                    # would make more sense to be a warning, but this is just too
+                    # verbose, at least on geo.libretexts.org many assets are just
+                    # missing
+                    logger.debug(f"Ignoring {asset_path} due to {exc}")
+
             logger.info(f"Adding Vue.JS UI files in {self.zimui_dist}")
             for file in self.zimui_dist.rglob("*"):
                 if file.is_dir():
diff --git a/scraper/src/libretexts2zim/utils.py b/scraper/src/libretexts2zim/utils.py
new file mode 100644
index 0000000..ca05336
--- /dev/null
+++ b/scraper/src/libretexts2zim/utils.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+from urllib.parse import urlparse
+
+
+def get_asset_path_from_url(online_url: str, already_used_paths: list[Path]) -> Path:
+    """Computes the path where one should store its asset based on its online URL
+
+    This function tries to:
+    - preserve as much of the online path as possible
+    - simplify the filename (e.g. dropping querystring) to keep the ZIM path simple
+    """
+    original_path = Path(urlparse(online_url).path)
+    target_parent = Path(
+        *[
+            parent.name
+            for parent in reversed(original_path.parents)
+            if parent.name and parent.name != ".."
+        ]
+    )
+
+    index = 0
+    while True:
+        relative_path = (
+            target_parent / f"{original_path.stem}{'_' + str(index) if index else ''}"
+            f"{original_path.suffix}"
+        )
+        if relative_path not in already_used_paths:
+            break
+        index += 1
+    return relative_path
diff --git a/scraper/tests-integration/test_client.py b/scraper/tests-integration/test_client.py
index e6a0251..ca3eed2 100644
--- a/scraper/tests-integration/test_client.py
+++ b/scraper/tests-integration/test_client.py
@@ -134,3 +134,19 @@ def test_get_home_welcome_text_paragraphs(
 def test_get_home_page_content(client: LibreTextsClient, page_tree: LibraryTree):
     """Ensures we can get content of root page"""
     assert client.get_page_content(page_tree.root).html_body
+
+
+def test_get_home_screen_css_url(home: LibreTextsHome):
+    """Ensures proper screen CSS url is retrieved"""
+    assert (
+        home.screen_css_url
+        == "https://a.mtstatic.com/@cache/layout/anonymous.css?_=715eca8811db7abb8e6f0555936e020d_Z2VvLmxpYnJldGV4dHMub3Jn:site_4038"
+    )
+
+
+def test_get_home_print_css_url(home: LibreTextsHome):
+    """Ensures proper print CSS url is retrieved"""
+    assert (
+        home.print_css_url
+        == "https://a.mtstatic.com/@cache/layout/print.css?_=99d83fb44eaebe60981933ec554d138d:site_4038"
+    )
diff --git a/scraper/tests/test_css.py b/scraper/tests/test_css.py
new file mode 100644
index 0000000..e6dcc0b
--- /dev/null
+++ b/scraper/tests/test_css.py
@@ -0,0 +1,294 @@
+from pathlib import Path
+
+import pytest
+
+from libretexts2zim.css import CssProcessor
+
+
+@pytest.mark.parametrize(
+    "css_document_content, css_document_url, expected_assets, expected_css_rewritten",
+    [
+        pytest.param(
+            """
+body {
+    background-image: url('https://example.com/image.jpg');
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {"https://example.com/image.jpg": Path("/content/css_assets/image.jpg")},
+            """
+body {
+    background-image: url("css_assets/image.jpg");
+}
+""",
+            id="basic_full",
+        ),
+        pytest.param(
+            """
+body {
+    background-image: url('/assets/image.jpg');
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {
+                "https://www.acme.com/assets/image.jpg": Path(
+                    "/content/css_assets/assets/image.jpg"
+                )
+            },
+            """
+body {
+    background-image: url("css_assets/assets/image.jpg");
+}
+""",
+            id="basic_absolute",
+        ),
+        pytest.param(
+            """
+body {
+    background-image: url('../image.jpg');
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {"https://www.acme.com/image.jpg": Path("/content/css_assets/image.jpg")},
+            """
+body {
+    background-image: url("css_assets/image.jpg");
+}
+""",
+            id="basic_relative1",
+        ),
+        pytest.param(
+            """
+body {
+    background-image: url('./image.jpg');
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {
+                "https://www.acme.com/styles/image.jpg": Path(
+                    "/content/css_assets/styles/image.jpg"
+                )
+            },
+            """
+body {
+    background-image: url("css_assets/styles/image.jpg");
+}
+""",
+            id="basic_relative2",
+        ),
+        pytest.param(
+            """
+@import url("print.css")
+""",
+            "https://www.acme.com/styles/main.css",
+            {
+                "https://www.acme.com/styles/print.css": Path(
+                    "/content/css_assets/styles/print.css"
+                )
+            },
+            """
+@import url("css_assets/styles/print.css")
+;""",
+            id="import",
+        ),
+        pytest.param(
+            """
+body {
+    background-image: url('https://example.com/image.jpg'), url('/assets/image.jpg');
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {
+                "https://example.com/image.jpg": Path("/content/css_assets/image.jpg"),
+                "https://www.acme.com/assets/image.jpg": Path(
+                    "/content/css_assets/assets/image.jpg"
+                ),
+            },
+            """
+body {
+    background-image: url("css_assets/image.jpg"), url("css_assets/assets/image.jpg");
+}
+""",
+            id="two_backgrounds",
+        ),
+        pytest.param(
+            """
+.ui-widget-content {
+    background: #fff url("https://example.com/banner2.png") 50% 50% repeat-x;
+    color: #222;
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {
+                "https://example.com/banner2.png": Path(
+                    "/content/css_assets/banner2.png"
+                ),
+            },
+            """
+.ui-widget-content {
+    background: #fff url("css_assets/banner2.png") 50% 50% repeat-x;
+    color: #222;
+}
+""",
+            id="complex_1",
+        ),
+        pytest.param(
+            """
+@font-face {
+    font-display: swap;
+    font-family: icomoon;
+    font-style: normal;
+    font-weight: 400;
+    src: url(/@style/icons/icomoon.eot?_=ae123bc);
+    src: url(/@style/icons/icomoon.eot?_=ae123bc#iefix)
+        format("embedded-opentype"),
+        url(/@style/icons/icomoon.woff?_=ae123bc)
+        format("woff"),
+        url(/@style/icons/icomoon.ttf?_=ae123bc)
+        format("truetype"),
+        url(/@style/icons/icomoon.svg?_=ae123bc#icomoon)
+        format("svg");
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {
+                "https://www.acme.com/@style/icons/icomoon.eot?_=ae123bc": Path(
+                    "/content/css_assets/@style/icons/icomoon.eot"
+                ),
+                "https://www.acme.com/@style/icons/icomoon.eot?_=ae123bc#iefix": Path(
+                    "/content/css_assets/@style/icons/icomoon_1.eot"
+                ),
+                "https://www.acme.com/@style/icons/icomoon.woff?_=ae123bc": Path(
+                    "/content/css_assets/@style/icons/icomoon.woff"
+                ),
+                "https://www.acme.com/@style/icons/icomoon.ttf?_=ae123bc": Path(
+                    "/content/css_assets/@style/icons/icomoon.ttf"
+                ),
+                "https://www.acme.com/@style/icons/icomoon.svg?_=ae123bc#icomoon": Path(
+                    "/content/css_assets/@style/icons/icomoon.svg"
+                ),
+            },
+            """
+@font-face {
+    font-display: swap;
+    font-family: icomoon;
+    font-style: normal;
+    font-weight: 400;
+    src: url(css_assets/@style/icons/icomoon.eot);
+    src: url(css_assets/@style/icons/icomoon_1.eot)
+        format("embedded-opentype"),
+        url(css_assets/@style/icons/icomoon.woff)
+        format("woff"),
+        url(css_assets/@style/icons/icomoon.ttf)
+        format("truetype"),
+        url(css_assets/@style/icons/icomoon.svg)
+        format("svg");
+}
+""",
+            id="complex_2",
+        ),
+        pytest.param(
+            """
+body {
+    background-image: url('https://example.com/image.jpg');
+}
+div {
+    background-image: url('https://example.com/image.jpg');
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {"https://example.com/image.jpg": Path("/content/css_assets/image.jpg")},
+            """
+body {
+    background-image: url("css_assets/image.jpg");
+}
+div {
+    background-image: url("css_assets/image.jpg");
+}
+""",
+            id="duplicate",
+        ),
+        pytest.param(
+            """
+.magicBg {
+background-image: url(data:image/gif;base64,R0lGODlhAQBkAPcAAAAAAAEBAQICAgMDAwQEBAUFBQ)
+}
+""",
+            "https://www.acme.com/styles/main.css",
+            {},
+            """
+.magicBg {
+background-image: url(data:image/gif;base64,R0lGODlhAQBkAPcAAAAAAAEBAQICAgMDAwQEBAUFBQ)
+}
+""",
+            id="ignore_data",
+        ),
+    ],
+)
+def test_css_processor_single_doc(
+    css_document_content: str,
+    css_document_url: str,
+    expected_assets: dict[str, Path],
+    expected_css_rewritten: str,
+):
+    processor = CssProcessor()
+    result = processor.process(css_document_url, css_document_content.encode())
+    assert processor.css_assets == expected_assets
+    assert result == expected_css_rewritten
+
+
+def test_css_processor_multiple_docs():
+    doc1 = """
+body {
+    background-image: url('https://example.com/image.jpg'), url('https://example.com/image.jpg?_=test1');
+}
+"""
+    doc2 = """
+div {
+    background-image: url('https://example.com/image.jpg'), url('https://example.com/image.jpg?_=test2');
+}
+"""
+    css_1_url = "https://www.acme.com/styles/main1.css"
+    css_2_url = "https://www.acme.com/styles/main2.css"
+    processor = CssProcessor()
+
+    # process a first document
+    result1 = processor.process(css_original_url=css_1_url, css_content=doc1.encode())
+
+    assert processor.css_assets == {
+        "https://example.com/image.jpg": Path("/content/css_assets/image.jpg"),
+        "https://example.com/image.jpg?_=test1": Path(
+            "/content/css_assets/image_1.jpg"
+        ),
+    }
+
+    assert (
+        result1
+        == """
+body {
+    background-image: url("css_assets/image.jpg"), url("css_assets/image_1.jpg");
+}
+"""
+    )
+
+    # process a second document
+    result2 = processor.process(css_original_url=css_2_url, css_content=doc2.encode())
+
+    assert processor.css_assets == {
+        "https://example.com/image.jpg": Path("/content/css_assets/image.jpg"),
+        "https://example.com/image.jpg?_=test1": Path(
+            "/content/css_assets/image_1.jpg"
+        ),
+        "https://example.com/image.jpg?_=test2": Path(
+            "/content/css_assets/image_2.jpg"
+        ),
+    }
+
+    assert (
+        result2
+        == """
+div {
+    background-image: url("css_assets/image.jpg"), url("css_assets/image_2.jpg");
+}
+"""
+    )
diff --git a/scraper/tests/test_utils.py b/scraper/tests/test_utils.py
new file mode 100644
index 0000000..6e903c1
--- /dev/null
+++ b/scraper/tests/test_utils.py
@@ -0,0 +1,59 @@
+from pathlib import Path
+
+import pytest
+
+from libretexts2zim.utils import get_asset_path_from_url
+
+
+@pytest.mark.parametrize(
+    "online_url, already_used_paths, expected_path",
+    [
+        pytest.param("style.css", [], "style.css", id="simple"),
+        pytest.param("folder/style.css", [], "folder/style.css", id="folder"),
+        pytest.param("style.css", ["style.css"], "style_1.css", id="conflict"),
+        pytest.param(
+            "folder/style.css",
+            ["folder/style.css"],
+            "folder/style_1.css",
+            id="folder_conflict",
+        ),
+        pytest.param(
+            "folder/style.css",
+            ["style.css"],
+            "folder/style.css",
+            id="folder_noconflict",
+        ),
+        pytest.param(
+            "../folder/style.css", [], "folder/style.css", id="relative_parent"
+        ),
+        pytest.param("./folder/style.css", [], "folder/style.css", id="relative_same"),
+        pytest.param("/folder/style.css", [], "folder/style.css", id="absolute"),
+        pytest.param(
+            "/folder/style.css",
+            ["folder/style.css"],
+            "folder/style_1.css",
+            id="conflict_absolute",
+        ),
+        pytest.param(
+            "https://www.acme.com/folder/style.css", [], "folder/style.css", id="full"
+        ),
+        pytest.param(
+            "//www.acme.com/folder/style.css",
+            [],
+            "folder/style.css",
+            id="full_no_scheme",
+        ),
+        pytest.param(
+            "style.css?q=value#fragment",
+            [],
+            "style.css",
+            id="query_string_and_fragment",
+        ),
+    ],
+)
+def test_get_asset_path_from_url(
+    online_url: str, already_used_paths: list[str], expected_path: str
+):
+    assert get_asset_path_from_url(
+        online_url, [Path(path) for path in already_used_paths]
+    ) == Path(expected_path)
diff --git a/zimui/index.html b/zimui/index.html
index a888544..4fccf60 100644
--- a/zimui/index.html
+++ b/zimui/index.html
@@ -1,9 +1,11 @@
-<!DOCTYPE html>
+<!doctype html>
 <html lang="en">
   <head>
-    <meta charset="UTF-8">
-    <link rel="icon" href="/favicon.ico">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta charset="UTF-8" />
+    <link rel="icon" href="./favicon.ico" />
+    <link rel="stylesheet" type="text/css" media="screen" href="./content/screen.css" />
+    <link rel="stylesheet" type="text/css" media="print" href="./content/print.css" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>Vite App</title>
   </head>
   <body>