From 38cdd644ead7e39db25a58b383a7577dc00f4061 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Sat, 17 Feb 2024 16:33:19 +0100 Subject: [PATCH] fixup! feat: Add option to scan and register HTML anchors --- docs/changelog.md | 9 --------- docs/index.md | 10 ++++------ src/mkdocs_autorefs/references.py | 28 ++++++++++++++++++++++------ tests/test_plugin.py | 24 ------------------------ tests/test_references.py | 28 +++++++++++++++++++++++++++- 5 files changed, 53 insertions(+), 46 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 3c03394..786b75d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,10 +1 @@ --8<-- "CHANGELOG.md" - -[](#hello){#hello2} - -## Hello - -Hello. - -Link to [Hello 1][hello1]. -Link to [Hello 2][hello2]. diff --git a/docs/index.md b/docs/index.md index 78b84ee..5b450db 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,10 +1,8 @@ --8<-- "README.md" -[](#hello){#hello1} +[](){#hello} +## Hello world -## Hello +Hello. -Hello. - -Link to [Hello 1][hello1]. -Link to [Hello 2][hello2]. 
+[hello][hello] \ No newline at end of file diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index d5a2538..23f39cb 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -3,8 +3,10 @@ from __future__ import annotations import re +import unicodedata from html import escape, unescape -from typing import TYPE_CHECKING, Any, Callable, Match, Tuple +from itertools import zip_longest +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple from urllib.parse import urlsplit from xml.etree.ElementTree import Element @@ -204,6 +206,8 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str class AnchorScannerTreeProcessor(Treeprocessor): """Tree processor to scan and register HTML anchors.""" + _htags: ClassVar[set[str]] = {"h1", "h2", "h3", "h4", "h5", "h6"} + def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: """Initialize the tree processor. @@ -217,12 +221,24 @@ def run(self, root: Element) -> None: # noqa: D102 if self.plugin.current_page is not None: self._scan_anchors(root) - def _scan_anchors(self, parent: Element) -> None: - for el in parent: - if el.tag == "a" and (hid := el.get("id")): - self.plugin.register_anchor(self.plugin.current_page, hid, el.get("href", "").lstrip("#")) # type: ignore[arg-type] + @staticmethod + def _slugify(value: str, separator: str = "-") -> str: + value = unicodedata.normalize("NFKD", str(value)).encode("ascii", "ignore").decode("ascii") + value = re.sub(r"[^\w\s-]", "", value.lower()) + return re.sub(r"[-_\s]+", separator, value).strip("-_") + + def _scan_anchors(self, parent: Element) -> str | None: + hid = None + for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")): + if el.tag == "a": + hid = el.get("id") + elif el.tag == "p" and (hid := self._scan_anchors(el)): + href = (next_el.get("id") or self._slugify(next_el.text or "")) if next_el.tag in self._htags else "" + 
self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type] + hid = None else: - self._scan_anchors(el) + hid = self._scan_anchors(el) + return hid class AutorefsExtension(Extension): diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 8fcae75..8acd446 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -60,27 +60,3 @@ def test_dont_make_relative_urls_relative_again() -> None: plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",)) == "../../foo/bar/baz.html#foo.bar.baz" ) - - -def test_register_html_anchors() -> None: - """Check that HT?ML anchors are registered when enabled.""" - plugin = AutorefsPlugin() - plugin.scan_toc = False - plugin.scan_anchors = True - - class Page: - url = "/page/url" - - plugin.on_page_content( - """ - - - - - """, - page=Page(), # type: ignore[arg-type] - ) - assert "foo.bar" in plugin._url_map - assert "foo.baz" not in plugin._url_map - assert "foo.qux" in plugin._url_map - assert "qux.foo" in plugin._url_map diff --git a/tests/test_references.py b/tests/test_references.py index 5a25844..734b1b7 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -2,10 +2,14 @@ from __future__ import annotations +from functools import partial +from textwrap import dedent + import markdown import pytest -from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url +from mkdocs_autorefs.plugin import AutorefsPlugin +from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url @pytest.mark.parametrize( @@ -224,3 +228,25 @@ def test_external_references() -> None: output, unmapped = fix_refs(source, url_map.__getitem__) assert output == 'example' assert unmapped == [] + + +def test_register_html_anchors() -> None: + """Check that HTML anchors are registered when enabled.""" + plugin = AutorefsPlugin() + md = markdown.Markdown(extensions=["attr_list", 
AutorefsExtension(partial(AnchorScannerTreeProcessor, plugin))]) + plugin.current_page = "" + md.convert( + dedent( + """ + [](){#foo} + ## Heading + + Paragraph 1. + + [](){#bar} + Paragraph 2. + """, + ), + ) + assert plugin._url_map["foo"] == "#heading" + assert plugin._url_map["bar"] == "#bar"