Skip to content

Commit

Permalink
fixup! feat: Add option to scan and register HTML anchors
Browse files Browse the repository at this point in the history
  • Loading branch information
pawamoy committed Feb 17, 2024
1 parent ef77542 commit 38cdd64
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 46 deletions.
9 changes: 0 additions & 9 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,10 +1 @@
--8<-- "CHANGELOG.md"

[](#hello){#hello2}

## Hello

Hello.

Link to [Hello 1][hello1].
Link to [Hello 2][hello2].
10 changes: 4 additions & 6 deletions docs/index.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
--8<-- "README.md"

[](#hello){#hello1}
[](){#hello}
## Hello world

## Hello
hello.

Hello.

Link to [Hello 1][hello1].
Link to [Hello 2][hello2].
[hello][hello]
28 changes: 22 additions & 6 deletions src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from __future__ import annotations

import re
import unicodedata
from html import escape, unescape
from typing import TYPE_CHECKING, Any, Callable, Match, Tuple
from itertools import zip_longest
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple
from urllib.parse import urlsplit
from xml.etree.ElementTree import Element

Expand Down Expand Up @@ -204,6 +206,8 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
class AnchorScannerTreeProcessor(Treeprocessor):
"""Tree processor to scan and register HTML anchors."""

_htags: ClassVar[set[str]] = {"h1", "h2", "h3", "h4", "h5", "h6"}

def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
"""Initialize the tree processor.
Expand All @@ -217,12 +221,24 @@ def run(self, root: Element) -> None: # noqa: D102
if self.plugin.current_page is not None:
self._scan_anchors(root)

def _scan_anchors(self, parent: Element) -> None:
for el in parent:
if el.tag == "a" and (hid := el.get("id")):
self.plugin.register_anchor(self.plugin.current_page, hid, el.get("href", "").lstrip("#")) # type: ignore[arg-type]
@staticmethod
def _slugify(value: str, separator: str = "-") -> str:
value = unicodedata.normalize("NFKD", str(value)).encode("ascii", "ignore").decode("ascii")
value = re.sub(r"[^\w\s-]", "", value.lower())
return re.sub(r"[-_\s]+", separator, value).strip("-_")

def _scan_anchors(self, parent: Element) -> str | None:
"""Scan the children of `parent` for anchors and register them on the plugin.

Returns the `id` of the last anchor found that has not been registered yet
(or `None`), so that a caller scanning an enclosing paragraph can register it.
"""
hid = None
# Pair every child with its following sibling; the last child is paired with
# a placeholder element whose tag ("/") is not one of the heading tags.
for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")):
if el.tag == "a":
# Remember the anchor id (None when the `<a>` has no `id` attribute).
hid = el.get("id")
# A paragraph whose nested scan reported a pending anchor id.
elif el.tag == "p" and (hid := self._scan_anchors(el)):
# Target the next sibling when it is a heading: its explicit id if set,
# otherwise a slug of its text. Empty string when it is not a heading.
href = (next_el.get("id") or self._slugify(next_el.text or "")) if next_el.tag in self._htags else ""
self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type]
# The id has been registered: nothing is left pending for the caller.
hid = None
else:
# NOTE(review): the bare call below looks like the pre-change line kept by
# the diff view; the assignment right after it is its replacement — confirm.
self._scan_anchors(el)
hid = self._scan_anchors(el)
return hid


class AutorefsExtension(Extension):
Expand Down
24 changes: 0 additions & 24 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,27 +60,3 @@ def test_dont_make_relative_urls_relative_again() -> None:
plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",))
== "../../foo/bar/baz.html#foo.bar.baz"
)


def test_register_html_anchors() -> None:
"""Check that HTML anchors are registered when enabled."""
plugin = AutorefsPlugin()
# Turn off `scan_toc` and turn on `scan_anchors` for this test.
plugin.scan_toc = False
plugin.scan_anchors = True

# Minimal stand-in for a page object: only a `url` attribute is provided.
class Page:
url = "/page/url"

plugin.on_page_content(
"""
<a id="foo.bar">
<a href="#foo.baz">
<a id="foo.qux" href="#fooqux">
<a href="quxfoo" id="qux.foo">
""",
page=Page(), # type: ignore[arg-type]
)
# The three anchors carrying an `id` must be in the URL map; `foo.baz` only
# appears as an `href` and must not be.
assert "foo.bar" in plugin._url_map
assert "foo.baz" not in plugin._url_map
assert "foo.qux" in plugin._url_map
assert "qux.foo" in plugin._url_map
28 changes: 27 additions & 1 deletion tests/test_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

from __future__ import annotations

from functools import partial
from textwrap import dedent

import markdown
import pytest

from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
from mkdocs_autorefs.plugin import AutorefsPlugin
from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url


@pytest.mark.parametrize(
Expand Down Expand Up @@ -224,3 +228,25 @@ def test_external_references() -> None:
output, unmapped = fix_refs(source, url_map.__getitem__)
assert output == '<a class="autorefs autorefs-external" href="https://example.com">example</a>'
assert unmapped == []


def test_register_html_anchors() -> None:
"""Check that HTML anchors are registered when enabled."""
plugin = AutorefsPlugin()
# Build a standalone Markdown instance whose autorefs extension is handed a
# factory for the anchor scanner bound to this plugin. `attr_list` is enabled
# too, presumably for the `{#id}` attribute syntax used below — confirm.
md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(partial(AnchorScannerTreeProcessor, plugin))])
# Set a non-None current page before converting.
plugin.current_page = ""
md.convert(
dedent(
"""
[](){#foo}
## Heading
Paragraph 1.
[](){#bar}
Paragraph 2.
""",
),
)
# `foo` sits right before a heading and is expected to map to that heading's
# anchor; `bar` is not followed by a heading and is expected to map to itself.
assert plugin._url_map["foo"] == "#heading"
assert plugin._url_map["bar"] == "#bar"

0 comments on commit 38cdd64

Please sign in to comment.