From a246b8f00e9ec86d6368168e5ef5255b837fe1d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Sun, 18 Feb 2024 16:49:30 +0100 Subject: [PATCH] fixup! feat: Add option to scan and register HTML anchors --- README.md | 88 ++++++++++++++++++++++++++++++- src/mkdocs_autorefs/references.py | 36 +++++++++---- tests/test_references.py | 21 ++++++-- 3 files changed, 130 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index bb60430..e783cdf 100644 --- a/README.md +++ b/README.md @@ -49,4 +49,90 @@ This works the same as [a normal link to that heading](../doc1.md#hello-world). Linking to a heading without needing to know the destination page can be useful if specifying that path is cumbersome, e.g. when the pages have deeply nested paths, are far apart, or are moved around frequently. And the issue is somewhat exacerbated by the fact that [MkDocs supports only *relative* links between pages](https://github.com/mkdocs/mkdocs/issues/1592). -Note that this plugin's behavior is undefined when trying to link to a heading title that appears several times throughout the site. Currently it arbitrarily chooses one of the pages. +Note that this plugin's behavior is undefined when trying to link to a heading title that appears several times throughout the site. Currently it arbitrarily chooses one of the pages. In such cases, use [Markdown anchors](#markdown-anchors) to add unique aliases to your headings. + +### Markdown anchors + +The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place. The syntax is `[](){#id-of-the-anchor}`. First you must enable the feature: + +```yaml +# mkdocs.yml +plugins: + - search + - autorefs: + scan_anchors: true + +markdown_extensions: + - attr_list +``` + +Then, add an anchor to a document: + +```md +Somewhere in a document. + +[](){#foobar-paragraph} + +Paragraph about foobar. 
+``` + +Now you can link to this anchor with the usual syntax: + +```md +Check out the [paragraph about foobar][foobar-paragraph]. +``` + +If you add a Markdown anchor right above a heading, this anchor will redirect to the heading itself: + +```md +[](){#foobar} +## A verbose title about foobar +``` + +Linking to the `foobar` anchor will bring you directly to the heading, not the anchor itself, so the URL will show `#a-verbose-title-about-foobar` instead of `#foobar`. These anchors therefore act as "aliases" for headings. It is possible to define multiple aliases per heading: + +```md +[](){#contributing} +[](){#development-setup} +## How to contribute to the project? +``` + +Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading is undefined behavior (it could lead to any one of the headings, non-deterministically). With unique aliases above headings, you can make sure to link to the right heading. + +For example, consider the following setup. You have one document per operating system describing how to install a project with the OS package manager or from sources: + +``` +docs/ + install/ + arch.md + debian.md + gentoo.md +``` + +Each page has: + +```md +## Install with package manager + +... + +## Install from sources + +... +``` + +You don't want to change headings and make them redundant, like `## Arch: Install with package manager` and `## Debian: Install with package manager` just to be able to reference the right one with autorefs. Instead you can do this: + +```md +[](){#arch-install-pkg} +## Install with package manager + +... + +[](){#arch-install-src} +## Install from sources + +... +``` + +...replacing `arch` with `debian`, `gentoo`, etc. in the other pages. 
diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index bcdd6a9..76c7b77 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -222,23 +222,39 @@ def run(self, root: Element) -> None: # noqa: D102 self._scan_anchors(root) @staticmethod - def _slugify(value: str, separator: str = "-") -> str: + def _slug(value: str, separator: str = "-") -> str: value = unicodedata.normalize("NFKD", str(value)).encode("ascii", "ignore").decode("ascii") value = re.sub(r"[^\w\s-]", "", value.lower()) return re.sub(r"[-_\s]+", separator, value).strip("-_") - def _scan_anchors(self, parent: Element) -> str | None: - hid = None + def _scan_anchors(self, parent: Element) -> list[str]: + ids = [] + # We iterate on pairs of elements, to check if the next element is a heading (alias feature). for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")): if el.tag == "a": - hid = el.get("id") - elif el.tag == "p" and (hid := self._scan_anchors(el)): - href = (next_el.get("id") or self._slugify(next_el.text or "")) if next_el.tag in self._htags else "" - self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type] - hid = None + # We found an anchor. Record its id if it has one. + if hid := el.get("id"): + if el.tail and el.tail.strip(): + # If the anchor has a non-whitespace-only tail, it's not an alias: + # register it immediately. + self.plugin.register_anchor(self.plugin.current_page, hid) # type: ignore[arg-type] + else: + # Else record its id and continue. + ids.append(hid) + elif el.tag == "p": + if ids := self._scan_anchors(el): + # Markdown anchors are always rendered as `a` tags within a `p` tag. + # Headings therefore appear after the `p` tag. Here the current element + # is a `p` tag and it contains at least one anchor with an id. + # We can check if the next element is a heading, and use its id as href. 
+ href = (next_el.get("id") or self._slug(next_el.text or "")) if next_el.tag in self._htags else "" + for hid in ids: + self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type] + ids.clear() else: - hid = self._scan_anchors(el) - return hid + # Recurse into sub-elements. + ids = self._scan_anchors(el) + return ids class AutorefsExtension(Extension): diff --git a/tests/test_references.py b/tests/test_references.py index 061b002..7a68e09 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -229,8 +229,8 @@ def test_external_references() -> None: assert unmapped == [] -def test_register_html_anchors() -> None: - """Check that HTML anchors are registered when enabled.""" +def test_register_markdown_anchors() -> None: + """Check that Markdown anchors are registered when enabled.""" plugin = AutorefsPlugin() md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(plugin)]) plugin.current_page = "" @@ -238,14 +238,27 @@ def test_register_html_anchors() -> None: dedent( """ [](){#foo} - ## Heading + ## Heading foo Paragraph 1. [](){#bar} Paragraph 2. + + [](){#alias1} + [](){#alias2} + ## Heading bar + + [](){#alias3} + Text. + [](){#alias4} + ## Heading baz """, ), ) - assert plugin._url_map["foo"] == "#heading" + assert plugin._url_map["foo"] == "#heading-foo" assert plugin._url_map["bar"] == "#bar" + assert plugin._url_map["alias1"] == "#heading-bar" + assert plugin._url_map["alias2"] == "#heading-bar" + assert plugin._url_map["alias3"] == "#alias3" + assert plugin._url_map["alias4"] == "#heading-baz"