From 5eea2e0eb2d2edc960387285819380d369b53e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Sun, 18 Feb 2024 23:29:27 +0100 Subject: [PATCH] fixup! feat: Add option to scan and register HTML anchors --- README.md | 35 ++++++++++++++++++------------- src/mkdocs_autorefs/plugin.py | 9 +++++++- src/mkdocs_autorefs/references.py | 18 ++++++---------- tests/test_references.py | 2 +- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index e783cdf..7eebcd7 100644 --- a/README.md +++ b/README.md @@ -53,20 +53,29 @@ Note that this plugin's behavior is undefined when trying to link to a heading t ### Markdown anchors -The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place. The syntax is `[](){#id-of-the-anchor}`. First you must enable the feature: +The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place. + +The syntax is: + +```md +[](){#id-of-the-anchor} +``` + +If you look closely, it starts with the usual syntax for a link, `[]()`, except both the text value and URL of the link are empty. Then we see `{#id-of-the-anchor}`, which is the syntax supported by the [`attr_list`](https://python-markdown.github.io/extensions/attr_list/) extension. It sets an HTML id to the anchor element. The autorefs plugin simply gives a meaning to such anchors with ids. Note that raw HTML anchors like `<a id="foo"></a>` are not supported. + +The `attr_list` extension must be enabled for the Markdown anchors feature to work: ```yaml # mkdocs.yml plugins: - search - autorefs - scan_anchors: true markdown_extensions: - attr_list ``` -Then, add an anchor to a document: +Now, you can add anchors to documents: ```md Somewhere in a document. @@ -76,9 +85,11 @@ Somewhere in a document. Paragraph about foobar. 
``` -Now you can link to this anchor with the usual syntax: +...making it possible to link to this anchor with our automatic links: ```md +In any document. + Check out the [paragraph about foobar][foobar-pararaph]. ``` @@ -97,27 +108,25 @@ Linking to the `foobar` anchor will bring you directly to the heading, not the a ## How to contribute to the project? ``` -Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading was undefined behavior (it could lead to any one of the headings, undeterministically). With unique aliases above headings, you can make sure to link to the right heading. +Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading is undefined behavior (it could lead to any one of the headings). With unique aliases above headings, you can make sure to link to the right heading. For example, consider the following setup. You have one document per operating system describing how to install a project with the OS package manager or from sources: -``` +```tree docs/ - install/ - arch.md - debian.md - gentoo.md + install/ + arch.md + debian.md + gentoo.md ``` Each page has: ```md ## Install with package manager - ... ## Install from sources - ... ``` @@ -126,12 +135,10 @@ You don't want to change headings and make them redundant, like `## Arch: Instal ```md [](){#arch-install-pkg} ## Install with package manager - ... [](){#arch-install-src} ## Install from sources - ... 
``` diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 740dd6f..a0dacde 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit +from markdown.extensions.attr_list import AttrListExtension from mkdocs.config.base import Config from mkdocs.config.config_options import Type from mkdocs.config.defaults import MkDocsConfig @@ -145,7 +146,13 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: The modified config. """ log.debug("Adding AutorefsExtension to the list") - scan_anchors = self.scan_anchors or self.config.scan_anchors + for ext in config.markdown_extensions: + if ext == "attr_list" or isinstance(ext, AttrListExtension): + log.debug("Enabling Markdown anchors feature") + scan_anchors = True + break + else: + scan_anchors = False config["markdown_extensions"].append(AutorefsExtension(plugin=self if scan_anchors else None)) return config diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 76c7b77..0ca5d09 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -3,7 +3,6 @@ from __future__ import annotations import re -import unicodedata from html import escape, unescape from itertools import zip_longest from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple @@ -216,31 +215,26 @@ def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: """ super().__init__(md) self.plugin = plugin + self._slug = md.treeprocessors["toc"].slugify def run(self, root: Element) -> None: # noqa: D102 if self.plugin.current_page is not None: self._scan_anchors(root) - @staticmethod - def _slug(value: str, separator: str = "-") -> str: - value = unicodedata.normalize("NFKD", str(value)).encode("ascii", "ignore").decode("ascii") - value = re.sub(r"[^\w\s-]", "", value.lower()) - return re.sub(r"[-_\s]+", 
separator, value).strip("-_") - def _scan_anchors(self, parent: Element) -> list[str]: ids = [] # We iterate on pairs of elements, to check if the next element is a heading (alias feature). for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")): if el.tag == "a": # We found an anchor. Record its id if it has one. - if hid := el.get("id"): + if anchor_id := el.get("id"): if el.tail and el.tail.strip(): # If the anchor has a non-whitespace-only tail, it's not an alias: # register it immediately. - self.plugin.register_anchor(self.plugin.current_page, hid) # type: ignore[arg-type] + self.plugin.register_anchor(self.plugin.current_page, anchor_id) # type: ignore[arg-type] else: # Else record its id and continue. - ids.append(hid) + ids.append(anchor_id) elif el.tag == "p": if ids := self._scan_anchors(el): # Markdown anchors are always rendered as `a` tags within a `p` tag. @@ -248,8 +242,8 @@ def _scan_anchors(self, parent: Element) -> list[str]: # is a `p` tag and it contains at least one anchor with an id. # We can check if the next element is a heading, and use its id as href. href = (next_el.get("id") or self._slug(next_el.text or "")) if next_el.tag in self._htags else "" - for hid in ids: - self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type] + for anchor_id in ids: + self.plugin.register_anchor(self.plugin.current_page, anchor_id, href) # type: ignore[arg-type] ids.clear() else: # Recurse into sub-elements. 
diff --git a/tests/test_references.py b/tests/test_references.py index 7a68e09..b5c8526 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -232,7 +232,7 @@ def test_external_references() -> None: def test_register_markdown_anchors() -> None: """Check that Markdown anchors are registered when enabled.""" plugin = AutorefsPlugin() - md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(plugin)]) + md = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(plugin)]) plugin.current_page = "" md.convert( dedent(