From ec47ac2ddc14b92662092c44f49b25f3bdfe5af9 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 23 Feb 2024 13:09:16 +0100 Subject: [PATCH] fixup! feat: Add option to scan and register HTML anchors --- mkdocs.yml | 3 +- src/mkdocs_autorefs/plugin.py | 11 +---- src/mkdocs_autorefs/references.py | 68 +++++++++++++++++++------------ tests/test_references.py | 26 ++++++++---- 4 files changed, 63 insertions(+), 45 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 4d4cb75..c120b5b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -95,8 +95,7 @@ markdown_extensions: permalink: "¤" plugins: -- autorefs: - scan_anchors: true +- autorefs - search - markdown-exec - gen-files: diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index a0dacde..2fea146 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -19,8 +19,6 @@ from urllib.parse import urlsplit from markdown.extensions.attr_list import AttrListExtension -from mkdocs.config.base import Config -from mkdocs.config.config_options import Type from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin from mkdocs.structure.pages import Page @@ -41,14 +39,7 @@ log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment] -class AutorefsConfig(Config): - """Configuration options for the Autorefs plugin.""" - - scan_anchors = Type(bool, default=False) - """Whether to scan HTML pages for anchors defining references.""" - - -class AutorefsPlugin(BasePlugin[AutorefsConfig]): +class AutorefsPlugin(BasePlugin): """An `mkdocs` plugin. 
This plugin defines the following event hooks: diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 0ca5d09..d1cb2c3 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -4,7 +4,6 @@ import re from html import escape, unescape -from itertools import zip_longest from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple from urllib.parse import urlsplit from xml.etree.ElementTree import Element @@ -215,40 +214,57 @@ def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: """ super().__init__(md) self.plugin = plugin - self._slug = md.treeprocessors["toc"].slugify def run(self, root: Element) -> None: # noqa: D102 if self.plugin.current_page is not None: - self._scan_anchors(root) + pending_anchors = _PendingAnchors(self.plugin, self.plugin.current_page) + self._scan_anchors(root, pending_anchors) + pending_anchors.flush() - def _scan_anchors(self, parent: Element) -> list[str]: - ids = [] - # We iterate on pairs of elements, to check if the next element is a heading (alias feature). - for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")): + def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors) -> None: + for el in parent: if el.tag == "a": # We found an anchor. Record its id if it has one. if anchor_id := el.get("id"): - if el.tail and el.tail.strip(): - # If the anchor has a non-whitespace-only tail, it's not an alias: - # register it immediately. - self.plugin.register_anchor(self.plugin.current_page, anchor_id) # type: ignore[arg-type] - else: - # Else record its id and continue. - ids.append(anchor_id) + pending_anchors.append(anchor_id) + # Non-whitespace text after the element interrupts the chain, aliases can't apply. + if el.tail and el.tail.strip(): + pending_anchors.flush() + elif el.tag == "p": - if ids := self._scan_anchors(el): - # Markdown anchors are always rendered as `a` tags within a `p` tag. 
- # Headings therefore appear after the `p` tag. Here the current element - # is a `p` tag and it contains at least one anchor with an id. - # We can check if the next element is a heading, and use its id as href. - href = (next_el.get("id") or self._slug(next_el.text or "")) if next_el.tag in self._htags else "" - for anchor_id in ids: - self.plugin.register_anchor(self.plugin.current_page, anchor_id, href) # type: ignore[arg-type] - ids.clear() + # A `p` tag is a no-op for our purposes, just recurse into it in the context + # of the current collection of anchors. + self._scan_anchors(el, pending_anchors) + # Non-whitespace text after the element interrupts the chain, aliases can't apply. + if el.tail and el.tail.strip(): + pending_anchors.flush() + + elif el.tag in self._htags: + # If the element is a heading, that turns the pending anchors into aliases. + pending_anchors.flush(el.get("id")) + else: - # Recurse into sub-elements. - ids = self._scan_anchors(el) - return ids + # But if it's some other interruption, flush anchors anyway as non-aliases. + pending_anchors.flush() + # Recurse into sub-elements, in a *separate* context. 
+ self.run(el) + + +class _PendingAnchors: + """A collection of HTML anchors that may or may not become aliased to an upcoming heading.""" + + def __init__(self, plugin: AutorefsPlugin, current_page: str): + self.plugin = plugin + self.current_page = current_page + self.anchors: list[str] = [] + + def append(self, anchor: str) -> None: + self.anchors.append(anchor) + + def flush(self, alias_to: str | None = None) -> None: + for anchor in self.anchors: + self.plugin.register_anchor(self.current_page, anchor, alias_to) + self.anchors.clear() class AutorefsExtension(Extension): diff --git a/tests/test_references.py b/tests/test_references.py index b5c8526..077bb9b 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -233,7 +233,7 @@ def test_register_markdown_anchors() -> None: """Check that Markdown anchors are registered when enabled.""" plugin = AutorefsPlugin() md = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(plugin)]) - plugin.current_page = "" + plugin.current_page = "page" md.convert( dedent( """ @@ -253,12 +253,24 @@ def test_register_markdown_anchors() -> None: Text. [](){#alias4} ## Heading baz + + [](){#alias5} + [](){#alias6} + Decoy. + ## Heading more + + [](){#alias7} """, ), ) - assert plugin._url_map["foo"] == "#heading-foo" - assert plugin._url_map["bar"] == "#bar" - assert plugin._url_map["alias1"] == "#heading-bar" - assert plugin._url_map["alias2"] == "#heading-bar" - assert plugin._url_map["alias3"] == "#alias3" - assert plugin._url_map["alias4"] == "#heading-baz" + assert plugin._url_map == { + "foo": "page#heading-foo", + "bar": "page#bar", + "alias1": "page#heading-bar", + "alias2": "page#heading-bar", + "alias3": "page#alias3", + "alias4": "page#heading-baz", + "alias5": "page#alias5", + "alias6": "page#alias6", + "alias7": "page#alias7", + }