From 631783ec7716f7b33c11aff14ce4b779698888a0 Mon Sep 17 00:00:00 2001 From: tvdboom Date: Tue, 9 Aug 2022 19:23:44 +0200 Subject: [PATCH] feat: Add option to scan and register HTML anchors --- docs/changelog.md | 9 ++++++ docs/index.md | 9 ++++++ mkdocs.yml | 3 ++ src/mkdocs_autorefs/plugin.py | 50 +++++++++++++++++++++---------- src/mkdocs_autorefs/references.py | 50 +++++++++++++++++++++++++++++++ tests/test_plugin.py | 24 +++++++++++++++ 6 files changed, 129 insertions(+), 16 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 786b75d..3c03394 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1 +1,10 @@ --8<-- "CHANGELOG.md" + +[](#hello){#hello2} + +## Hello + +Hello. + +Link to [Hello 1][hello1]. +Link to [Hello 2][hello2]. diff --git a/docs/index.md b/docs/index.md index 612c7a5..78b84ee 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1 +1,10 @@ --8<-- "README.md" + +[](#hello){#hello1} + +## Hello + +Hello. + +Link to [Hello 1][hello1]. +Link to [Hello 2][hello2]. 
diff --git a/mkdocs.yml b/mkdocs.yml index 98661b8..4d4cb75 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -95,6 +95,8 @@ markdown_extensions: permalink: "¤" plugins: +- autorefs: + scan_anchors: true - search - markdown-exec - gen-files: @@ -109,6 +111,7 @@ plugins: import: - https://docs.python.org/3/objects.inv - https://www.mkdocs.org/objects.inv + - https://python-markdown.github.io/objects.inv paths: [src] options: docstring_options: diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 5eca316..9f1a31e 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -15,12 +15,18 @@ import contextlib import functools import logging +import re +from functools import partial from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit +from mkdocs.config.base import Config +from mkdocs.config.config_options import Type +from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin +from mkdocs.structure.pages import Page -from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url +from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url if TYPE_CHECKING: from mkdocs.config.defaults import MkDocsConfig @@ -36,7 +42,14 @@ log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment] -class AutorefsPlugin(BasePlugin): +class AutorefsConfig(Config): + """Configuration options for the Autorefs plugin.""" + + scan_anchors = Type(bool, default=False) + """Whether to scan HTML pages for anchors defining references.""" + + +class AutorefsPlugin(BasePlugin[AutorefsConfig]): """An `mkdocs` plugin. 
This plugin defines the following event hooks: @@ -50,23 +63,28 @@ class AutorefsPlugin(BasePlugin): """ scan_toc: bool = True + scan_anchors: bool = False current_page: str | None = None + _re_anchors = re.compile(r'') + def __init__(self) -> None: """Initialize the object.""" super().__init__() self._url_map: dict[str, str] = {} self._abs_url_map: dict[str, str] = {} + self._extension: AutorefsExtension | None = None self.get_fallback_anchor: Callable[[str], str | None] | None = None + self.current_page: str | None = None - def register_anchor(self, page: str, identifier: str) -> None: + def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None: """Register that an anchor corresponding to an identifier was encountered when rendering the page. Arguments: page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'` identifier: The HTML anchor (without '#') as a string. """ - self._url_map[identifier] = f"{page}#{identifier}" + self._url_map[identifier] = f"{page}#{anchor or identifier}" def register_url(self, identifier: str, url: str) -> None: """Register that the identifier should be turned into a link to this URL. @@ -133,20 +151,15 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: The modified config. """ log.debug("Adding AutorefsExtension to the list") - config["markdown_extensions"].append(AutorefsExtension()) + anchor_scanner_factory = ( + partial(AnchorScannerTreeProcessor, self) if self.scan_anchors or self.config.scan_anchors else None + ) + # anchor_scanner_factory = None + self._extension = AutorefsExtension(anchor_scanner_factory=anchor_scanner_factory) + config["markdown_extensions"].append(self._extension) return config - def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002 - """Remember which page is the current one. - - Arguments: - markdown: Input Markdown. - page: The related MkDocs page instance. 
- kwargs: Additional arguments passed by MkDocs. - - Returns: - The same Markdown. We only use this hook to map anchors to URLs. - """ + def on_page_markdown(self, markdown: str, *, page: Page, **kwargs: Any) -> str | None: # noqa: ARG002, D102 self.current_page = page.url return markdown @@ -170,6 +183,11 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa: log.debug(f"Mapping identifiers to URLs for page {page.file.src_path}") for item in page.toc.items: self.map_urls(page.url, item) + + # if self.scan_anchors or self.config.scan_anchors: + # for href, hid in re.findall(self._re_anchors, html): + # self.register_anchor(page.url, identifier=hid, anchor=href.lstrip("#")) + return html def map_urls(self, base_url: str, anchor: AnchorLink) -> None: diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 66b4931..5ce86a7 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -8,13 +8,17 @@ from urllib.parse import urlsplit from xml.etree.ElementTree import Element +from markdown.core import Markdown from markdown.extensions import Extension from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor +from markdown.treeprocessors import Treeprocessor from markdown.util import INLINE_PLACEHOLDER_RE if TYPE_CHECKING: from markdown import Markdown + from mkdocs_autorefs.plugin import AutorefsPlugin + AUTO_REF_RE = re.compile( r"<span data-(?P<kind>autorefs-identifier|autorefs-optional|autorefs-optional-hover)=" r'("?)(?P<identifier>[^"<>]*)\2>(?P<title>.*?)</span>', @@ -197,9 +201,48 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str return html, unmapped +class AnchorScannerTreeProcessor(Treeprocessor): + """Tree processor to scan and register HTML anchors.""" + + def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: + """Initialize the tree processor. 
+ + Parameters: + plugin: A reference to the autorefs plugin, to use its `register_anchor` method. + """ + super().__init__(md) + self.plugin = plugin + + def run(self, root: Element) -> None: # noqa: D102 + if self.plugin.current_page is not None: + self._scan_anchors(root) + + def _scan_anchors(self, parent: Element) -> None: + for el in parent: + if el.tag == "a" and (hid := el.get("id")): + self.plugin.register_anchor(self.plugin.current_page, hid, el.get("href", "").lstrip("#")) # type: ignore[arg-type] + else: + self._scan_anchors(el) + + class AutorefsExtension(Extension): """Extension that inserts auto-references in Markdown.""" + def __init__( + self, + anchor_scanner_factory: Callable[[Markdown], AnchorScannerTreeProcessor] | None = None, + **kwargs: Any, + ) -> None: + """Initialize the Markdown extension. + + Parameters: + anchor_scanner_factory: A callable that returns an instance of the anchor scanner tree processor. + **kwargs: Keyword arguments passed to the [base constructor][markdown.extensions.Extension]. + """ + super().__init__(**kwargs) + self.anchor_scanner_factory = anchor_scanner_factory + self.anchor_scanner: AnchorScannerTreeProcessor | None = None + def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent method's name) """Register the extension. 
@@ -213,3 +256,10 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me "mkdocs-autorefs", priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor ) + if self.anchor_scanner_factory: + self.anchor_scanner = self.anchor_scanner_factory(md) + md.treeprocessors.register( + self.anchor_scanner, + "mkdocs-autorefs-anchors-scanner", + priority=0, + ) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 8acd446..8fcae75 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -60,3 +60,27 @@ def test_dont_make_relative_urls_relative_again() -> None: plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",)) == "../../foo/bar/baz.html#foo.bar.baz" ) + + +def test_register_html_anchors() -> None: + """Check that HTML anchors are registered when enabled.""" + plugin = AutorefsPlugin() + plugin.scan_toc = False + plugin.scan_anchors = True + + class Page: + url = "/page/url" + + plugin.on_page_content( + """ + <a id="foo.bar"> + <a href="#foo.baz"> + <a id="foo.qux" href="#fooqux"> + <a href="quxfoo" id="qux.foo"> + """, + page=Page(), # type: ignore[arg-type] + ) + assert "foo.bar" in plugin._url_map + assert "foo.baz" not in plugin._url_map + assert "foo.qux" in plugin._url_map + assert "qux.foo" in plugin._url_map