From 95ac6d839ef3ea0572953f8c28ac3bf995f33008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Tue, 9 Aug 2022 19:23:44 +0200 Subject: [PATCH 01/12] feat: Add option to scan and register HTML anchors Co-authored-by: tvdboom --- docs/changelog.md | 9 ++++++ docs/index.md | 9 ++++++ mkdocs.yml | 3 ++ src/mkdocs_autorefs/plugin.py | 50 +++++++++++++++++++++---------- src/mkdocs_autorefs/references.py | 50 +++++++++++++++++++++++++++++++ tests/test_plugin.py | 24 +++++++++++++++ 6 files changed, 129 insertions(+), 16 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 786b75d..3c03394 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1 +1,10 @@ --8<-- "CHANGELOG.md" + +[](#hello){#hello2} + +## Hello + +Hello. + +Link to [Hello 1][hello1]. +Link to [Hello 2][hello2]. diff --git a/docs/index.md b/docs/index.md index 612c7a5..78b84ee 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1 +1,10 @@ --8<-- "README.md" + +[](#hello){#hello1} + +## Hello + +Hello. + +Link to [Hello 1][hello1]. +Link to [Hello 2][hello2]. diff --git a/mkdocs.yml b/mkdocs.yml index 98661b8..4d4cb75 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -95,6 +95,8 @@ markdown_extensions: permalink: "¤" plugins: +- autorefs: + scan_anchors: true - search - markdown-exec - gen-files: @@ -109,6 +111,7 @@ plugins: import: - https://docs.python.org/3/objects.inv - https://www.mkdocs.org/objects.inv + - https://python-markdown.github.io/objects.inv paths: [src] options: docstring_options: diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 5eca316..9f1a31e 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -15,12 +15,18 @@ import contextlib import functools import logging +import re +from functools import partial from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit +from mkdocs.config.base import Config +from mkdocs.config.config_options import Type +from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin +from mkdocs.structure.pages import Page -from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url +from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url if TYPE_CHECKING: from mkdocs.config.defaults import MkDocsConfig @@ -36,7 +42,14 @@ log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment] -class AutorefsPlugin(BasePlugin): +class AutorefsConfig(Config): + """Configuration options for the Autorefs plugin.""" + + scan_anchors = Type(bool, default=False) + """Whether to scan HTML pages for anchors defining references.""" + + +class AutorefsPlugin(BasePlugin[AutorefsConfig]): """An `mkdocs` plugin. This plugin defines the following event hooks: @@ -50,23 +63,28 @@ class AutorefsPlugin(BasePlugin): """ scan_toc: bool = True + scan_anchors: bool = False current_page: str | None = None + _re_anchors = re.compile(r'') + def __init__(self) -> None: """Initialize the object.""" super().__init__() self._url_map: dict[str, str] = {} self._abs_url_map: dict[str, str] = {} + self._extension: AutorefsExtension | None = None self.get_fallback_anchor: Callable[[str], str | None] | None = None + self.current_page: str | None = None - def register_anchor(self, page: str, identifier: str) -> None: + def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None: """Register that an anchor corresponding to an identifier was encountered when rendering the page. Arguments: page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'` identifier: The HTML anchor (without '#') as a string. """ - self._url_map[identifier] = f"{page}#{identifier}" + self._url_map[identifier] = f"{page}#{anchor or identifier}" def register_url(self, identifier: str, url: str) -> None: """Register that the identifier should be turned into a link to this URL. @@ -133,20 +151,15 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: The modified config. """ log.debug("Adding AutorefsExtension to the list") - config["markdown_extensions"].append(AutorefsExtension()) + anchor_scanner_factory = ( + partial(AnchorScannerTreeProcessor, self) if self.scan_anchors or self.config.scan_anchors else None + ) + # anchor_scanner_factory = None + self._extension = AutorefsExtension(anchor_scanner_factory=anchor_scanner_factory) + config["markdown_extensions"].append(self._extension) return config - def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002 - """Remember which page is the current one. - - Arguments: - markdown: Input Markdown. - page: The related MkDocs page instance. - kwargs: Additional arguments passed by MkDocs. - - Returns: - The same Markdown. We only use this hook to map anchors to URLs. - """ + def on_page_markdown(self, markdown: str, *, page: Page, **kwargs: Any) -> str | None: # noqa: ARG002, D102 self.current_page = page.url return markdown @@ -170,6 +183,11 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa: log.debug(f"Mapping identifiers to URLs for page {page.file.src_path}") for item in page.toc.items: self.map_urls(page.url, item) + + # if self.scan_anchors or self.config.scan_anchors: + # for href, hid in re.findall(self._re_anchors, html): + # self.register_anchor(page.url, identifier=hid, anchor=href.lstrip("#")) + return html def map_urls(self, base_url: str, anchor: AnchorLink) -> None: diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 66b4931..5ce86a7 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -8,13 +8,17 @@ from urllib.parse import urlsplit from xml.etree.ElementTree import Element +from markdown.core import Markdown from markdown.extensions import Extension from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor +from markdown.treeprocessors import Treeprocessor from markdown.util import INLINE_PLACEHOLDER_RE if TYPE_CHECKING: from markdown import Markdown + from mkdocs_autorefs.plugin import AutorefsPlugin + AUTO_REF_RE = re.compile( r"autorefs-identifier|autorefs-optional|autorefs-optional-hover)=" r'("?)(?P[^"<>]*)\2>(?P.*?)</span>', @@ -197,9 +201,48 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str return html, unmapped +class AnchorScannerTreeProcessor(Treeprocessor): + """Tree processor to scan and register HTML anchors.""" + + def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: + """Initialize the tree processor. + + Parameters: + plugin: A reference to the autorefs plugin, to use its `register_anchor` method. + """ + super().__init__(md) + self.plugin = plugin + + def run(self, root: Element) -> None: # noqa: D102 + if self.plugin.current_page is not None: + self._scan_anchors(root) + + def _scan_anchors(self, parent: Element) -> None: + for el in parent: + if el.tag == "a" and (hid := el.get("id")): + self.plugin.register_anchor(self.plugin.current_page, hid, el.get("href", "").lstrip("#")) # type: ignore[arg-type] + else: + self._scan_anchors(el) + + class AutorefsExtension(Extension): """Extension that inserts auto-references in Markdown.""" + def __init__( + self, + anchor_scanner_factory: Callable[[Markdown], AnchorScannerTreeProcessor] | None = None, + **kwargs: Any, + ) -> None: + """Initialize the Markdown extension. + + Parameters: + anchor_scanner_factory: A callable that returns an instance of the anchor scanner tree processor. + **kwargs: Keyword arguments passed to the [base constructor][markdown.extensions.Extension]. + """ + super().__init__(**kwargs) + self.anchor_scanner_factory = anchor_scanner_factory + self.anchor_scanner: AnchorScannerTreeProcessor | None = None + def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent method's name) """Register the extension. @@ -213,3 +256,10 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me "mkdocs-autorefs", priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor ) + if self.anchor_scanner_factory: + self.anchor_scanner = self.anchor_scanner_factory(md) + md.treeprocessors.register( + self.anchor_scanner, + "mkdocs-autorefs-anchors-scanner", + priority=0, + ) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 8acd446..8fcae75 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -60,3 +60,27 @@ def test_dont_make_relative_urls_relative_again() -> None: plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",)) == "../../foo/bar/baz.html#foo.bar.baz" ) + + +def test_register_html_anchors() -> None: + """Check that HT?ML anchors are registered when enabled.""" + plugin = AutorefsPlugin() + plugin.scan_toc = False + plugin.scan_anchors = True + + class Page: + url = "/page/url" + + plugin.on_page_content( + """ + <a id="foo.bar"> + <a href="#foo.baz"> + <a id="foo.qux" href="#fooqux"> + <a href="quxfoo" id="qux.foo"> + """, + page=Page(), # type: ignore[arg-type] + ) + assert "foo.bar" in plugin._url_map + assert "foo.baz" not in plugin._url_map + assert "foo.qux" in plugin._url_map + assert "qux.foo" in plugin._url_map From 4c054d5e5631263505e245f251cd21e0656c8f7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= <dev@pawamoy.fr> Date: Fri, 16 Feb 2024 16:19:00 +0100 Subject: [PATCH 02/12] fixup! feat: Add option to scan and register HTML anchors --- src/mkdocs_autorefs/plugin.py | 24 +++++++++++++++--------- src/mkdocs_autorefs/references.py | 4 +--- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 9f1a31e..0c332bb 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -73,9 +73,7 @@ def __init__(self) -> None: super().__init__() self._url_map: dict[str, str] = {} self._abs_url_map: dict[str, str] = {} - self._extension: AutorefsExtension | None = None self.get_fallback_anchor: Callable[[str], str | None] | None = None - self.current_page: str | None = None def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None: """Register that an anchor corresponding to an identifier was encountered when rendering the page. @@ -151,15 +149,23 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: The modified config. """ log.debug("Adding AutorefsExtension to the list") - anchor_scanner_factory = ( - partial(AnchorScannerTreeProcessor, self) if self.scan_anchors or self.config.scan_anchors else None - ) - # anchor_scanner_factory = None - self._extension = AutorefsExtension(anchor_scanner_factory=anchor_scanner_factory) - config["markdown_extensions"].append(self._extension) + scan_anchors = self.scan_anchors or self.config.scan_anchors + anchor_scanner_factory = partial(AnchorScannerTreeProcessor, self) if scan_anchors else None + config["markdown_extensions"].append(AutorefsExtension(anchor_scanner_factory)) return config - def on_page_markdown(self, markdown: str, *, page: Page, **kwargs: Any) -> str | None: # noqa: ARG002, D102 + def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002 + """Remember which page is the current one. + + Arguments: + markdown: Input Markdown. + page: The related MkDocs page instance. + kwargs: Additional arguments passed by MkDocs. + + Returns: + The same Markdown. We only use this hook to keep a reference to the current page URL, + used during Markdown conversion by the anchor scanner tree processor. + """ self.current_page = page.url return markdown diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 5ce86a7..d5a2538 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -241,7 +241,6 @@ def __init__( """ super().__init__(**kwargs) self.anchor_scanner_factory = anchor_scanner_factory - self.anchor_scanner: AnchorScannerTreeProcessor | None = None def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent method's name) """Register the extension. @@ -257,9 +256,8 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor ) if self.anchor_scanner_factory: - self.anchor_scanner = self.anchor_scanner_factory(md) md.treeprocessors.register( - self.anchor_scanner, + self.anchor_scanner_factory(md), "mkdocs-autorefs-anchors-scanner", priority=0, ) From 5d034b49ce424dbd4d0c08166f5c1fb4f0ffb595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= <dev@pawamoy.fr> Date: Sat, 17 Feb 2024 16:33:19 +0100 Subject: [PATCH 03/12] fixup! feat: Add option to scan and register HTML anchors --- docs/changelog.md | 9 --------- docs/index.md | 10 ++++------ src/mkdocs_autorefs/references.py | 28 ++++++++++++++++++++++------ tests/test_plugin.py | 24 ------------------------ tests/test_references.py | 28 +++++++++++++++++++++++++++- 5 files changed, 53 insertions(+), 46 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 3c03394..786b75d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,10 +1 @@ --8<-- "CHANGELOG.md" - -[](#hello){#hello2} - -## Hello - -Hello. - -Link to [Hello 1][hello1]. -Link to [Hello 2][hello2]. diff --git a/docs/index.md b/docs/index.md index 78b84ee..5b450db 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,10 +1,8 @@ --8<-- "README.md" -[](#hello){#hello1} +[](){#hello} +## Hello world -## Hello +helllo. -Hello. - -Link to [Hello 1][hello1]. -Link to [Hello 2][hello2]. +[hello][hello] \ No newline at end of file diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index d5a2538..23f39cb 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -3,8 +3,10 @@ from __future__ import annotations import re +import unicodedata from html import escape, unescape -from typing import TYPE_CHECKING, Any, Callable, Match, Tuple +from itertools import zip_longest +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple from urllib.parse import urlsplit from xml.etree.ElementTree import Element @@ -204,6 +206,8 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str class AnchorScannerTreeProcessor(Treeprocessor): """Tree processor to scan and register HTML anchors.""" + _htags: ClassVar[set[str]] = {"h1", "h2", "h3", "h4", "h5", "h6"} + def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: """Initialize the tree processor. @@ -217,12 +221,24 @@ def run(self, root: Element) -> None: # noqa: D102 if self.plugin.current_page is not None: self._scan_anchors(root) - def _scan_anchors(self, parent: Element) -> None: - for el in parent: - if el.tag == "a" and (hid := el.get("id")): - self.plugin.register_anchor(self.plugin.current_page, hid, el.get("href", "").lstrip("#")) # type: ignore[arg-type] + @staticmethod + def _slugify(value: str, separator: str = "-") -> str: + value = unicodedata.normalize("NFKD", str(value)).encode("ascii", "ignore").decode("ascii") + value = re.sub(r"[^\w\s-]", "", value.lower()) + return re.sub(r"[-_\s]+", separator, value).strip("-_") + + def _scan_anchors(self, parent: Element) -> str | None: + hid = None + for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")): + if el.tag == "a": + hid = el.get("id") + elif el.tag == "p" and (hid := self._scan_anchors(el)): + href = (next_el.get("id") or self._slugify(next_el.text or "")) if next_el.tag in self._htags else "" + self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type] + hid = None else: - self._scan_anchors(el) + hid = self._scan_anchors(el) + return hid class AutorefsExtension(Extension): diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 8fcae75..8acd446 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -60,27 +60,3 @@ def test_dont_make_relative_urls_relative_again() -> None: plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",)) == "../../foo/bar/baz.html#foo.bar.baz" ) - - -def test_register_html_anchors() -> None: - """Check that HT?ML anchors are registered when enabled.""" - plugin = AutorefsPlugin() - plugin.scan_toc = False - plugin.scan_anchors = True - - class Page: - url = "/page/url" - - plugin.on_page_content( - """ - <a id="foo.bar"> - <a href="#foo.baz"> - <a id="foo.qux" href="#fooqux"> - <a href="quxfoo" id="qux.foo"> - """, - page=Page(), # type: ignore[arg-type] - ) - assert "foo.bar" in plugin._url_map - assert "foo.baz" not in plugin._url_map - assert "foo.qux" in plugin._url_map - assert "qux.foo" in plugin._url_map diff --git a/tests/test_references.py b/tests/test_references.py index 5a25844..734b1b7 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -2,10 +2,14 @@ from __future__ import annotations +from functools import partial +from textwrap import dedent + import markdown import pytest -from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url +from mkdocs_autorefs.plugin import AutorefsPlugin +from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url @pytest.mark.parametrize( @@ -224,3 +228,25 @@ def test_external_references() -> None: output, unmapped = fix_refs(source, url_map.__getitem__) assert output == '<a class="autorefs autorefs-external" href="https://example.com">example</a>' assert unmapped == [] + + +def test_register_html_anchors() -> None: + """Check that HTML anchors are registered when enabled.""" + plugin = AutorefsPlugin() + md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(partial(AnchorScannerTreeProcessor, plugin))]) + plugin.current_page = "" + md.convert( + dedent( + """ + [](){#foo} + ## Heading + + Paragraph 1. + + [](){#bar} + Paragraph 2. + """, + ), + ) + assert plugin._url_map["foo"] == "#heading" + assert plugin._url_map["bar"] == "#bar" From 209f87487946d307f40766866e0db9e45179dd23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= <dev@pawamoy.fr> Date: Sat, 17 Feb 2024 16:39:46 +0100 Subject: [PATCH 04/12] fixup! feat: Add option to scan and register HTML anchors --- docs/index.md | 7 ------- src/mkdocs_autorefs/plugin.py | 7 ------- 2 files changed, 14 deletions(-) diff --git a/docs/index.md b/docs/index.md index 5b450db..612c7a5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,8 +1 @@ --8<-- "README.md" - -[](){#hello} -## Hello world - -helllo. - -[hello][hello] \ No newline at end of file diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 0c332bb..c499861 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -15,7 +15,6 @@ import contextlib import functools import logging -import re from functools import partial from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit @@ -66,8 +65,6 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]): scan_anchors: bool = False current_page: str | None = None - _re_anchors = re.compile(r'<a(?:\s+href="([^"]*)")?\s+id="([^"]+)"\s*>') - def __init__(self) -> None: """Initialize the object.""" super().__init__() @@ -190,10 +187,6 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa: for item in page.toc.items: self.map_urls(page.url, item) - # if self.scan_anchors or self.config.scan_anchors: - # for href, hid in re.findall(self._re_anchors, html): - # self.register_anchor(page.url, identifier=hid, anchor=href.lstrip("#")) - return html def map_urls(self, base_url: str, anchor: AnchorLink) -> None: From 030ba752da96a098b62de7e63cce2e2c83c0e730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= <dev@pawamoy.fr> Date: Sat, 17 Feb 2024 16:40:19 +0100 Subject: [PATCH 05/12] fixup! feat: Add option to scan and register HTML anchors --- src/mkdocs_autorefs/plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index c499861..06c8bcd 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -186,7 +186,6 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa: log.debug(f"Mapping identifiers to URLs for page {page.file.src_path}") for item in page.toc.items: self.map_urls(page.url, item) - return html def map_urls(self, base_url: str, anchor: AnchorLink) -> None: From e9450a285acabf7d2091f2e39ff0f640670dd014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= <dev@pawamoy.fr> Date: Sat, 17 Feb 2024 17:00:04 +0100 Subject: [PATCH 06/12] fixup! feat: Add option to scan and register HTML anchors --- src/mkdocs_autorefs/plugin.py | 6 ++---- src/mkdocs_autorefs/references.py | 8 ++++---- tests/test_references.py | 5 ++--- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 06c8bcd..740dd6f 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -15,7 +15,6 @@ import contextlib import functools import logging -from functools import partial from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit @@ -25,7 +24,7 @@ from mkdocs.plugins import BasePlugin from mkdocs.structure.pages import Page -from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url +from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url if TYPE_CHECKING: from mkdocs.config.defaults import MkDocsConfig @@ -147,8 +146,7 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: """ log.debug("Adding AutorefsExtension to the list") scan_anchors = self.scan_anchors or self.config.scan_anchors - anchor_scanner_factory = partial(AnchorScannerTreeProcessor, self) if scan_anchors else None - config["markdown_extensions"].append(AutorefsExtension(anchor_scanner_factory)) + config["markdown_extensions"].append(AutorefsExtension(plugin=self if scan_anchors else None)) return config def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002 diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 23f39cb..7918100 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -246,7 +246,7 @@ class AutorefsExtension(Extension): def __init__( self, - anchor_scanner_factory: Callable[[Markdown], AnchorScannerTreeProcessor] | None = None, + plugin: AutorefsPlugin | None = None, **kwargs: Any, ) -> None: """Initialize the Markdown extension. @@ -256,7 +256,7 @@ def __init__( **kwargs: Keyword arguments passed to the [base constructor][markdown.extensions.Extension]. """ super().__init__(**kwargs) - self.anchor_scanner_factory = anchor_scanner_factory + self.plugin = plugin def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent method's name) """Register the extension. @@ -271,9 +271,9 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me "mkdocs-autorefs", priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor ) - if self.anchor_scanner_factory: + if self.plugin: md.treeprocessors.register( - self.anchor_scanner_factory(md), + AnchorScannerTreeProcessor(self.plugin, md), "mkdocs-autorefs-anchors-scanner", priority=0, ) diff --git a/tests/test_references.py b/tests/test_references.py index 734b1b7..061b002 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -2,14 +2,13 @@ from __future__ import annotations -from functools import partial from textwrap import dedent import markdown import pytest from mkdocs_autorefs.plugin import AutorefsPlugin -from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url +from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url @pytest.mark.parametrize( @@ -233,7 +232,7 @@ def test_external_references() -> None: def test_register_html_anchors() -> None: """Check that HTML anchors are registered when enabled.""" plugin = AutorefsPlugin() - md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(partial(AnchorScannerTreeProcessor, plugin))]) + md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(plugin)]) plugin.current_page = "" md.convert( dedent( From 3e6b0e4bbd355e38eff18393a03ebcdbad82fb9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= <dev@pawamoy.fr> Date: Sat, 17 Feb 2024 17:09:07 +0100 Subject: [PATCH 07/12] fixup! feat: Add option to scan and register HTML anchors --- src/mkdocs_autorefs/references.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 7918100..bcdd6a9 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -252,7 +252,7 @@ def __init__( """Initialize the Markdown extension. Parameters: - anchor_scanner_factory: A callable that returns an instance of the anchor scanner tree processor. + plugin: An optional reference to the autorefs plugin (to pass it to the anchor scanner tree processor). **kwargs: Keyword arguments passed to the [base constructor][markdown.extensions.Extension]. """ super().__init__(**kwargs) @@ -262,6 +262,8 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me """Register the extension. Add an instance of our [`AutoRefInlineProcessor`][mkdocs_autorefs.references.AutoRefInlineProcessor] to the Markdown parser. + Also optionally add an instance of our [`AnchorScannerTreeProcessor`][mkdocs_autorefs.references.AnchorScannerTreeProcessor] + to the Markdown parser if a reference to the autorefs plugin was passed to this extension. Arguments: md: A `markdown.Markdown` instance. From 5c933758e5539b1abf0c61883e84486c825634e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= <dev@pawamoy.fr> Date: Sun, 18 Feb 2024 16:49:30 +0100 Subject: [PATCH 08/12] fixup! feat: Add option to scan and register HTML anchors --- README.md | 88 ++++++++++++++++++++++++++++++- src/mkdocs_autorefs/references.py | 36 +++++++++---- tests/test_references.py | 21 ++++++-- 3 files changed, 130 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index bb60430..e783cdf 100644 --- a/README.md +++ b/README.md @@ -49,4 +49,90 @@ This works the same as [a normal link to that heading](../doc1.md#hello-world). Linking to a heading without needing to know the destination page can be useful if specifying that path is cumbersome, e.g. when the pages have deeply nested paths, are far apart, or are moved around frequently. And the issue is somewhat exacerbated by the fact that [MkDocs supports only *relative* links between pages](https://github.com/mkdocs/mkdocs/issues/1592). -Note that this plugin's behavior is undefined when trying to link to a heading title that appears several times throughout the site. Currently it arbitrarily chooses one of the pages. +Note that this plugin's behavior is undefined when trying to link to a heading title that appears several times throughout the site. Currently it arbitrarily chooses one of the pages. In such cases, use [Markdown anchors](#markdown-anchors) to add unique aliases to your headings. + +### Markdown anchors + +The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place. The syntax is `[](){#id-of-the-anchor}`. First you must enable the feature: + +```yaml +# mkdocs.yml +plugins: + - search + - autorefs + scan_anchors: true + +markdown_extensions: + - attr_list +``` + +Then, add an anchor to a document: + +```md +Somewhere in a document. + +[](){#foobar-paragraph} + +Paragraph about foobar. +``` + +Now you can link to this anchor with the usual syntax: + +```md +Check out the [paragraph about foobar][foobar-pararaph]. +``` + +If you add a Markdown anchor right above a heading, this anchor will redirect to the heading itself: + +```md +[](){#foobar} +## A verbose title about foobar +``` + +Linking to the `foobar` anchor will bring you directly to the heading, not the anchor itself, so the URL will show `#a-verbose-title-about-foobar` instead of `#foobar`. These anchors therefore act as "aliases" for headings. It is possible to define multiple aliases per heading: + +```md +[](){#contributing} +[](){#development-setup} +## How to contribute to the project? +``` + +Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading was undefined behavior (it could lead to any one of the headings, undeterministically). With unique aliases above headings, you can make sure to link to the right heading. + +For example, consider the following setup. You have one document per operating system describing how to install a project with the OS package manager or from sources: + +``` +docs/ + install/ + arch.md + debian.md + gentoo.md +``` + +Each page has: + +```md +## Install with package manager + +... + +## Install from sources + +... +``` + +You don't want to change headings and make them redundant, like `## Arch: Install with package manager` and `## Debian: Install with package manager` just to be able to reference the right one with autorefs. Instead you can do this: + +```md +[](){#arch-install-pkg} +## Install with package manager + +... + +[](){#arch-install-src} +## Install from sources + +... +``` + +...changing `arch` by `debian`, `gentoo`, etc. in the other pages. diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index bcdd6a9..76c7b77 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -222,23 +222,39 @@ def run(self, root: Element) -> None: # noqa: D102 self._scan_anchors(root) @staticmethod - def _slugify(value: str, separator: str = "-") -> str: + def _slug(value: str, separator: str = "-") -> str: value = unicodedata.normalize("NFKD", str(value)).encode("ascii", "ignore").decode("ascii") value = re.sub(r"[^\w\s-]", "", value.lower()) return re.sub(r"[-_\s]+", separator, value).strip("-_") - def _scan_anchors(self, parent: Element) -> str | None: - hid = None + def _scan_anchors(self, parent: Element) -> list[str]: + ids = [] + # We iterate on pairs of elements, to check if the next element is a heading (alias feature). for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")): if el.tag == "a": - hid = el.get("id") - elif el.tag == "p" and (hid := self._scan_anchors(el)): - href = (next_el.get("id") or self._slugify(next_el.text or "")) if next_el.tag in self._htags else "" - self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type] - hid = None + # We found an anchor. Record its id if it has one. + if hid := el.get("id"): + if el.tail and el.tail.strip(): + # If the anchor has a non-whitespace-only tail, it's not an alias: + # register it immediately. + self.plugin.register_anchor(self.plugin.current_page, hid) # type: ignore[arg-type] + else: + # Else record its id and continue. + ids.append(hid) + elif el.tag == "p": + if ids := self._scan_anchors(el): + # Markdown anchors are always rendered as `a` tags within a `p` tag. + # Headings therefore appear after the `p` tag. Here the current element + # is a `p` tag and it contains at least one anchor with an id. + # We can check if the next element is a heading, and use its id as href. + href = (next_el.get("id") or self._slug(next_el.text or "")) if next_el.tag in self._htags else "" + for hid in ids: + self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type] + ids.clear() else: - hid = self._scan_anchors(el) - return hid + # Recurse into sub-elements. + ids = self._scan_anchors(el) + return ids class AutorefsExtension(Extension): diff --git a/tests/test_references.py b/tests/test_references.py index 061b002..7a68e09 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -229,8 +229,8 @@ def test_external_references() -> None: assert unmapped == [] -def test_register_html_anchors() -> None: - """Check that HTML anchors are registered when enabled.""" +def test_register_markdown_anchors() -> None: + """Check that Markdown anchors are registered when enabled.""" plugin = AutorefsPlugin() md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(plugin)]) plugin.current_page = "" @@ -238,14 +238,27 @@ def test_register_html_anchors() -> None: dedent( """ [](){#foo} - ## Heading + ## Heading foo Paragraph 1. [](){#bar} Paragraph 2. + + [](){#alias1} + [](){#alias2} + ## Heading bar + + [](){#alias3} + Text. + [](){#alias4} + ## Heading baz """, ), ) - assert plugin._url_map["foo"] == "#heading" + assert plugin._url_map["foo"] == "#heading-foo" assert plugin._url_map["bar"] == "#bar" + assert plugin._url_map["alias1"] == "#heading-bar" + assert plugin._url_map["alias2"] == "#heading-bar" + assert plugin._url_map["alias3"] == "#alias3" + assert plugin._url_map["alias4"] == "#heading-baz" From 5eea2e0eb2d2edc960387285819380d369b53e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= <dev@pawamoy.fr> Date: Sun, 18 Feb 2024 23:29:27 +0100 Subject: [PATCH 09/12] fixup! feat: Add option to scan and register HTML anchors --- README.md | 35 ++++++++++++++++++------------- src/mkdocs_autorefs/plugin.py | 9 +++++++- src/mkdocs_autorefs/references.py | 18 ++++++---------- tests/test_references.py | 2 +- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index e783cdf..7eebcd7 100644 --- a/README.md +++ b/README.md @@ -53,20 +53,29 @@ Note that this plugin's behavior is undefined when trying to link to a heading t ### Markdown anchors -The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place. The syntax is `[](){#id-of-the-anchor}`. First you must enable the feature: +The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place. + +The syntax is: + +```md +[](){#id-of-the-anchor} +``` + +If you look closely, it starts with the usual syntax for a link, `[]()`, except both the text value and URL of the link are empty. Then we see `{#id-of-the-anchor}`, which is the syntax supported by the [`attr_list`](https://python-markdown.github.io/extensions/attr_list/) extension. It sets an HTML id to the anchor element. The autorefs plugin simply gives a meaning to such anchors with ids. Note that raw HTML anchors like `<a id="foo"></a>` are not supported. + +The `attr_list` extension must be enabled for the Markdown anchors feature to work: ```yaml # mkdocs.yml plugins: - search - autorefs - scan_anchors: true markdown_extensions: - attr_list ``` -Then, add an anchor to a document: +Now, you can add anchors to documents: ```md Somewhere in a document. @@ -76,9 +85,11 @@ Somewhere in a document. Paragraph about foobar. ``` -Now you can link to this anchor with the usual syntax: +...making it possible to link to this anchor with our automatic links: ```md +In any document. + Check out the [paragraph about foobar][foobar-pararaph]. ``` @@ -97,27 +108,25 @@ Linking to the `foobar` anchor will bring you directly to the heading, not the a ## How to contribute to the project? ``` -Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading was undefined behavior (it could lead to any one of the headings, undeterministically). With unique aliases above headings, you can make sure to link to the right heading. +Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading is undefined behavior (it could lead to any one of the headings). With unique aliases above headings, you can make sure to link to the right heading. For example, consider the following setup. You have one document per operating system describing how to install a project with the OS package manager or from sources: -``` +```tree docs/ - install/ - arch.md - debian.md - gentoo.md + install/ + arch.md + debian.md + gentoo.md ``` Each page has: ```md ## Install with package manager - ... ## Install from sources - ... ``` @@ -126,12 +135,10 @@ You don't want to change headings and make them redundant, like `## Arch: Instal ```md [](){#arch-install-pkg} ## Install with package manager - ... [](){#arch-install-src} ## Install from sources - ... ``` diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 740dd6f..a0dacde 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit +from markdown.extensions.attr_list import AttrListExtension from mkdocs.config.base import Config from mkdocs.config.config_options import Type from mkdocs.config.defaults import MkDocsConfig @@ -145,7 +146,13 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: The modified config. """ log.debug("Adding AutorefsExtension to the list") - scan_anchors = self.scan_anchors or self.config.scan_anchors + for ext in config.markdown_extensions: + if ext == "attr_list" or isinstance(ext, AttrListExtension): + log.debug("Enabling Markdown anchors feature") + scan_anchors = True + break + else: + scan_anchors = False config["markdown_extensions"].append(AutorefsExtension(plugin=self if scan_anchors else None)) return config diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 76c7b77..0ca5d09 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -3,7 +3,6 @@ from __future__ import annotations import re -import unicodedata from html import escape, unescape from itertools import zip_longest from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple @@ -216,31 +215,26 @@ def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: """ super().__init__(md) self.plugin = plugin + self._slug = md.treeprocessors["toc"].slugify def run(self, root: Element) -> None: # noqa: D102 if self.plugin.current_page is not None: self._scan_anchors(root) - @staticmethod - def _slug(value: str, separator: str = "-") -> str: - value = unicodedata.normalize("NFKD", str(value)).encode("ascii", "ignore").decode("ascii") - value = re.sub(r"[^\w\s-]", "", value.lower()) - return re.sub(r"[-_\s]+", separator, value).strip("-_") - def _scan_anchors(self, parent: Element) -> list[str]: ids = [] # We iterate on pairs of elements, to check if the next element is a heading (alias feature). for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")): if el.tag == "a": # We found an anchor. Record its id if it has one. - if hid := el.get("id"): + if anchor_id := el.get("id"): if el.tail and el.tail.strip(): # If the anchor has a non-whitespace-only tail, it's not an alias: # register it immediately. - self.plugin.register_anchor(self.plugin.current_page, hid) # type: ignore[arg-type] + self.plugin.register_anchor(self.plugin.current_page, anchor_id) # type: ignore[arg-type] else: # Else record its id and continue. - ids.append(hid) + ids.append(anchor_id) elif el.tag == "p": if ids := self._scan_anchors(el): # Markdown anchors are always rendered as `a` tags within a `p` tag. @@ -248,8 +242,8 @@ def _scan_anchors(self, parent: Element) -> list[str]: # is a `p` tag and it contains at least one anchor with an id. # We can check if the next element is a heading, and use its id as href. href = (next_el.get("id") or self._slug(next_el.text or "")) if next_el.tag in self._htags else "" - for hid in ids: - self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type] + for anchor_id in ids: + self.plugin.register_anchor(self.plugin.current_page, anchor_id, href) # type: ignore[arg-type] ids.clear() else: # Recurse into sub-elements. diff --git a/tests/test_references.py b/tests/test_references.py index 7a68e09..b5c8526 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -232,7 +232,7 @@ def test_external_references() -> None: def test_register_markdown_anchors() -> None: """Check that Markdown anchors are registered when enabled.""" plugin = AutorefsPlugin() - md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(plugin)]) + md = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(plugin)]) plugin.current_page = "" md.convert( dedent( From ec47ac2ddc14b92662092c44f49b25f3bdfe5af9 Mon Sep 17 00:00:00 2001 From: Oleh Prypin <oleh@pryp.in> Date: Fri, 23 Feb 2024 13:09:16 +0100 Subject: [PATCH 10/12] fixup! feat: Add option to scan and register HTML anchors --- mkdocs.yml | 3 +- src/mkdocs_autorefs/plugin.py | 11 +---- src/mkdocs_autorefs/references.py | 68 +++++++++++++++++++------------ tests/test_references.py | 26 ++++++++---- 4 files changed, 63 insertions(+), 45 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 4d4cb75..c120b5b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -95,8 +95,7 @@ markdown_extensions: permalink: "¤" plugins: -- autorefs: - scan_anchors: true +- autorefs - search - markdown-exec - gen-files: diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index a0dacde..2fea146 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -19,8 +19,6 @@ from urllib.parse import urlsplit from markdown.extensions.attr_list import AttrListExtension -from mkdocs.config.base import Config -from mkdocs.config.config_options import Type from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin from mkdocs.structure.pages import Page @@ -41,14 +39,7 @@ log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment] -class AutorefsConfig(Config): - """Configuration options for the Autorefs plugin.""" - - scan_anchors = Type(bool, default=False) - """Whether to scan HTML pages for anchors defining references.""" - - -class AutorefsPlugin(BasePlugin[AutorefsConfig]): +class AutorefsPlugin(BasePlugin): """An `mkdocs` plugin. This plugin defines the following event hooks: diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 0ca5d09..d1cb2c3 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -4,7 +4,6 @@ import re from html import escape, unescape -from itertools import zip_longest from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple from urllib.parse import urlsplit from xml.etree.ElementTree import Element @@ -215,40 +214,57 @@ def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: """ super().__init__(md) self.plugin = plugin - self._slug = md.treeprocessors["toc"].slugify def run(self, root: Element) -> None: # noqa: D102 if self.plugin.current_page is not None: - self._scan_anchors(root) + pending_anchors = _PendingAnchors(self.plugin, self.plugin.current_page) + self._scan_anchors(root, pending_anchors) + pending_anchors.flush() - def _scan_anchors(self, parent: Element) -> list[str]: - ids = [] - # We iterate on pairs of elements, to check if the next element is a heading (alias feature). - for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")): + def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors) -> None: + for el in parent: if el.tag == "a": # We found an anchor. Record its id if it has one. if anchor_id := el.get("id"): - if el.tail and el.tail.strip(): - # If the anchor has a non-whitespace-only tail, it's not an alias: - # register it immediately. - self.plugin.register_anchor(self.plugin.current_page, anchor_id) # type: ignore[arg-type] - else: - # Else record its id and continue. - ids.append(anchor_id) + pending_anchors.append(anchor_id) + # Non-whitespace text after the element interrupts the chain, aliases can't apply. + if el.tail and el.tail.strip(): + pending_anchors.flush() + elif el.tag == "p": - if ids := self._scan_anchors(el): - # Markdown anchors are always rendered as `a` tags within a `p` tag. - # Headings therefore appear after the `p` tag. Here the current element - # is a `p` tag and it contains at least one anchor with an id. - # We can check if the next element is a heading, and use its id as href. - href = (next_el.get("id") or self._slug(next_el.text or "")) if next_el.tag in self._htags else "" - for anchor_id in ids: - self.plugin.register_anchor(self.plugin.current_page, anchor_id, href) # type: ignore[arg-type] - ids.clear() + # A `p` tag is a no-op for our purposes, just recurse into it in the context + # of the current collection of anchors. + self._scan_anchors(el, pending_anchors) + # Non-whitespace text after the element interrupts the chain, aliases can't apply. + if el.tail and el.tail.strip(): + pending_anchors.flush() + + elif el.tag in self._htags: + # If the element is a heading, that turns the pending anchors into aliases. + pending_anchors.flush(el.get("id")) + else: - # Recurse into sub-elements. - ids = self._scan_anchors(el) - return ids + # But if it's some other interruption, flush anchors anyway as non-aliases. + pending_anchors.flush() + # Recurse into sub-elements, in a *separate* context. + self.run(el) + + +class _PendingAnchors: + """A collection of HTML anchors that may or may not become aliased to an upcoming heading.""" + + def __init__(self, plugin: AutorefsPlugin, current_page: str): + self.plugin = plugin + self.current_page = current_page + self.anchors: list[str] = [] + + def append(self, anchor: str) -> None: + self.anchors.append(anchor) + + def flush(self, alias_to: str | None = None) -> None: + for anchor in self.anchors: + self.plugin.register_anchor(self.current_page, anchor, alias_to) + self.anchors.clear() class AutorefsExtension(Extension): diff --git a/tests/test_references.py b/tests/test_references.py index b5c8526..077bb9b 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -233,7 +233,7 @@ def test_register_markdown_anchors() -> None: """Check that Markdown anchors are registered when enabled.""" plugin = AutorefsPlugin() md = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(plugin)]) - plugin.current_page = "" + plugin.current_page = "page" md.convert( dedent( """ @@ -253,12 +253,24 @@ def test_register_markdown_anchors() -> None: Text. [](){#alias4} ## Heading baz + + [](){#alias5} + [](){#alias6} + Decoy. + ## Heading more + + [](){#alias7} """, ), ) - assert plugin._url_map["foo"] == "#heading-foo" - assert plugin._url_map["bar"] == "#bar" - assert plugin._url_map["alias1"] == "#heading-bar" - assert plugin._url_map["alias2"] == "#heading-bar" - assert plugin._url_map["alias3"] == "#alias3" - assert plugin._url_map["alias4"] == "#heading-baz" + assert plugin._url_map == { + "foo": "page#heading-foo", + "bar": "page#bar", + "alias1": "page#heading-bar", + "alias2": "page#heading-bar", + "alias3": "page#alias3", + "alias4": "page#heading-baz", + "alias5": "page#alias5", + "alias6": "page#alias6", + "alias7": "page#alias7", + } From bd1a6367599840baa43d24dbd5966cb83386778b Mon Sep 17 00:00:00 2001 From: Oleh Prypin <oleh@pryp.in> Date: Fri, 23 Feb 2024 14:49:27 +0100 Subject: [PATCH 11/12] fixup! feat: Add option to scan and register HTML anchors --- src/mkdocs_autorefs/references.py | 3 ++- tests/test_references.py | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index d1cb2c3..8dd29a6 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -227,8 +227,9 @@ def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors) -> No # We found an anchor. Record its id if it has one. if anchor_id := el.get("id"): pending_anchors.append(anchor_id) + # If the element has text or a link, it's not an alias. # Non-whitespace text after the element interrupts the chain, aliases can't apply. - if el.tail and el.tail.strip(): + if el.text or el.get("href") or (el.tail and el.tail.strip()): pending_anchors.flush() elif el.tag == "p": diff --git a/tests/test_references.py b/tests/test_references.py index 077bb9b..15617c9 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -257,9 +257,14 @@ def test_register_markdown_anchors() -> None: [](){#alias5} [](){#alias6} Decoy. - ## Heading more + ## Heading more1 [](){#alias7} + [decoy](){#alias8} + [](){#alias9} + ## Heading more2 + + [](){#alias10} """, ), ) @@ -273,4 +278,7 @@ def test_register_markdown_anchors() -> None: "alias5": "page#alias5", "alias6": "page#alias6", "alias7": "page#alias7", + "alias8": "page#alias8", + "alias9": "page#heading-more2", + "alias10": "page#alias10", } From cc7ab04767ba46145a1cd0d9695d9badb32c4bf4 Mon Sep 17 00:00:00 2001 From: Oleh Prypin <oleh@pryp.in> Date: Fri, 23 Feb 2024 17:09:58 +0100 Subject: [PATCH 12/12] fixup! feat: Add option to scan and register HTML anchors --- src/mkdocs_autorefs/plugin.py | 11 +---------- src/mkdocs_autorefs/references.py | 12 +++++++++++- tests/test_references.py | 31 +++++++++++++++++++++++++++++-- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 2fea146..24d2a23 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit -from markdown.extensions.attr_list import AttrListExtension from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin from mkdocs.structure.pages import Page @@ -53,7 +52,6 @@ class AutorefsPlugin(BasePlugin): """ scan_toc: bool = True - scan_anchors: bool = False current_page: str | None = None def __init__(self) -> None: @@ -137,14 +135,7 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: The modified config. """ log.debug("Adding AutorefsExtension to the list") - for ext in config.markdown_extensions: - if ext == "attr_list" or isinstance(ext, AttrListExtension): - log.debug("Enabling Markdown anchors feature") - scan_anchors = True - break - else: - scan_anchors = False - config["markdown_extensions"].append(AutorefsExtension(plugin=self if scan_anchors else None)) + config["markdown_extensions"].append(AutorefsExtension(self)) return config def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002 diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 8dd29a6..590ebbc 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -2,6 +2,7 @@ from __future__ import annotations +import logging import re from html import escape, unescape from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple @@ -19,6 +20,14 @@ from mkdocs_autorefs.plugin import AutorefsPlugin +try: + from mkdocs.plugins import get_plugin_logger + + log = get_plugin_logger(__name__) +except ImportError: + # TODO: remove once support for MkDocs <1.5 is dropped + log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment] + AUTO_REF_RE = re.compile( r"<span data-(?P<kind>autorefs-identifier|autorefs-optional|autorefs-optional-hover)=" r'("?)(?P<identifier>[^"<>]*)\2>(?P<title>.*?)</span>', @@ -300,7 +309,8 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me "mkdocs-autorefs", priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor ) - if self.plugin: + if self.plugin is not None and self.plugin.scan_toc and "attr_list" in md.treeprocessors: + log.debug("Enabling Markdown anchors feature") md.treeprocessors.register( AnchorScannerTreeProcessor(self.plugin, md), "mkdocs-autorefs-anchors-scanner", diff --git a/tests/test_references.py b/tests/test_references.py index 15617c9..7a0a603 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -262,7 +262,7 @@ def test_register_markdown_anchors() -> None: [](){#alias7} [decoy](){#alias8} [](){#alias9} - ## Heading more2 + ## Heading more2 {#heading-custom2} [](){#alias10} """, @@ -279,6 +279,33 @@ def test_register_markdown_anchors() -> None: "alias6": "page#alias6", "alias7": "page#alias7", "alias8": "page#alias8", - "alias9": "page#heading-more2", + "alias9": "page#heading-custom2", "alias10": "page#alias10", } + + +def test_register_markdown_anchors_with_admonition() -> None: + """Check that Markdown anchors are registered inside a nested admonition element.""" + plugin = AutorefsPlugin() + md = markdown.Markdown(extensions=["attr_list", "toc", "admonition", AutorefsExtension(plugin)]) + plugin.current_page = "page" + md.convert( + dedent( + """ + [](){#alias1} + !!! note + ## Heading foo + + [](){#alias2} + ## Heading bar + + [](){#alias3} + ## Heading baz + """, + ), + ) + assert plugin._url_map == { + "alias1": "page#alias1", + "alias2": "page#heading-bar", + "alias3": "page#alias3", + }