From 009c49d099b347903f22f4bc7a5bc8e4c678ce5c Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Wed, 3 Jan 2024 12:14:21 -0500 Subject: [PATCH 01/20] Refactor the sphinx inventory so it can be used to retreive the type of some object. --- pydoctor/sphinx.py | 72 +++++++++++++++++++++++++------ pydoctor/test/test_epydoc2stan.py | 12 +++--- pydoctor/test/test_sphinx.py | 22 +++++++--- 3 files changed, 81 insertions(+), 25 deletions(-) diff --git a/pydoctor/sphinx.py b/pydoctor/sphinx.py index 5766934a7..e60402364 100644 --- a/pydoctor/sphinx.py +++ b/pydoctor/sphinx.py @@ -12,6 +12,7 @@ TYPE_CHECKING, Callable, ContextManager, Dict, IO, Iterable, Mapping, Optional, Tuple ) +import enum import appdirs import attr @@ -34,6 +35,40 @@ def close(self) -> None: ... logger = logging.getLogger(__name__) +class InvObjectType(enum.Enum): + + FUNCTION = ('function', 'func') + METHOD = ('method', 'meth') + MODULE = ('module', 'mod', 'package', 'pack') + CLASS = ('class', 'cls') + ATTRIBUTE = ('attribute', 'attr', 'attrib', 'data') + OBJECT = ('obj', 'object') + + def common_name(self) -> str: + v = self.value + assert isinstance(v, tuple) + return v[0] # type:ignore + + @classmethod + def from_text(cls, txt:str) -> 'InvObjectType': + for e in InvObjectType._member_map_.values(): + if txt in e.value: + assert isinstance(e, InvObjectType) + return e + return InvObjectType.OBJECT + +def _convert_typ(typ:str) -> 'InvObjectType': + if typ.startswith('py:'): + return InvObjectType.from_text(typ[3:].strip(':')) + else: + return InvObjectType.from_text(typ.strip(':')) + +@attr.s(auto_attribs=True) +class InventoryObject: + name:str + base_url:str + location:str + typ:InvObjectType = attr.ib(converter=_convert_typ) class SphinxInventory: """ @@ -47,9 +82,11 @@ def __init__( ): """ @param project_name: Dummy argument to stay compatible with - L{twisted.python._pydoctor}. + L{twisted.python._pydoctor}. This will be used when we fix + U{#609 }. + """ - self._links: Dict[str, Tuple[str, str]] = {} + self._links: Dict[str, InventoryObject] = {} self._logger = logger def error(self, where: str, message: str) -> None: @@ -110,7 +147,7 @@ def _parseInventory( self, base_url: str, payload: str - ) -> Dict[str, Tuple[str, str]]: + ) -> Dict[str, InventoryObject]: """ Parse clear text payload and return a dict with module to link mapping. """ @@ -129,16 +166,24 @@ def _parseInventory( # Non-Python references are ignored. continue - result[name] = (base_url, location) + result[name] = InventoryObject(name=name, typ=typ, + base_url=base_url, location=location) + return result + def getInv(self, name) -> Optional[InventoryObject]: + return self._links.get(name) + def getLink(self, name: str) -> Optional[str]: """ Return link for `name` or None if no link is found. """ - base_url, relative_link = self._links.get(name, (None, None)) - if not relative_link: + invobj = self.getInv(name) + if not invobj: return None + + base_url = invobj.base_url + relative_link = invobj.location # For links ending with $, replace it with full name. if relative_link.endswith('$'): @@ -253,23 +298,24 @@ def _generateLine(self, obj: Documentable) -> str: url = obj.url display = '-' + objtype: InvObjectType if isinstance(obj, model.Module): - domainname = 'module' + objtype = InvObjectType.MODULE elif isinstance(obj, model.Class): - domainname = 'class' + objtype = InvObjectType.CLASS elif isinstance(obj, model.Function): if obj.kind is model.DocumentableKind.FUNCTION: - domainname = 'function' + objtype = InvObjectType.FUNCTION else: - domainname = 'method' + objtype = InvObjectType.METHOD elif isinstance(obj, model.Attribute): - domainname = 'attribute' + objtype = InvObjectType.ATTRIBUTE else: - domainname = 'obj' + objtype = InvObjectType.OBJECT self.error( 'sphinx', "Unknown type %r for %s." % (type(obj), full_name,)) - return f'{full_name} py:{domainname} -1 {url} {display}\n' + return f'{full_name} py:{objtype.common_name()} -1 {url} {display}\n' USER_INTERSPHINX_CACHE = appdirs.user_cache_dir("pydoctor") diff --git a/pydoctor/test/test_epydoc2stan.py b/pydoctor/test/test_epydoc2stan.py index 828edccfb..c0b15a3c1 100644 --- a/pydoctor/test/test_epydoc2stan.py +++ b/pydoctor/test/test_epydoc2stan.py @@ -9,7 +9,7 @@ from pydoctor.epydoc.markup import get_supported_docformats from pydoctor.stanutils import flatten, flatten_text from pydoctor.epydoc.markup.epytext import ParsedEpytextDocstring -from pydoctor.sphinx import SphinxInventory +from pydoctor.sphinx import SphinxInventory, InventoryObject from pydoctor.test.test_astbuilder import fromText, unwrap from pydoctor.test import CapSys, NotFoundLinker from pydoctor.templatewriter.search import stem_identifier @@ -168,7 +168,7 @@ def func(): system = mod.system inventory = SphinxInventory(system.msg) - inventory._links['external.func'] = ('https://example.net', 'lib.html#func') + inventory._links['external.func'] = InventoryObject(name='external.func', base_url='https://example.net', location='lib.html#func', typ='func') system.intersphinx = inventory html = docstring2html(mod.contents['func']) @@ -1079,7 +1079,7 @@ def test_EpydocLinker_look_for_intersphinx_hit() -> None: """ system = model.System() inventory = SphinxInventory(system.msg) - inventory._links['base.module.other'] = ('http://tm.tld', 'some.html') + inventory._links['base.module.other'] = InventoryObject(name='base.module.other', base_url='http://tm.tld', location='some.html', typ='mod') system.intersphinx = inventory target = model.Module(system, 'ignore-name') sut = target.docstring_linker @@ -1095,7 +1095,7 @@ def test_EpydocLinker_adds_intersphinx_link_css_class() -> None: """ system = model.System() inventory = SphinxInventory(system.msg) - inventory._links['base.module.other'] = ('http://tm.tld', 'some.html') + inventory._links['base.module.other'] = InventoryObject(name='base.module.other', base_url='http://tm.tld', location='some.html', typ='mod') system.intersphinx = inventory target = model.Module(system, 'ignore-name') sut = target.docstring_linker @@ -1116,7 +1116,7 @@ def test_EpydocLinker_resolve_identifier_xref_intersphinx_absolute_id() -> None: """ system = model.System() inventory = SphinxInventory(system.msg) - inventory._links['base.module.other'] = ('http://tm.tld', 'some.html') + inventory._links['base.module.other'] = InventoryObject(name='base.module.other', base_url='http://tm.tld', location='some.html', typ='mod') system.intersphinx = inventory target = model.Module(system, 'ignore-name') sut = target.docstring_linker @@ -1136,7 +1136,7 @@ def test_EpydocLinker_resolve_identifier_xref_intersphinx_relative_id() -> None: """ system = model.System() inventory = SphinxInventory(system.msg) - inventory._links['ext_package.ext_module'] = ('http://tm.tld', 'some.html') + inventory._links['ext_package.ext_module'] = InventoryObject(name='ext_package.ext_module', base_url='http://tm.tld', location='some.html', typ='mod') system.intersphinx = inventory target = model.Module(system, 'ignore-name') # Here we set up the target module as it would have this import. diff --git a/pydoctor/test/test_sphinx.py b/pydoctor/test/test_sphinx.py index d71e0caf5..5a00ca03a 100644 --- a/pydoctor/test/test_sphinx.py +++ b/pydoctor/test/test_sphinx.py @@ -334,7 +334,7 @@ def test_getLink_found(inv_reader_nolog: sphinx.SphinxInventory) -> None: Return the link from internal state. """ - inv_reader_nolog._links['some.name'] = ('http://base.tld', 'some/url.php') + inv_reader_nolog._links['some.name'] = sphinx.InventoryObject(name='some.name', base_url='http://base.tld', location='some/url.php', typ='mod') assert 'http://base.tld/some/url.php' == inv_reader_nolog.getLink('some.name') @@ -344,7 +344,7 @@ def test_getLink_self_anchor(inv_reader_nolog: sphinx.SphinxInventory) -> None: Return the link with anchor as target name when link end with $. """ - inv_reader_nolog._links['some.name'] = ('http://base.tld', 'some/url.php#$') + inv_reader_nolog._links['some.name'] = sphinx.InventoryObject(name='some.name', base_url='http://base.tld', location='some/url.php#$', typ='mod') assert 'http://base.tld/some/url.php#some.name' == inv_reader_nolog.getLink('some.name') @@ -421,7 +421,7 @@ def test_parseInventory_single_line(inv_reader_nolog: sphinx.SphinxInventory) -> result = inv_reader_nolog._parseInventory( 'http://base.tld', 'some.attr py:attr -1 some.html De scription') - assert {'some.attr': ('http://base.tld', 'some.html')} == result + assert {'some.attr': sphinx.InventoryObject(name='some.attr', base_url='http://base.tld', location='some.html', typ='py:attr')} == result def test_parseInventory_spaces() -> None: @@ -471,8 +471,8 @@ def test_parseInventory_invalid_lines(inv_reader: InvReader) -> None: result = inv_reader._parseInventory(base_url, content) assert { - 'good.attr': (base_url, 'some.html'), - 'good.again': (base_url, 'again.html'), + 'good.attr': sphinx.InventoryObject(name='good.attr', base_url='http://tm.tld', location='some.html', typ='py:attribute'), + 'good.again': sphinx.InventoryObject(name='good.again', base_url='http://tm.tld', location='again.html', typ='py:module'), } == result assert [ ( @@ -505,7 +505,7 @@ def test_parseInventory_type_filter(inv_reader: InvReader) -> None: result = inv_reader._parseInventory(base_url, content) assert { - 'dict': (base_url, 'library/stdtypes.html#$'), + 'dict': sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='py:class'), } == result assert [] == inv_reader._logger.messages @@ -757,3 +757,13 @@ def test_prepareCache( if clearCache: assert not cacheDirectory.exists() + +def test_inv_object_typ() -> None: + obj = sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='py:class') + assert obj.typ is sphinx.InvObjectType.CLASS + obj = sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='py:class:') + assert obj.typ is sphinx.InvObjectType.CLASS + obj = sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='cls') + assert obj.typ is sphinx.InvObjectType.CLASS + obj = sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='class') + assert obj.typ is sphinx.InvObjectType.CLASS \ No newline at end of file From c9cafffd1cd63149deb5ca84244e7623cc499966 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 12 Jan 2024 16:19:02 -0500 Subject: [PATCH 02/20] Initial implementation for supporting any kind of intersphinx references. Also include code to load intersphinx from a local file. Missing a few tests. --- README.rst | 8 + pydoctor/epydoc/markup/__init__.py | 14 +- pydoctor/epydoc/markup/restructuredtext.py | 204 +++++++++++++-- pydoctor/linker.py | 141 +++++++---- pydoctor/model.py | 4 +- pydoctor/node2stan.py | 78 ++++-- pydoctor/options.py | 162 +++++++++++- pydoctor/sphinx.py | 276 ++++++++++++++------- pydoctor/test/__init__.py | 4 +- pydoctor/test/test_epydoc2stan.py | 57 ++++- pydoctor/test/test_model.py | 8 +- pydoctor/test/test_options.py | 68 ++++- pydoctor/test/test_sphinx.py | 150 ++++++++--- 13 files changed, 937 insertions(+), 237 deletions(-) diff --git a/README.rst b/README.rst index 280588043..b2b054189 100644 --- a/README.rst +++ b/README.rst @@ -83,6 +83,14 @@ This is the last major release to support Python 3.7. * `ExtRegistrar.register_post_processor()` now supports a `priority` argument that is an int. Highest priority callables will be called first during post-processing. * Fix too noisy ``--verbose`` mode (suppres some ambiguous annotations warnings). +* Major improvements of the intersphinx integration: + - The ``--intersphinx`` option now supports the following format: ``[INVENTORY_NAME:]URL_OR_PATH[:BASE_URL]``. + Where ``INVENTORY_NAME`` is a an arbitrary name used to filter ``:external:`` references, + ``URL_OR_PATH`` is an URL pointing to a ``objects.inv`` file OR a file path pointing to a local ``.inv`` file, + ``BASE_URL`` is the base for the generated links, it is mandatory if loading the inventory from a file. + - Pydoctor now supports linking to arbitrary intersphinx references with Sphinx role ``:external:``. + - Other common Sphinx reference roles like ``:ref:``, ``:any:``, ``:class:``, ``py:*``, etc are now + properly interpreted (instead of stripping the with a regex). pydoctor 23.9.1 ^^^^^^^^^^^^^^^ diff --git a/pydoctor/epydoc/markup/__init__.py b/pydoctor/epydoc/markup/__init__.py index 3933934fb..40af6bc0e 100644 --- a/pydoctor/epydoc/markup/__init__.py +++ b/pydoctor/epydoc/markup/__init__.py @@ -297,7 +297,11 @@ def link_to(self, target: str, label: "Flattenable") -> Tag: @return: The link, or just the label if the target was not found. """ - def link_xref(self, target: str, label: "Flattenable", lineno: int) -> Tag: + def link_xref(self, target: str, label: "Flattenable", lineno: int, *, + invname: Optional[str] = None, + domain: Optional[str] = None, + reftype: Optional[str] = None, + external: bool = False) -> Tag: """ Format a cross-reference link to a Python identifier. This will resolve the identifier to any reasonable target, @@ -308,6 +312,14 @@ def link_xref(self, target: str, label: "Flattenable", lineno: int) -> Tag: @param label: The label to show for the link. @param lineno: The line number within the docstring at which the crossreference is located. + @param invname: In the case of an intersphinx resolution, filters by + inventory name. + @param domain: In the case of an intersphinx resolution, filters by + domain. + @param reftype: In the case of an intersphinx resolution, filters by + reference type. + @param external: If True, forces the lookup to use intersphinx and + ingnore local names. @return: The link, or just the label if the target was not found. In either case, the returned top-level tag will be C{}. """ diff --git a/pydoctor/epydoc/markup/restructuredtext.py b/pydoctor/epydoc/markup/restructuredtext.py index 9025083ee..baee389e1 100644 --- a/pydoctor/epydoc/markup/restructuredtext.py +++ b/pydoctor/epydoc/markup/restructuredtext.py @@ -39,24 +39,30 @@ the list. """ from __future__ import annotations +from contextlib import contextmanager +from types import ModuleType + __docformat__ = 'epytext en' -from typing import Iterable, List, Optional, Sequence, Set, cast -import re -from docutils import nodes +from typing import Any, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, cast +from docutils import nodes +from docutils.utils import SystemMessage from docutils.core import publish_string from docutils.writers import Writer -from docutils.parsers.rst.directives.admonitions import BaseAdmonition # type: ignore[import] +from docutils.parsers.rst.directives.admonitions import BaseAdmonition # type: ignore[import-untyped] from docutils.readers.standalone import Reader as StandaloneReader from docutils.utils import Reporter from docutils.parsers.rst import Directive, directives from docutils.transforms import Transform, frontmatter +from docutils.parsers.rst import roles +import docutils.parsers.rst.states from pydoctor.epydoc.markup import Field, ParseError, ParsedDocstring, ParserFunction from pydoctor.epydoc.markup.plaintext import ParsedPlaintextDocstring -from pydoctor.epydoc.docutils import new_document +from pydoctor.epydoc.docutils import new_document, set_node_attributes from pydoctor.model import Documentable +from pydoctor.sphinx import parse_domain_reftype #: A dictionary whose keys are the "consolidated fields" that are #: recognized by epydoc; and whose values are the corresponding epydoc @@ -93,18 +99,11 @@ def parse_docstring(docstring: str, """ writer = _DocumentPseudoWriter() reader = _EpydocReader(errors) # Outputs errors to the list. - - # Credits: mhils - Maximilian Hils from the pdoc repository https://github.com/mitmproxy/pdoc - # Strip Sphinx interpreted text roles for code references: :obj:`foo` -> `foo` - docstring = re.sub( - r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):", "", docstring - ) - - publish_string(docstring, writer=writer, reader=reader, - settings_overrides={'report_level':10000, - 'halt_level':10000, - 'warning_stream':None}) - + with patch_docutils_role_function(): + publish_string(docstring, writer=writer, reader=reader, + settings_overrides={'report_level':10000, + 'halt_level':10000, + 'warning_stream':None}) document = writer.document visitor = _SplitFieldsTranslator(document, errors) document.walk(visitor) @@ -498,6 +497,177 @@ class DocutilsAndSphinxCodeBlockAdapter(PythonCodeDirective): 'caption': directives.unchanged_required, } +def parse_external(name: str) -> Tuple[Optional[str], Optional[str]]: + """ + Returns a tuple: (inventory name, role) + + @raises ValueError: If the format is invalid. + """ + assert name.startswith('external'), name + # either we have an explicit inventory name, i.e, + # :external+inv:reftype: or + # :external+inv:domain:reftype: + # or we look in all inventories, i.e., + # :external:reftype: or + # :external:domain:reftype: or + # :external: + suffix = name[9:] + if len(name) > len('external'): + if name[8] == '+': + parts = suffix.split(':', 1) + if len(parts) == 2: + inv_name, suffix = parts + if inv_name and suffix: + return inv_name, suffix + elif len(parts) == 1: + inv_name, = parts + if inv_name: + return inv_name, None + elif name[8] == ':' and suffix: + return None, suffix + msg = f'Malformed :external: role name: {name!r}' + raise ValueError(msg) + return None, None + +# roles._RoleFn +def link_role(role: str, rawtext: str, text: str, lineno: int, + inliner: docutils.parsers.rst.states.Inliner, + options:Any=None, content:Any=None) -> 'tuple[list[nodes.Node], list[nodes.Node]]': + + # See https://www.sphinx-doc.org/en/master/usage/referencing.htm + # and https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html + invname: Optional[str] = None + domain: Optional[str] = None + reftype: Optional[str] = None + external: bool = False + if role.startswith('external'): + try: + invname, suffix = parse_external(role) + if suffix is not None: + domain, reftype = parse_domain_reftype(suffix) + except ValueError as e: + print(f'{lineno}: {e}') + return [], [] # TODO: report the error + else: + external = True + elif role: + try: + domain, reftype = parse_domain_reftype(role) + except ValueError as e: + print(f'{lineno}: {e}') + return [], [] + + if reftype in SUPPORTED_DOMAINS and domain is None: + print(f'{lineno}: Malformed role name, domain is missing reference type') + return [], [] + + if reftype in SUPPORTED_DEFAULT_REFTYPES: + reftype = None + + if reftype in SUPPORTED_EXTERNAL_STD_REFTYPES and domain is None: + external = True + domain = 'std' + + if domain in SUPPORTED_EXTERNAL_DOMAINS: + external = True + + text_node = nodes.Text(text) + node = nodes.title_reference(rawtext, '', + invname=invname, + domain=domain, + reftype=reftype, + external=external, + lineno=lineno) + set_node_attributes(node, children=[text_node], document=inliner.document) # type: ignore + return [node], [] + +SUPPORTED_LOCAL_DOMAINS = set(( + # When using a domain specification, one must also give the reftype. + # links like :py:`something` will trigger an error. + # python domain references + 'py', +)) + +SUPPORTED_EXTERNAL_DOMAINS = set(( + # domain of other languages, complement this list as necessary + 'c', 'cpp', 'js', 'rust', + 'erl', 'php', 'rb', 'go', + # the standard domain + 'std', +)) + +SUPPORTED_DOMAINS = SUPPORTED_LOCAL_DOMAINS | SUPPORTED_EXTERNAL_DOMAINS + +SUPPORTED_PY_REFTYPES = set(( + # Specific objects types in the 'py' domains. + 'mod', 'module', + 'func', 'function', + 'meth', 'method', + 'data', + 'const', 'constant', + 'class', 'cls', + 'attr', 'attrib', 'attribute', + 'exc', 'exception', + # py:obj doesn't exists in cpython sphinx docs, + # so it's not listed here: it's listed down there. +)) + +SUPPORTED_EXTERNAL_STD_REFTYPES = set(( + # Narrative documentation refs and other standard domain + # present in cpython documentation. These roles are always + # implicitely external. Stuff not explicitely listed here + # might still be linked to with an :external: role. + # These reftypes also implicitely belong to the 'std' domain. + 'doc', 'cmdoption', 'option', 'envvar', + 'label', 'opcode', 'term', 'token' +)) + +SUPPORTED_DEFAULT_REFTYPES = set(( + # equivalent to None. + 'ref', 'any', 'obj', 'object', +)) + +ALL_SUPPORTED_ROLES = set(( + # external references + 'external', + *SUPPORTED_DOMAINS, + *SUPPORTED_PY_REFTYPES, + *SUPPORTED_EXTERNAL_STD_REFTYPES, + *SUPPORTED_DEFAULT_REFTYPES + )) + +@contextmanager +def patch_docutils_role_function() -> Iterator[None]: + r""" + Like sphinx, we are patching the L{docutils.parsers.rst.roles.role} function. + This function is a factory for role handlers functions. In order to handle any kind + of roles names like C{:external+python:doc:`something`} (the role here is C{external+python:doc}, + we need to patch this function because Docutils only handles extact matches... + + Tip: To list roles contained in a given inventory, use the following command:: + + python3 -m sphinx.ext.intersphinx https://docs.python.org/3/objects.inv | grep -v '^\s' + + """ + + old_role = roles.role + + def new_role(role_name: str, language_module: ModuleType, + lineno: int, reporter: Reporter) -> 'tuple[nodes._RoleFn, list[SystemMessage]]': + + if role_name in ALL_SUPPORTED_ROLES or any( + role_name.startswith(f'{n}:') for n in ALL_SUPPORTED_ROLES) or \ + role_name.startswith('external+'): # 'external+' is a special case + return link_role, [] + return old_role(role_name, language_module, lineno, reporter) # type: ignore + + roles.role = new_role + yield + roles.role = old_role + +# https://docutils.sourceforge.io/docs/ref/rst/directives.html#default-role +roles.register_local_role('default-role', link_role) + directives.register_directive('python', PythonCodeDirective) directives.register_directive('code', DocutilsAndSphinxCodeBlockAdapter) directives.register_directive('code-block', DocutilsAndSphinxCodeBlockAdapter) diff --git a/pydoctor/linker.py b/pydoctor/linker.py index f403e64dc..595077a31 100644 --- a/pydoctor/linker.py +++ b/pydoctor/linker.py @@ -6,7 +6,7 @@ import contextlib from twisted.web.template import Tag, tags from typing import ( - TYPE_CHECKING, Iterable, Iterator, + TYPE_CHECKING, Any, Iterable, Iterator, Optional, Union ) @@ -123,13 +123,18 @@ def look_for_name(self, 'resolve_identifier_xref', lineno) return None - def look_for_intersphinx(self, name: str) -> Optional[str]: + def look_for_intersphinx(self, name: str, *, + invname: Optional[str] = None, + domain: Optional[str] = None, + reftype: Optional[str] = None) -> Optional[str]: """ Return link for `name` based on intersphinx inventory. Return None if link is not found. """ - return self.obj.system.intersphinx.getLink(name) + return self.obj.system.intersphinx.getLink(name, + invname=invname, domain=domain, + reftype=reftype) def link_to(self, identifier: str, label: "Flattenable") -> Tag: fullID = self.obj.expandName(identifier) @@ -145,10 +150,16 @@ def link_to(self, identifier: str, label: "Flattenable") -> Tag: link = tags.transparent(label) return link - def link_xref(self, target: str, label: "Flattenable", lineno: int) -> Tag: + def link_xref(self, target: str, label: "Flattenable", lineno: int, *, + invname: Optional[str] = None, + domain: Optional[str] = None, + reftype: Optional[str] = None, + external: bool = False) -> Tag: xref: "Flattenable" try: - resolved = self._resolve_identifier_xref(target, lineno) + resolved = self._resolve_identifier_xref(target, lineno, + invname=invname, domain=domain, + reftype=reftype, external=external) except LookupError: xref = label else: @@ -161,7 +172,12 @@ def link_xref(self, target: str, label: "Flattenable", lineno: int) -> Tag: def _resolve_identifier_xref(self, identifier: str, - lineno: int + lineno: int, + *, + invname: Optional[str] = None, + domain: Optional[str] = None, + reftype: Optional[str] = None, + external: bool = False ) -> Union[str, 'model.Documentable']: """ Resolve a crossreference link to a Python identifier. @@ -172,73 +188,96 @@ def _resolve_identifier_xref(self, should be linked to. @param lineno: The line number within the docstring at which the crossreference is located. + @param invname: Filters by inventory name, implies external=True. + @param domain: Filters by domain. + @param reftype: Filters by reference type. + @param external: Forces the lookup to happen with interspinx. @return: The referenced object within our system, or the URL of an external target (found via Intersphinx). @raise LookupError: If C{identifier} could not be resolved. """ + if invname: assert external + might_be_local_python_ref = not external and domain in ('py', None) and reftype not in ('doc', ) # There is a lot of DWIM here. Look for a global match first, # to reduce the chance of a false positive. # Check if 'identifier' is the fullName of an object. - target = self.obj.system.objForFullName(identifier) - if target is not None: - return target + if might_be_local_python_ref: + target = self.obj.system.objForFullName(identifier) + if target is not None: + return target # Check if the fullID exists in an intersphinx inventory. fullID = self.obj.expandName(identifier) - target_url = self.look_for_intersphinx(fullID) + target_url = self.look_for_intersphinx(fullID, invname=invname, + domain=domain, reftype=reftype) + intersphinx_target_url_unfiltered = self.look_for_intersphinx(fullID) if not target_url: # FIXME: https://github.com/twisted/pydoctor/issues/125 # expandName is unreliable so in the case fullID fails, we # try our luck with 'identifier'. - target_url = self.look_for_intersphinx(identifier) + target_url = self.look_for_intersphinx(identifier, invname=invname, + domain=domain, reftype=reftype) + if not intersphinx_target_url_unfiltered: + intersphinx_target_url_unfiltered = self.look_for_intersphinx(identifier) if target_url: return target_url - - # Since there was no global match, go look for the name in the - # context where it was used. - - # Check if 'identifier' refers to an object by Python name resolution - # in our context. Walk up the object tree and see if 'identifier' refers - # to an object by Python name resolution in each context. - src: Optional['model.Documentable'] = self.obj - while src is not None: - target = src.resolveName(identifier) - if target is not None: - return target - src = src.parent - - # Walk up the object tree again and see if 'identifier' refers to an - # object in an "uncle" object. (So if p.m1 has a class C, the - # docstring for p.m2 can say L{C} to refer to the class in m1). - # If at any level 'identifier' refers to more than one object, complain. - src = self.obj - while src is not None: - target = self.look_for_name(identifier, src.contents.values(), lineno) + + if might_be_local_python_ref: + # Since there was no global match, go look for the name in the + # context where it was used. + + # Check if 'identifier' refers to an object by Python name resolution + # in our context. Walk up the object tree and see if 'identifier' refers + # to an object by Python name resolution in each context. + src: Optional['model.Documentable'] = self.obj + while src is not None: + target = src.resolveName(identifier) + if target is not None: + return target + src = src.parent + + # Walk up the object tree again and see if 'identifier' refers to an + # object in an "uncle" object. (So if p.m1 has a class C, the + # docstring for p.m2 can say L{C} to refer to the class in m1). + # If at any level 'identifier' refers to more than one object, complain. + src = self.obj + while src is not None: + target = self.look_for_name(identifier, src.contents.values(), lineno) + if target is not None: + return target + src = src.parent + + # Examine every module and package in the system and see if 'identifier' + # names an object in each one. Again, if more than one object is + # found, complain. + target = self.look_for_name( + # System.objectsOfType now supports passing the type as string. + identifier, self.obj.system.objectsOfType('pydoctor.model.Module'), lineno) if target is not None: return target - src = src.parent - - # Examine every module and package in the system and see if 'identifier' - # names an object in each one. Again, if more than one object is - # found, complain. - target = self.look_for_name( - # System.objectsOfType now supports passing the type as string. - identifier, self.obj.system.objectsOfType('pydoctor.model.Module'), lineno) - if target is not None: - return target - - message = f'Cannot find link target for "{fullID}"' - if identifier != fullID: - message = f'{message}, resolved from "{identifier}"' - root_idx = fullID.find('.') - if root_idx != -1 and fullID[:root_idx] not in self.obj.system.root_names: - message += ' (you can link to external docs with --intersphinx)' + + message = f'Cannot find link target for "{fullID}"' + if identifier != fullID: + message = f'{message}, resolved from "{identifier}"' + if intersphinx_target_url_unfiltered: + message += f' (your link role filters {intersphinx_target_url_unfiltered!r}, is it by design?)' + else: + root_idx = fullID.find('.') + if root_idx != -1 and fullID[:root_idx] not in self.obj.system.root_names: + message += ' (you can link to external docs with --intersphinx)' + + else: + message = f'Cannot find intersphinx link target for "{fullID}"' + if intersphinx_target_url_unfiltered: + message += f' (your link role filters {intersphinx_target_url_unfiltered!r}, is it by design?)' + if self.reporting_obj: self.reporting_obj.report(message, 'resolve_identifier_xref', lineno) raise LookupError(identifier) + class _AnnotationLinker(DocstringLinker): """ Specialized linker to resolve annotations attached to the given L{Documentable}. @@ -278,9 +317,9 @@ def link_to(self, target: str, label: "Flattenable") -> Tag: else: return self._module_linker.link_to(target, label) - def link_xref(self, target: str, label: "Flattenable", lineno: int) -> Tag: + def link_xref(self, target: str, label: "Flattenable", lineno: int, **kw: Any) -> Tag: with self.switch_context(self._obj): - return self.obj.docstring_linker.link_xref(target, label, lineno) + return self.obj.docstring_linker.link_xref(target, label, lineno, **kw) @contextlib.contextmanager def switch_context(self, ob:Optional['model.Documentable']) -> Iterator[None]: diff --git a/pydoctor/model.py b/pydoctor/model.py index 61f84a2bf..551b95f47 100644 --- a/pydoctor/model.py +++ b/pydoctor/model.py @@ -1445,8 +1445,8 @@ def fetchIntersphinxInventories(self, cache: CacheT) -> None: """ Download and parse intersphinx inventories based on configuration. """ - for url in self.options.intersphinx: - self.intersphinx.update(cache, url) + for i in self.options.intersphinx: + self.intersphinx.update(cache, i) def defaultPostProcess(system:'System') -> None: for cls in system.objectsOfType(Class): diff --git a/pydoctor/node2stan.py b/pydoctor/node2stan.py index bdc3cb543..9bb6bc85d 100644 --- a/pydoctor/node2stan.py +++ b/pydoctor/node2stan.py @@ -5,7 +5,8 @@ import re import optparse -from typing import Any, Callable, ClassVar, Iterable, List, Optional, Union, TYPE_CHECKING +from typing import Any, ClassVar, Iterable, List, Optional, Sequence, Union, TYPE_CHECKING +import attr from docutils.writers import html4css1 from docutils import nodes, frontend, __version_info__ as docutils_version_info @@ -58,10 +59,52 @@ def gettext(node: Union[nodes.Node, List[nodes.Node]]) -> List[str]: _TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$') _VALID_IDENTIFIER_RE = re.compile('[^0-9a-zA-Z_]') +@attr.s(auto_attribs=True) +class Reference: + label: Union[str, Sequence[nodes.Node]] + target: str + invname: Optional[str] = None + domain: Optional[str] = None + reftype: Optional[str] = None + external: bool = False + + +def parse_reference(node:nodes.Node) -> Reference: + """ + Split a reference into (label, target). + """ + label: Union[str, Sequence[nodes.Node]] + if 'refuri' in node.attributes: + # Epytext parsed or manually constructed nodes. + label, target = node.children, node.attributes['refuri'] + else: + # RST parsed. + m = _TARGET_RE.match(node.astext()) + if m: + label, target = m.groups() + else: + label = target = node.astext() + # Support linking to functions and methods with parameters + if target.endswith(')'): + # Remove arg lists for functions (e.g., L{_colorize_link()}) + target = re.sub(r'\(.*\)$', '', target) + + return Reference(label, target, + invname=node.attributes.get('invname'), + domain=node.attributes.get('domain'), + reftype=node.attributes.get('reftype'), + external=node.attributes.get('external', False)) + def _valid_identifier(s: str) -> str: """Remove invalid characters to create valid CSS identifiers. """ return _VALID_IDENTIFIER_RE.sub('', s) +def _label2flattenable(label: Union[str, Sequence[nodes.Node]], linker:DocstringLinker) -> "Flattenable": + if not isinstance(label, str): + return node2stan(label, linker) + else: + return label + class HTMLTranslator(html4css1.HTMLTranslator): """ Pydoctor's HTML translator. @@ -100,30 +143,21 @@ def __init__(self, # Handle interpreted text (crossreferences) def visit_title_reference(self, node: nodes.Node) -> None: lineno = get_lineno(node) - self._handle_reference(node, link_func=lambda target, label: self._linker.link_xref(target, label, lineno)) + ref = parse_reference(node) + target = ref.target + label = _label2flattenable(ref.label, self._linker) + link = self._linker.link_xref(target, label, lineno, + invname=ref.invname, domain=ref.domain, + reftype=ref.reftype, external=ref.external) + self.body.append(flatten(link)) + raise nodes.SkipNode() # Handle internal references def visit_obj_reference(self, node: nodes.Node) -> None: - self._handle_reference(node, link_func=self._linker.link_to) - - def _handle_reference(self, node: nodes.Node, link_func: Callable[[str, "Flattenable"], "Flattenable"]) -> None: - label: "Flattenable" - if 'refuri' in node.attributes: - # Epytext parsed or manually constructed nodes. - label, target = node2stan(node.children, self._linker), node.attributes['refuri'] - else: - # RST parsed. - m = _TARGET_RE.match(node.astext()) - if m: - label, target = m.groups() - else: - label = target = node.astext() - - # Support linking to functions and methods with () at the end - if target.endswith('()'): - target = target[:len(target)-2] - - self.body.append(flatten(link_func(target, label))) + ref = parse_reference(node) + target = ref.target + label = _label2flattenable(ref.label, self._linker) + self.body.append(flatten(self._linker.link_to(target, label))) raise nodes.SkipNode() def should_be_compact_paragraph(self, node: nodes.Node) -> bool: diff --git a/pydoctor/options.py b/pydoctor/options.py index 272539dce..ad55f3d37 100644 --- a/pydoctor/options.py +++ b/pydoctor/options.py @@ -2,6 +2,7 @@ The command-line parsing. """ from __future__ import annotations +import enum import re from typing import Sequence, List, Optional, Type, Tuple, TYPE_CHECKING @@ -16,7 +17,7 @@ from pydoctor import __version__ from pydoctor.themes import get_themes from pydoctor.epydoc.markup import get_supported_docformats -from pydoctor.sphinx import MAX_AGE_HELP, USER_INTERSPHINX_CACHE +from pydoctor.sphinx import MAX_AGE_HELP, USER_INTERSPHINX_CACHE, IntersphinxOption, IntersphinxSource from pydoctor.utils import parse_path, findClassFromDottedName, parse_privacy_tuple, error from pydoctor._configparser import CompositeConfigParser, IniConfigParser, TomlConfigParser, ValidatorParser @@ -174,7 +175,7 @@ def get_parser() -> ArgumentParser: parser.add_argument( '--intersphinx', action='append', dest='intersphinx', - metavar='URL_TO_OBJECTS.INV', default=[], + metavar='[INVENTORY_NAME:]URL_OR_PATH[:BASE_URL]', default=[], help=( "Use Sphinx objects inventory to generate links to external " "documentation. Can be repeated.")) @@ -287,6 +288,161 @@ def _convert_htmlwriter(s: str) -> Type['IWriter']: def _convert_privacy(l: List[str]) -> List[Tuple['model.PrivacyClass', str]]: return list(map(functools.partial(parse_privacy_tuple, opt='--privacy'), l)) +def _intersphinx_source(url_or_path:str) -> IntersphinxSource: + """ + Infer the kind source this string refers to. + + """ + if url_or_path.startswith(HTTP_SCHEMES): + return IntersphinxSource.URL + if not url_or_path.endswith('.inv'): + return IntersphinxSource.URL + return IntersphinxSource.FILE + +def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: + """ + Given a base url OR an url to objects.inv file. + Returns a tuple: (URL_TO_OBJECTS.INV, BASE_URL) + """ + if url.endswith('.inv'): + parts = url.rsplit('/', 1) + if len(parts) != 2: + raise ValueError(f'Failed to parse remote base url for {url}') + base_url = parts[0] + else: + # The URL is the base url, so simply add 'objects.inv' at the end. + base_url = url + if not url.endswith('/'): + url += '/' + else: + base_url = base_url[:-1] + url += 'objects.inv' + return url, base_url + +# So these are the cases that we should handle: +# --intersphinx=http://something.org/ +# --intersphinx=something.org +# --intersphinx=http://something.org/objects.inv +# --intersphinx=http://cnd.abc.something.org/objects.inv:http://something.org/ +# --intersphinx=inventories/pack.inv:http://something.org/ + +# --intersphinx=file.inv:http://something.org/ +# ok so this one and the next one are hard to differenciate... +# we have to require the file to have the extension '.inv' but what about a project +# called 'project.inv' ? So for these cases we should check if the file exists or not. + +# --intersphinx=pydoctor:http://something.org/ +# --intersphinx=pydoctor:http://something.org/objects.inv +# --intersphinx=pydoctor:http://cnd.abc.something.org/objects.inv:http://something.org/ +# --intersphinx=pydoctor:inventories/pack.inv:http://something.org/ +# --intersphinx=pydoctor:c:/data/inventories/pack.inv:http://something.org/ + +def _split_intersphinx_parts(s:str) -> List[str]: + """ + This replies on the fact the filename does not contain a colon. + """ + parts = [''] + part_nb = 0 + # I did not really care about the time complexity of this function + # but it could probably be better avoiding the slicing situation. + for i, c in enumerate(s): + if c == ':': + # It might be a separator. + if s[i:i+3] == '://': + # Not a separator, more like http:// + pass + elif _RE_DRIVE_LIKE.match(s[i-2:i+2]): + # Still not a separator, a windows drive :c:/ + pass + elif len(parts) == 3: + raise ValueError(f'Malformed --intersphinx option, too many parts, beware that colons in filenames are not supported') + elif not parts[part_nb]: + raise ValueError(f'Malformed --intersphinx option, two consecutive colons is not valid') + else: + parts.append('') + part_nb += 1 + continue + parts[part_nb] += c + + return parts + +_RE_DRIVE_LIKE = re.compile(r':[a-z]:(\\|\/)', re.IGNORECASE) +HTTP_SCHEMES = ('http://', 'https://') +def _parse_intersphinx(s:str) -> IntersphinxOption: + """ + Given a string like:: + + [INVENTORY_NAME:]URL_OR_FILEPATH_TO_OBJECTS.INV[:BASE_URL] + + Returns a L{IntersphinxOption} instance. + """ + try: + + parts = _split_intersphinx_parts(s) + + nb_parts = len(parts) + + if nb_parts == 1: + # Just URL_TO_OBJECTS.INV, it cannot be a filepath because a filepath must be + # followed by a base URL. + objects_inv_url, base_url = _object_inv_url_and_base_url(*parts) + return IntersphinxOption( + invname=None, + source=IntersphinxSource.URL, + url_or_path=objects_inv_url, + base_url=base_url + ) + + elif nb_parts == 2: + # If there is only one ':', the first part might + # been either the invname or the url or file path. + p1, p2 = parts + if p1.endswith('.inv') or p1.startswith(HTTP_SCHEMES): + # So at this point we have: URL_OR_FILEPATH_TO_OBJECTS.INV:BASE_URL + invname, url_or_path, base_url = None, p1, p2 + _, base_url = _object_inv_url_and_base_url(base_url) + source = _intersphinx_source(url_or_path) + if source is IntersphinxSource.URL: + url_or_path, _ = _object_inv_url_and_base_url(url_or_path) + else: + # At this point we have: INVENTORY_NAME:URL_TO_OBJECTS.INV + invname, url_or_path = p1, p2 + url_or_path, base_url = _object_inv_url_and_base_url(url_or_path) + source = IntersphinxSource.URL + + return IntersphinxOption( + invname=invname, + source=source, + url_or_path=url_or_path, + base_url=base_url + ) + + elif nb_parts == 3: + # we have INVENTORY_NAME:URL_OR_FILEPATH_TO_OBJECTS.INV:BASE_URL + invname, url_or_path, base_url = parts + source = _intersphinx_source(url_or_path) + if source is IntersphinxSource.URL: + url_or_path, _ = _object_inv_url_and_base_url(url_or_path) + _, base_url = _object_inv_url_and_base_url(base_url) + return IntersphinxOption( + invname=invname, + source=source, + url_or_path=url_or_path, + base_url=base_url + ) + else: + assert False + + except ValueError as e: + error(str(e)) + +def _convert_intersphinx(l: List[str]) -> List[IntersphinxOption]: + """ + Returns list of tuples: (INVENTORY_NAME, URL_OR_FILEPATH_TO_OBJECTS.INV, BASE_URL) + """ + return list(map(_parse_intersphinx, l)) + + _RECOGNIZED_SOURCE_HREF = { # Sourceforge '{mod_source_href}#l{lineno}': re.compile(r'(^https?:\/\/sourceforge\.net\/)'), @@ -355,7 +511,7 @@ class Options: verbosity: int = attr.ib() quietness: int = attr.ib() introspect_c_modules: bool = attr.ib() - intersphinx: List[str] = attr.ib() + intersphinx: List[IntersphinxOption] = attr.ib(converter=_convert_intersphinx) enable_intersphinx_cache: bool = attr.ib() intersphinx_cache_path: str = attr.ib() clear_intersphinx_cache: bool = attr.ib() diff --git a/pydoctor/sphinx.py b/pydoctor/sphinx.py index e60402364..66e5cb97a 100644 --- a/pydoctor/sphinx.py +++ b/pydoctor/sphinx.py @@ -2,17 +2,19 @@ Support for Sphinx compatibility. """ from __future__ import annotations +from collections import defaultdict +import enum import logging import os +from pathlib import Path import shutil import textwrap import zlib from typing import ( - TYPE_CHECKING, Callable, ContextManager, Dict, IO, Iterable, Mapping, - Optional, Tuple + TYPE_CHECKING, Callable, ContextManager, Dict, IO, Iterable, List, Mapping, + Optional, Set, Tuple ) -import enum import appdirs import attr @@ -32,87 +34,136 @@ def close(self) -> None: ... Documentable = object CacheT = object - logger = logging.getLogger(__name__) -class InvObjectType(enum.Enum): - FUNCTION = ('function', 'func') - METHOD = ('method', 'meth') - MODULE = ('module', 'mod', 'package', 'pack') - CLASS = ('class', 'cls') - ATTRIBUTE = ('attribute', 'attr', 'attrib', 'data') - OBJECT = ('obj', 'object') +class IntersphinxSource(enum.Enum): + FILE = enum.auto() + URL = enum.auto() - def common_name(self) -> str: - v = self.value - assert isinstance(v, tuple) - return v[0] # type:ignore +@attr.s(auto_attribs=True) +class IntersphinxOption: + """ + Represent a single, parsed C{--intersphinx} option. + """ + invname: Optional[str] + source: IntersphinxSource + url_or_path: str + base_url: str - @classmethod - def from_text(cls, txt:str) -> 'InvObjectType': - for e in InvObjectType._member_map_.values(): - if txt in e.value: - assert isinstance(e, InvObjectType) - return e - return InvObjectType.OBJECT - -def _convert_typ(typ:str) -> 'InvObjectType': - if typ.startswith('py:'): - return InvObjectType.from_text(typ[3:].strip(':')) - else: - return InvObjectType.from_text(typ.strip(':')) +def parse_domain_reftype(name: str) -> Tuple[Optional[str], str]: + """ + Given a string like C{class} or C{py:class} or C{rst:directive:option}, + returns a tuple: (domain, reftype). + The reftype is normalized with L{normalize_reftype}. + """ + names = name.split(':', maxsplit=1) + if len(names) == 1: # reftype + domain, reftype = (None, *names) + else: # domain:reftype + domain, reftype = names + return (domain, normalize_reftype(reftype)) + +def normalize_reftype(reftype:str) -> str: + """ + Some reftype can be written in several manners. I.e 'cls' to 'class'. + This function transforms them into their canonical version. + + This is intended to be used for the 'py' domain reftypes. Other kind + of reftypes are returned as is. + """ + return { + + 'cls': 'class', + 'function': 'func', + 'method': 'meth', + 'exception': 'exc', + 'attribute': 'attr', + 'attrib': 'attr', + 'constant': 'const', + 'module': 'mod', + 'object': 'obj', + + }.get(reftype, reftype) @attr.s(auto_attribs=True) class InventoryObject: - name:str - base_url:str - location:str - typ:InvObjectType = attr.ib(converter=_convert_typ) + invname: str + name: str + base_url: str + location: str + reftype: str + domain: Optional[str] + display: str class SphinxInventory: """ Sphinx inventory handler. """ - def __init__( - self, - logger: Callable[..., None], - project_name: Optional[str] = None - ): - """ - @param project_name: Dummy argument to stay compatible with - L{twisted.python._pydoctor}. This will be used when we fix - U{#609 }. - - """ - self._links: Dict[str, InventoryObject] = {} + def __init__(self, logger: Callable[..., None],): + + self._links: Dict[str, List[InventoryObject]] = defaultdict(list) + self._inventories: Set[str] = set() self._logger = logger def error(self, where: str, message: str) -> None: self._logger(where, message, thresh=-1) + + def message(self, where: str, message: str) -> None: + self._logger(where, message, thresh=1) - def update(self, cache: CacheT, url: str) -> None: + def update(self, cache: CacheT, intersphinx: IntersphinxOption) -> None: """ - Update inventory from URL. + Update inventory from an L{IntersphinxOption} instance. """ - parts = url.rsplit('/', 1) - if len(parts) != 2: - self.error( - 'sphinx', 'Failed to get remote base url for %s' % (url,)) - return - - base_url = parts[0] - - data = cache.get(url) - - if not data: - self.error( - 'sphinx', 'Failed to get object inventory from %s' % (url, )) + url_or_path = intersphinx.url_or_path + invname = intersphinx.invname + base_url = intersphinx.base_url + + if intersphinx.source is IntersphinxSource.URL: + # That's an URL. + data = cache.get(url_or_path) + if not data: + msg = f'Failed to get object inventory from url {url_or_path}' + if not url_or_path.startswith(('http://', 'https://') + ) and url_or_path.startswith(base_url): + msg += ('. To load inventory from a file, ' + 'the BASE_URL part of the intersphinx option is required.') + + self.error('sphinx', msg) + return + else: + assert intersphinx.source is IntersphinxSource.FILE + # That's a file. + try: + data = Path(url_or_path).read_bytes() + except FileNotFoundError as e: + self.error('sphinx', + f'Inventory file {url_or_path} does not exist. ' + 'To load inventory from an URL, please include the http(s) scheme.') + except Exception as e: + self.error('sphinx', + f'Failed to read inventory file {url_or_path}: {e}') + return + + inventory_name = invname or str(hash(url_or_path)) + if inventory_name in self._inventories: + if invname: + self.error('sphinx', + f'Duplicate inventory {invname!r} from {url_or_path}') + else: + self.error('sphinx', + f'Duplicate inventory from {url_or_path}') return - + + self._inventories.add(inventory_name) payload = self._getPayload(base_url, data) - self._links.update(self._parseInventory(base_url, payload)) + invdata = self._parseInventory(base_url, payload, + invname=inventory_name) + # Update links + for k,v in invdata.items(): + self._links[k].extend(v) def _getPayload(self, base_url: str, data: bytes) -> str: """ @@ -146,39 +197,90 @@ def _getPayload(self, base_url: str, data: bytes) -> str: def _parseInventory( self, base_url: str, - payload: str - ) -> Dict[str, InventoryObject]: + payload: str, + invname: str + ) -> Dict[str, List[InventoryObject]]: """ Parse clear text payload and return a dict with module to link mapping. """ - result = {} + + result = defaultdict(list) for line in payload.splitlines(): try: name, typ, prio, location, display = _parseInventoryLine(line) - except ValueError: + domain, reftype = parse_domain_reftype(typ) + except ValueError as e: self.error( 'sphinx', - 'Failed to parse line "%s" for %s' % (line, base_url), + f'Failed to parse line {line!r} for {base_url}: {e}', ) continue - - if not typ.startswith('py:'): - # Non-Python references are ignored. - continue - - result[name] = InventoryObject(name=name, typ=typ, - base_url=base_url, location=location) + + result[name].append(InventoryObject( + invname=invname, + name=name, + base_url=base_url, + location=location, + reftype=reftype, + domain=domain, + display=display)) return result - def getInv(self, name) -> Optional[InventoryObject]: - return self._links.get(name) + def getInv(self, target: str, + invname:Optional[str]=None, + domain:Optional[str]=None, + reftype:Optional[str]=None) -> Optional[InventoryObject]: + """ + Get the inventory object instance matching the criteria. + """ + + if target not in self._links: + return None + + options = self._links[target] + + def _filter(inv: InventoryObject) -> bool: + if invname and inv.invname != invname: + return False + if domain and inv.domain != domain: + return False + if reftype and inv.reftype != reftype: + return False + return True + + # apply filters + options = list(filter(_filter, options)) + + if len(options) == 1: + return options[0] + elif not options: + return None - def getLink(self, name: str) -> Optional[str]: + # We still have several options under consideration... + # If the domain is not specified, then the 'py' domain is assumed. + # This typically happens for regular `links` that exists in several domains + # typically like the standard library 'list' (it exists in the terms and in the standard types). + if domain is None: + domain = 'py' + py_options = list(filter(_filter, options)) + if py_options: + # TODO: We might want to leave a message in the case where several objects + # are still in consideration. But for now, we just pick the last one. + return py_options[-1] + + # Last, we fall back to the last matching object because our old version + # of the inventory would override names as they were parsed. + return options[-1] + + def getLink(self, target: str, + invname:Optional[str]=None, + domain:Optional[str]=None, + reftype:Optional[str]=None) -> Optional[str]: """ - Return link for `name` or None if no link is found. + Return link for ``target`` or None if no link is found. """ - invobj = self.getInv(name) + invobj = self.getInv(target, invname, domain, reftype) if not invobj: return None @@ -187,7 +289,7 @@ def getLink(self, name: str) -> Optional[str]: # For links ending with $, replace it with full name. if relative_link.endswith('$'): - relative_link = relative_link[:-1] + name + relative_link = relative_link[:-1] + target return f'{base_url}/{relative_link}' @@ -298,24 +400,24 @@ def _generateLine(self, obj: Documentable) -> str: url = obj.url display = '-' - objtype: InvObjectType + objtype: str if isinstance(obj, model.Module): - objtype = InvObjectType.MODULE + objtype = 'py:module' elif isinstance(obj, model.Class): - objtype = InvObjectType.CLASS + objtype = 'py:class' elif isinstance(obj, model.Function): if obj.kind is model.DocumentableKind.FUNCTION: - objtype = InvObjectType.FUNCTION + objtype = 'py:function' else: - objtype = InvObjectType.METHOD + objtype = 'py:method' elif isinstance(obj, model.Attribute): - objtype = InvObjectType.ATTRIBUTE + objtype = 'py:attribute' else: - objtype = InvObjectType.OBJECT + objtype = 'py:obj' self.error( 'sphinx', "Unknown type %r for %s." % (type(obj), full_name,)) - return f'{full_name} py:{objtype.common_name()} -1 {url} {display}\n' + return f'{full_name} {objtype} -1 {url} {display}\n' USER_INTERSPHINX_CACHE = appdirs.user_cache_dir("pydoctor") diff --git a/pydoctor/test/__init__.py b/pydoctor/test/__init__.py index 09a9e65b9..1092ee3f1 100644 --- a/pydoctor/test/__init__.py +++ b/pydoctor/test/__init__.py @@ -2,7 +2,7 @@ import contextlib from logging import LogRecord -from typing import Iterable, TYPE_CHECKING, Iterator, Optional, Sequence +from typing import Any, Iterable, TYPE_CHECKING, Iterator, Optional, Sequence import sys import pytest from pathlib import Path @@ -90,7 +90,7 @@ class NotFoundLinker(DocstringLinker): def link_to(self, target: str, label: "Flattenable") -> Tag: return tags.transparent(label) - def link_xref(self, target: str, label: "Flattenable", lineno: int) -> Tag: + def link_xref(self, target: str, label: "Flattenable", lineno: int, **kw:Any) -> Tag: return tags.code(label) @contextlib.contextmanager diff --git a/pydoctor/test/test_epydoc2stan.py b/pydoctor/test/test_epydoc2stan.py index c0b15a3c1..24b1bedc2 100644 --- a/pydoctor/test/test_epydoc2stan.py +++ b/pydoctor/test/test_epydoc2stan.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Type, cast, TYPE_CHECKING +from typing import Any, List, Optional, Type, cast, TYPE_CHECKING import re from pytest import mark, raises @@ -168,7 +168,15 @@ def func(): system = mod.system inventory = SphinxInventory(system.msg) - inventory._links['external.func'] = InventoryObject(name='external.func', base_url='https://example.net', location='lib.html#func', typ='func') + inventory._links['external.func'] = [ + InventoryObject( + invname='xxx', + name='external.func', + base_url='https://example.net', + location='lib.html#func', + domain='py', + reftype='func', + display='-')] system.intersphinx = inventory html = docstring2html(mod.contents['func']) @@ -1079,7 +1087,15 @@ def test_EpydocLinker_look_for_intersphinx_hit() -> None: """ system = model.System() inventory = SphinxInventory(system.msg) - inventory._links['base.module.other'] = InventoryObject(name='base.module.other', base_url='http://tm.tld', location='some.html', typ='mod') + inventory._links['base.module.other'] = [ + InventoryObject( + invname='xxx', + name='base.module.other', + base_url='http://tm.tld', + location='some.html', + domain='py', + reftype='mod', + display='-')] system.intersphinx = inventory target = model.Module(system, 'ignore-name') sut = target.docstring_linker @@ -1095,7 +1111,15 @@ def test_EpydocLinker_adds_intersphinx_link_css_class() -> None: """ system = model.System() inventory = SphinxInventory(system.msg) - inventory._links['base.module.other'] = InventoryObject(name='base.module.other', base_url='http://tm.tld', location='some.html', typ='mod') + inventory._links['base.module.other'] = [ + InventoryObject( + invname='xxx', + name='base.module.other', + base_url='http://tm.tld', + location='some.html', + domain='py', + reftype='mod', + display='-')] system.intersphinx = inventory target = model.Module(system, 'ignore-name') sut = target.docstring_linker @@ -1116,7 +1140,15 @@ def test_EpydocLinker_resolve_identifier_xref_intersphinx_absolute_id() -> None: """ system = model.System() inventory = SphinxInventory(system.msg) - inventory._links['base.module.other'] = InventoryObject(name='base.module.other', base_url='http://tm.tld', location='some.html', typ='mod') + inventory._links['base.module.other'] = [ + InventoryObject( + invname='xxx', + name='base.module.other', + base_url='http://tm.tld', + location='some.html', + domain='py', + reftype='mod', + display='-')] system.intersphinx = inventory target = model.Module(system, 'ignore-name') sut = target.docstring_linker @@ -1136,7 +1168,16 @@ def test_EpydocLinker_resolve_identifier_xref_intersphinx_relative_id() -> None: """ system = model.System() inventory = SphinxInventory(system.msg) - inventory._links['ext_package.ext_module'] = InventoryObject(name='ext_package.ext_module', base_url='http://tm.tld', location='some.html', typ='mod') + inventory._links['ext_package.ext_module'] = [ + InventoryObject( + invname='xxx', + name='ext_package.ext_module', + base_url='http://tm.tld', + location='some.html', + domain='py', + reftype='mod', + display='-', + )] system.intersphinx = inventory target = model.Module(system, 'ignore-name') # Here we set up the target module as it would have this import. @@ -1197,7 +1238,7 @@ class InMemoryInventory: 'socket.socket': 'https://docs.python.org/3/library/socket.html#socket.socket', } - def getLink(self, name: str) -> Optional[str]: + def getLink(self, name: str, **kw:Any) -> Optional[str]: return self.INVENTORY.get(name) def test_EpydocLinker_resolve_identifier_xref_order(capsys: CapSys) -> None: @@ -1418,7 +1459,7 @@ def link_to(self, target: str, label: "Flattenable") -> Tag: self.requests.append(target) return tags.transparent(label) - def link_xref(self, target: str, label: "Flattenable", lineno: int) -> Tag: + def link_xref(self, target: str, label: "Flattenable", lineno: int, **kw:Any) -> Tag: assert False @mark.parametrize('annotation', ( diff --git a/pydoctor/test/test_model.py b/pydoctor/test/test_model.py index 9dc9d4f33..ca0e978f4 100644 --- a/pydoctor/test/test_model.py +++ b/pydoctor/test/test_model.py @@ -147,11 +147,9 @@ def test_fetchIntersphinxInventories_content() -> None: Download and parse intersphinx inventories for each configured intersphix. """ - options = Options.defaults() - options.intersphinx = [ - 'http://sphinx/objects.inv', - 'file:///twisted/index.inv', - ] + options = Options.from_args(['--intersphinx=http://sphinx/objects.inv', + '--intersphinx=file:///twisted/index.inv']) + url_content = { 'http://sphinx/objects.inv': zlib.compress( b'sphinx.module py:module -1 sp.html -'), diff --git a/pydoctor/test/test_options.py b/pydoctor/test/test_options.py index 990552840..da16057c8 100644 --- a/pydoctor/test/test_options.py +++ b/pydoctor/test/test_options.py @@ -5,7 +5,8 @@ from io import StringIO from pydoctor import model -from pydoctor.options import PydoctorConfigParser, Options +from pydoctor.options import (PydoctorConfigParser, Options, _split_intersphinx_parts, + _object_inv_url_and_base_url, IntersphinxOption, IntersphinxSource) from pydoctor.test import FixtureRequest, TempPathFactory @@ -168,8 +169,16 @@ def test_config_parsers(project_conf:str, pydoctor_conf:str, tempDir:Path) -> No assert options.verbosity == -1 assert options.warnings_as_errors == True assert options.privacy == [(model.PrivacyClass.HIDDEN, 'pydoctor.test')] - assert options.intersphinx[0] == "https://docs.python.org/3/objects.inv" - assert options.intersphinx[-1] == "https://tristanlatr.github.io/apidocs/docutils/objects.inv" + + assert options.intersphinx[0] == IntersphinxOption( + invname=None, source=IntersphinxSource.URL, + url_or_path='https://docs.python.org/3/objects.inv', + base_url='https://docs.python.org/3') + + assert options.intersphinx[-1] == IntersphinxOption( + invname=None, source=IntersphinxSource.URL, + url_or_path='https://tristanlatr.github.io/apidocs/docutils/objects.inv', + base_url='https://tristanlatr.github.io/apidocs/docutils') def test_repeatable_options_multiple_configs_and_args(tempDir:Path) -> None: config1 = """ @@ -204,21 +213,42 @@ def test_repeatable_options_multiple_configs_and_args(tempDir:Path) -> None: options = Options.defaults() assert options.verbosity == 1 - assert options.intersphinx == ["https://docs.python.org/3/objects.inv",] + assert options.intersphinx == [IntersphinxOption( + invname=None, + source=IntersphinxSource.URL, + url_or_path='https://docs.python.org/3/objects.inv', + base_url='https://docs.python.org/3'),] assert options.projectname == "Hello World!" assert options.projectversion == "2050.4C" options = Options.from_args(['-vv']) assert options.verbosity == 3 - assert options.intersphinx == ["https://docs.python.org/3/objects.inv",] + assert options.intersphinx == [IntersphinxOption( + invname=None, + source=IntersphinxSource.URL, + url_or_path='https://docs.python.org/3/objects.inv', + base_url='https://docs.python.org/3'),] assert options.projectname == "Hello World!" assert options.projectversion == "2050.4C" - options = Options.from_args(['-vv', '--intersphinx=https://twistedmatrix.com/documents/current/api/objects.inv', '--intersphinx=https://urllib3.readthedocs.io/en/latest/objects.inv']) + options = Options.from_args(['-vv', '--intersphinx=https://twistedmatrix.com/documents/current/api/objects.inv', + '--intersphinx=https://urllib3.readthedocs.io/en/latest/objects.inv']) assert options.verbosity == 3 - assert options.intersphinx == ["https://twistedmatrix.com/documents/current/api/objects.inv", "https://urllib3.readthedocs.io/en/latest/objects.inv"] + assert options.intersphinx == [ + IntersphinxOption( + invname=None, + source=IntersphinxSource.URL, + url_or_path='https://twistedmatrix.com/documents/current/api/objects.inv', + base_url='https://twistedmatrix.com/documents/current/api'), + + IntersphinxOption( + invname=None, + source=IntersphinxSource.URL, + url_or_path='https://urllib3.readthedocs.io/en/latest/objects.inv', + base_url='https://urllib3.readthedocs.io/en/latest'), + ] assert options.projectname == "Hello World!" assert options.projectversion == "2050.4C" @@ -259,3 +289,27 @@ def test_validations(tempDir:Path) -> None: assert options.quietness == 0 assert options.warnings_as_errors == False assert options.htmloutput == '1' + +def test_intersphinx_split_on_colon(): + + assert _split_intersphinx_parts('http://something.org/')==['http://something.org/'] + assert _split_intersphinx_parts('something.org')==['something.org'] + assert _split_intersphinx_parts('http://something.org/objects.inv')==['http://something.org/objects.inv'] + assert _split_intersphinx_parts('http://cnd.abc.something.org/objects.inv:http://something.org/')==['http://cnd.abc.something.org/objects.inv', 'http://something.org/'] + assert _split_intersphinx_parts('inventories/pack.inv:http://something.org/')==['inventories/pack.inv', 'http://something.org/'] + assert _split_intersphinx_parts('file.inv:http://something.org/')==['file.inv', 'http://something.org/'] + assert _split_intersphinx_parts('pydoctor:http://something.org/')==['pydoctor', 'http://something.org/'] + assert _split_intersphinx_parts('pydoctor:http://something.org/objects.inv')==['pydoctor', 'http://something.org/objects.inv'] + assert _split_intersphinx_parts('pydoctor:http://cnd.abc.something.org/objects.inv:http://something.org/')==['pydoctor', 'http://cnd.abc.something.org/objects.inv', 'http://something.org/'] + assert _split_intersphinx_parts('pydoctor:inventories/pack.inv:http://something.org/')==['pydoctor', 'inventories/pack.inv', 'http://something.org/'] + assert _split_intersphinx_parts('pydoctor:c:/data/inventories/pack.inv:http://something.org/')==['pydoctor', 'c:/data/inventories/pack.inv', 'http://something.org/'] + + with pytest.raises(ValueError, match='Malformed --intersphinx option, two consecutive colons is not valid'): + _split_intersphinx_parts('pydoctor::') + with pytest.raises(ValueError, match='Malformed --intersphinx option, too many parts'): + _split_intersphinx_parts('pydoctor:a:b:c:d') + +def test_intersphinx_base_url_deductions(): + assert _object_inv_url_and_base_url('http://some.url/api/objects.inv')==('http://some.url/api/objects.inv', 'http://some.url/api') + assert _object_inv_url_and_base_url('http://some.url/api')==('http://some.url/api/objects.inv', 'http://some.url/api') + assert _object_inv_url_and_base_url('http://some.url/api/')==('http://some.url/api/objects.inv', 'http://some.url/api') diff --git a/pydoctor/test/test_sphinx.py b/pydoctor/test/test_sphinx.py index 5a00ca03a..674bab29d 100644 --- a/pydoctor/test/test_sphinx.py +++ b/pydoctor/test/test_sphinx.py @@ -19,7 +19,7 @@ from hypothesis import strategies as st from . import CapLog, FixtureRequest, MonkeyPatch, TempPathFactory -from pydoctor import model, sphinx +from pydoctor import model, sphinx, options @@ -334,7 +334,14 @@ def test_getLink_found(inv_reader_nolog: sphinx.SphinxInventory) -> None: Return the link from internal state. """ - inv_reader_nolog._links['some.name'] = sphinx.InventoryObject(name='some.name', base_url='http://base.tld', location='some/url.php', typ='mod') + inv_reader_nolog._links['some.name'] = [sphinx.InventoryObject( + invname='xxx', + name='some.name', + base_url='http://base.tld', + location='some/url.php', + domain='py', + reftype='mod', + display='-')] assert 'http://base.tld/some/url.php' == inv_reader_nolog.getLink('some.name') @@ -344,7 +351,14 @@ def test_getLink_self_anchor(inv_reader_nolog: sphinx.SphinxInventory) -> None: Return the link with anchor as target name when link end with $. """ - inv_reader_nolog._links['some.name'] = sphinx.InventoryObject(name='some.name', base_url='http://base.tld', location='some/url.php#$', typ='mod') + inv_reader_nolog._links['some.name'] = [sphinx.InventoryObject( + invname='xxx', + name='some.name', + base_url='http://base.tld', + location='some/url.php#$', + reftype='mod', + domain='py', + display='-')] assert 'http://base.tld/some/url.php#some.name' == inv_reader_nolog.getLink('some.name') @@ -365,9 +379,12 @@ def test_update_functional(inv_reader_nolog: sphinx.SphinxInventory) -> None: # The rest of this file is compressed with zlib. """ + zlib.compress(payload) + url = 'http://some.url/api/objects.inv' - inv_reader_nolog.update(cast('sphinx.CacheT', {url: content}), url) + opt = options.Options.from_args([f'--intersphinx={url}']) + + inv_reader_nolog.update(cast('sphinx.CacheT', {url: content}), *opt.intersphinx) assert 'http://some.url/api/module1.html' == inv_reader_nolog.getLink('some.module1') assert 'http://some.url/api/module2.html' == inv_reader_nolog.getLink('other.module2') @@ -375,14 +392,19 @@ def test_update_functional(inv_reader_nolog: sphinx.SphinxInventory) -> None: def test_update_bad_url(inv_reader: InvReader) -> None: """ - Log an error when failing to get base url from url. + Log an error when failing to get objects.inv. """ - inv_reader.update(cast('sphinx.CacheT', {}), 'really.bad.url') + url = 'really.bad.url' + + opt = options.Options.from_args([f'--intersphinx={url}']) + + inv_reader.update(cast('sphinx.CacheT', {}), *opt.intersphinx) assert inv_reader._links == {} expected_log = [( - 'sphinx', 'Failed to get remote base url for really.bad.url', -1 + 'sphinx', ('Failed to get object inventory from url really.bad.url/objects.inv. ' + 'To load inventory from a file, the BASE_URL part of the intersphinx option is required.'), -1 )] assert expected_log == inv_reader._logger.messages @@ -392,12 +414,18 @@ def test_update_fail(inv_reader: InvReader) -> None: Log an error when failing to get content from url. """ - inv_reader.update(cast('sphinx.CacheT', {}), 'http://some.tld/o.inv') + url = 'http://some.tld/o.inv' + + inv_reader.update(cast('sphinx.CacheT', {}), + options.IntersphinxOption( + None, options.IntersphinxSource.URL, + url, 'http://some.tld/', + )) assert inv_reader._links == {} expected_log = [( 'sphinx', - 'Failed to get object inventory from http://some.tld/o.inv', + 'Failed to get object inventory from url http://some.tld/o.inv', -1, )] assert expected_log == inv_reader._logger.messages @@ -408,7 +436,7 @@ def test_parseInventory_empty(inv_reader_nolog: sphinx.SphinxInventory) -> None: Return empty dict for empty input. """ - result = inv_reader_nolog._parseInventory('http://base.tld', '') + result = inv_reader_nolog._parseInventory('http://base.tld', '', 'xxx') assert {} == result @@ -419,9 +447,16 @@ def test_parseInventory_single_line(inv_reader_nolog: sphinx.SphinxInventory) -> """ result = inv_reader_nolog._parseInventory( - 'http://base.tld', 'some.attr py:attr -1 some.html De scription') + 'http://base.tld', 'some.attr py:attr -1 some.html De scription', 'xxx') - assert {'some.attr': sphinx.InventoryObject(name='some.attr', base_url='http://base.tld', location='some.html', typ='py:attr')} == result + assert {'some.attr': [sphinx.InventoryObject( + invname='xxx', + name='some.attr', + base_url='http://base.tld', + location='some.html', + reftype='attr', + domain='py', + display='De scription')]} == result def test_parseInventory_spaces() -> None: @@ -468,31 +503,46 @@ def test_parseInventory_invalid_lines(inv_reader: InvReader) -> None: 'good.again py:module 0 again.html -\n' ) - result = inv_reader._parseInventory(base_url, content) + result = inv_reader._parseInventory(base_url, content, 'xxx') assert { - 'good.attr': sphinx.InventoryObject(name='good.attr', base_url='http://tm.tld', location='some.html', typ='py:attribute'), - 'good.again': sphinx.InventoryObject(name='good.again', base_url='http://tm.tld', location='again.html', typ='py:module'), + 'good.attr': [sphinx.InventoryObject( + invname='xxx', + name='good.attr', + base_url='http://tm.tld', + location='some.html', + reftype='attr', # reftypes are normalized + domain='py', + display='-')], + 'good.again': [sphinx.InventoryObject( + invname='xxx', + name='good.again', + base_url='http://tm.tld', + location='again.html', + reftype='mod', # reftypes are normalized + domain='py', + display='-')], } == result + assert [ ( 'sphinx', - 'Failed to parse line "missing.display.name py:attribute 1 some.html" for http://tm.tld', + 'Failed to parse line \'missing.display.name py:attribute 1 some.html\' for http://tm.tld: Display name column cannot be empty', -1, ), ( 'sphinx', - 'Failed to parse line "bad.attr bad format" for http://tm.tld', + 'Failed to parse line \'bad.attr bad format\' for http://tm.tld: Could not find priority column', -1, ), - ('sphinx', 'Failed to parse line "very.bad" for http://tm.tld', -1), - ('sphinx', 'Failed to parse line "" for http://tm.tld', -1), + ('sphinx', 'Failed to parse line \'very.bad\' for http://tm.tld: Could not find priority column', -1), + ('sphinx', 'Failed to parse line \'\' for http://tm.tld: Could not find priority column', -1), ] == inv_reader._logger.messages -def test_parseInventory_type_filter(inv_reader: InvReader) -> None: +def test_parseInventory_all_kinds(inv_reader: InvReader) -> None: """ - Ignore entries that don't have a 'py:' type field. + All inventory entries are parsed, the one in the 'py' domain as well as others. """ base_url = 'https://docs.python.org/3' @@ -500,13 +550,50 @@ def test_parseInventory_type_filter(inv_reader: InvReader) -> None: 'dict std:label -1 reference/expressions.html#$ Dictionary displays\n' 'dict py:class 1 library/stdtypes.html#$ -\n' 'dict std:2to3fixer 1 library/2to3.html#2to3fixer-$ -\n' + 'py:attribute:value rst:directive:option 1 usage/domains/python.html#directive-option-py-attribute-value -\n' ) - result = inv_reader._parseInventory(base_url, content) + result = inv_reader._parseInventory(base_url, content, 'python') assert { - 'dict': sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='py:class'), + 'dict': [ + sphinx.InventoryObject( + invname='python', + name='dict', + base_url='https://docs.python.org/3', + location='reference/expressions.html#$', + reftype='label', + domain='std', + display='Dictionary displays'), + sphinx.InventoryObject( + invname='python', + name='dict', + base_url='https://docs.python.org/3', + location='library/stdtypes.html#$', + reftype='class', + domain='py', + display='-'), + sphinx.InventoryObject( + invname='python', + name='dict', + base_url='https://docs.python.org/3', + location='library/2to3.html#2to3fixer-$', + reftype='2to3fixer', + domain='std', + display='-'),], + + 'py:attribute:value':[ + sphinx.InventoryObject( + invname='python', + name='py:attribute:value', + base_url='https://docs.python.org/3', + location='usage/domains/python.html#directive-option-py-attribute-value', + reftype='directive:option', + domain='rst', + display='-'), + ], } == result + assert [] == inv_reader._logger.messages @@ -758,12 +845,11 @@ def test_prepareCache( if clearCache: assert not cacheDirectory.exists() -def test_inv_object_typ() -> None: - obj = sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='py:class') - assert obj.typ is sphinx.InvObjectType.CLASS - obj = sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='py:class:') - assert obj.typ is sphinx.InvObjectType.CLASS - obj = sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='cls') - assert obj.typ is sphinx.InvObjectType.CLASS - obj = sphinx.InventoryObject(name='dict', base_url='https://docs.python.org/3', location='library/stdtypes.html#$', typ='class') - assert obj.typ is sphinx.InvObjectType.CLASS \ No newline at end of file +def test_inv_object_reftyp() -> None: + obj = sphinx.InventoryObject(invname='abc', + name='dict', + base_url='https://docs.python.org/3', + location='library/stdtypes.html#$', + domain='py', + reftype='class', + display='-') \ No newline at end of file From 9f4b258345f6d9c3c6ef58f2504e4b4eb1baa360 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Tue, 16 Jan 2024 23:03:04 -0500 Subject: [PATCH 03/20] WIP, we'll need two a new options to avoid confusing behaviours --- pydoctor/options.py | 29 ++++++++++----------- pydoctor/sphinx.py | 47 ++++++++++++++++++++++++----------- pydoctor/test/test_options.py | 14 +++++------ pydoctor/test/test_sphinx.py | 2 +- 4 files changed, 55 insertions(+), 37 deletions(-) diff --git a/pydoctor/options.py b/pydoctor/options.py index ad55f3d37..743c4d837 100644 --- a/pydoctor/options.py +++ b/pydoctor/options.py @@ -17,7 +17,7 @@ from pydoctor import __version__ from pydoctor.themes import get_themes from pydoctor.epydoc.markup import get_supported_docformats -from pydoctor.sphinx import MAX_AGE_HELP, USER_INTERSPHINX_CACHE, IntersphinxOption, IntersphinxSource +from pydoctor.sphinx import MAX_AGE_HELP, USER_INTERSPHINX_CACHE, IntersphinxOption, URL, FILE from pydoctor.utils import parse_path, findClassFromDottedName, parse_privacy_tuple, error from pydoctor._configparser import CompositeConfigParser, IniConfigParser, TomlConfigParser, ValidatorParser @@ -288,16 +288,16 @@ def _convert_htmlwriter(s: str) -> Type['IWriter']: def _convert_privacy(l: List[str]) -> List[Tuple['model.PrivacyClass', str]]: return list(map(functools.partial(parse_privacy_tuple, opt='--privacy'), l)) -def _intersphinx_source(url_or_path:str) -> IntersphinxSource: - """ - Infer the kind source this string refers to. +# def _intersphinx_source(url_or_path:str) -> object: +# """ +# Infer the kind source this string refers to. - """ - if url_or_path.startswith(HTTP_SCHEMES): - return IntersphinxSource.URL - if not url_or_path.endswith('.inv'): - return IntersphinxSource.URL - return IntersphinxSource.FILE +# """ +# if url_or_path.startswith(HTTP_SCHEMES): +# return URL +# if not url_or_path.endswith('.inv'): +# return URL +# return FILE def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: """ @@ -368,6 +368,7 @@ def _split_intersphinx_parts(s:str) -> List[str]: _RE_DRIVE_LIKE = re.compile(r':[a-z]:(\\|\/)', re.IGNORECASE) HTTP_SCHEMES = ('http://', 'https://') + def _parse_intersphinx(s:str) -> IntersphinxOption: """ Given a string like:: @@ -388,7 +389,7 @@ def _parse_intersphinx(s:str) -> IntersphinxOption: objects_inv_url, base_url = _object_inv_url_and_base_url(*parts) return IntersphinxOption( invname=None, - source=IntersphinxSource.URL, + source=URL, url_or_path=objects_inv_url, base_url=base_url ) @@ -402,13 +403,13 @@ def _parse_intersphinx(s:str) -> IntersphinxOption: invname, url_or_path, base_url = None, p1, p2 _, base_url = _object_inv_url_and_base_url(base_url) source = _intersphinx_source(url_or_path) - if source is IntersphinxSource.URL: + if source is URL: url_or_path, _ = _object_inv_url_and_base_url(url_or_path) else: # At this point we have: INVENTORY_NAME:URL_TO_OBJECTS.INV invname, url_or_path = p1, p2 url_or_path, base_url = _object_inv_url_and_base_url(url_or_path) - source = IntersphinxSource.URL + source = URL return IntersphinxOption( invname=invname, @@ -421,7 +422,7 @@ def _parse_intersphinx(s:str) -> IntersphinxOption: # we have INVENTORY_NAME:URL_OR_FILEPATH_TO_OBJECTS.INV:BASE_URL invname, url_or_path, base_url = parts source = _intersphinx_source(url_or_path) - if source is IntersphinxSource.URL: + if source is URL: url_or_path, _ = _object_inv_url_and_base_url(url_or_path) _, base_url = _object_inv_url_and_base_url(base_url) return IntersphinxOption( diff --git a/pydoctor/sphinx.py b/pydoctor/sphinx.py index 66e5cb97a..23b1d5134 100644 --- a/pydoctor/sphinx.py +++ b/pydoctor/sphinx.py @@ -36,10 +36,8 @@ def close(self) -> None: ... logger = logging.getLogger(__name__) - -class IntersphinxSource(enum.Enum): - FILE = enum.auto() - URL = enum.auto() +FILE = 1 +URL = 2 @attr.s(auto_attribs=True) class IntersphinxOption: @@ -47,7 +45,7 @@ class IntersphinxOption: Represent a single, parsed C{--intersphinx} option. """ invname: Optional[str] - source: IntersphinxSource + source: object url_or_path: str base_url: str @@ -121,20 +119,13 @@ def update(self, cache: CacheT, intersphinx: IntersphinxOption) -> None: invname = intersphinx.invname base_url = intersphinx.base_url - if intersphinx.source is IntersphinxSource.URL: + if intersphinx.source is URL: # That's an URL. data = cache.get(url_or_path) if not data: - msg = f'Failed to get object inventory from url {url_or_path}' - if not url_or_path.startswith(('http://', 'https://') - ) and url_or_path.startswith(base_url): - msg += ('. To load inventory from a file, ' - 'the BASE_URL part of the intersphinx option is required.') - - self.error('sphinx', msg) + self.error('sphinx', f'Failed to get object inventory from url {url_or_path}') return - else: - assert intersphinx.source is IntersphinxSource.FILE + elif intersphinx.source is FILE: # That's a file. try: data = Path(url_or_path).read_bytes() @@ -142,10 +133,13 @@ def update(self, cache: CacheT, intersphinx: IntersphinxOption) -> None: self.error('sphinx', f'Inventory file {url_or_path} does not exist. ' 'To load inventory from an URL, please include the http(s) scheme.') + return except Exception as e: self.error('sphinx', f'Failed to read inventory file {url_or_path}: {e}') return + else: + assert False inventory_name = invname or str(hash(url_or_path)) if inventory_name in self._inventories: @@ -583,3 +577,26 @@ def prepareCache( maxAgeDictionary, ) return IntersphinxCache(sessionFactory()) + +if __name__ == "__main__": + import sys + from pydoctor.options import Options + + opt = Options.from_args(sys.argv[1:]) + + cache = prepareCache(clearCache=False, enableCache=True, + cachePath=USER_INTERSPHINX_CACHE, + maxAge=MAX_AGE_DEFAULT) + + inv = SphinxInventory(lambda section, msg, **kw: print(msg)) + + for i in opt.intersphinx: + inv.update(cache, i) + + for name, objs in inv._links.items(): + for o in objs: + print(f'{name} ' + f'{(o.domain+":") if o.domain else ""}' + f'{o.reftype} ' + f'{o.location} ' + f'{o.display} ') \ No newline at end of file diff --git a/pydoctor/test/test_options.py b/pydoctor/test/test_options.py index da16057c8..66ac6e0f2 100644 --- a/pydoctor/test/test_options.py +++ b/pydoctor/test/test_options.py @@ -6,7 +6,7 @@ from pydoctor import model from pydoctor.options import (PydoctorConfigParser, Options, _split_intersphinx_parts, - _object_inv_url_and_base_url, IntersphinxOption, IntersphinxSource) + _object_inv_url_and_base_url, IntersphinxOption, FILE, URL) from pydoctor.test import FixtureRequest, TempPathFactory @@ -171,12 +171,12 @@ def test_config_parsers(project_conf:str, pydoctor_conf:str, tempDir:Path) -> No assert options.privacy == [(model.PrivacyClass.HIDDEN, 'pydoctor.test')] assert options.intersphinx[0] == IntersphinxOption( - invname=None, source=IntersphinxSource.URL, + invname=None, source=URL, url_or_path='https://docs.python.org/3/objects.inv', base_url='https://docs.python.org/3') assert options.intersphinx[-1] == IntersphinxOption( - invname=None, source=IntersphinxSource.URL, + invname=None, source=URL, url_or_path='https://tristanlatr.github.io/apidocs/docutils/objects.inv', base_url='https://tristanlatr.github.io/apidocs/docutils') @@ -215,7 +215,7 @@ def test_repeatable_options_multiple_configs_and_args(tempDir:Path) -> None: assert options.verbosity == 1 assert options.intersphinx == [IntersphinxOption( invname=None, - source=IntersphinxSource.URL, + source=URL, url_or_path='https://docs.python.org/3/objects.inv', base_url='https://docs.python.org/3'),] assert options.projectname == "Hello World!" @@ -226,7 +226,7 @@ def test_repeatable_options_multiple_configs_and_args(tempDir:Path) -> None: assert options.verbosity == 3 assert options.intersphinx == [IntersphinxOption( invname=None, - source=IntersphinxSource.URL, + source=URL, url_or_path='https://docs.python.org/3/objects.inv', base_url='https://docs.python.org/3'),] assert options.projectname == "Hello World!" @@ -239,13 +239,13 @@ def test_repeatable_options_multiple_configs_and_args(tempDir:Path) -> None: assert options.intersphinx == [ IntersphinxOption( invname=None, - source=IntersphinxSource.URL, + source=URL, url_or_path='https://twistedmatrix.com/documents/current/api/objects.inv', base_url='https://twistedmatrix.com/documents/current/api'), IntersphinxOption( invname=None, - source=IntersphinxSource.URL, + source=URL, url_or_path='https://urllib3.readthedocs.io/en/latest/objects.inv', base_url='https://urllib3.readthedocs.io/en/latest'), ] diff --git a/pydoctor/test/test_sphinx.py b/pydoctor/test/test_sphinx.py index 674bab29d..f9f153bb4 100644 --- a/pydoctor/test/test_sphinx.py +++ b/pydoctor/test/test_sphinx.py @@ -418,7 +418,7 @@ def test_update_fail(inv_reader: InvReader) -> None: inv_reader.update(cast('sphinx.CacheT', {}), options.IntersphinxOption( - None, options.IntersphinxSource.URL, + None, options.URL, url, 'http://some.tld/', )) From 6a25195d5a2693ee72ad9f591a064a47ee1fb89c Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Tue, 16 Jan 2024 23:03:13 -0500 Subject: [PATCH 04/20] Add readme entries --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index b2b054189..d331f91d8 100644 --- a/README.rst +++ b/README.rst @@ -90,7 +90,7 @@ This is the last major release to support Python 3.7. ``BASE_URL`` is the base for the generated links, it is mandatory if loading the inventory from a file. - Pydoctor now supports linking to arbitrary intersphinx references with Sphinx role ``:external:``. - Other common Sphinx reference roles like ``:ref:``, ``:any:``, ``:class:``, ``py:*``, etc are now - properly interpreted (instead of stripping the with a regex). + properly interpreted (instead of being simply stripping from the docstring). pydoctor 23.9.1 ^^^^^^^^^^^^^^^ From c3692a945ca4ccfcf56d87ba22c2c63c05607bb9 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 10:16:14 -0500 Subject: [PATCH 05/20] Use another option to load intersphinx from local files. This removes the complications --- pydoctor/model.py | 2 + pydoctor/options.py | 140 ++++++++++++++++++++-------------- pydoctor/sphinx.py | 90 +++++++++++----------- pydoctor/test/test_options.py | 54 ++++++------- pydoctor/test/test_sphinx.py | 8 +- 5 files changed, 151 insertions(+), 143 deletions(-) diff --git a/pydoctor/model.py b/pydoctor/model.py index 551b95f47..8ba7a7329 100644 --- a/pydoctor/model.py +++ b/pydoctor/model.py @@ -1447,6 +1447,8 @@ def fetchIntersphinxInventories(self, cache: CacheT) -> None: """ for i in self.options.intersphinx: self.intersphinx.update(cache, i) + for i in self.options.intersphinx_files: + self.intersphinx.update_from_file(i) def defaultPostProcess(system:'System') -> None: for cls in system.objectsOfType(Class): diff --git a/pydoctor/options.py b/pydoctor/options.py index 743c4d837..b3edbc1a2 100644 --- a/pydoctor/options.py +++ b/pydoctor/options.py @@ -5,7 +5,7 @@ import enum import re -from typing import Sequence, List, Optional, Type, Tuple, TYPE_CHECKING +from typing import Sequence, List, Optional, Type, Tuple, TYPE_CHECKING, TypeAlias import sys import functools from pathlib import Path @@ -17,7 +17,7 @@ from pydoctor import __version__ from pydoctor.themes import get_themes from pydoctor.epydoc.markup import get_supported_docformats -from pydoctor.sphinx import MAX_AGE_HELP, USER_INTERSPHINX_CACHE, IntersphinxOption, URL, FILE +from pydoctor.sphinx import MAX_AGE_HELP, USER_INTERSPHINX_CACHE from pydoctor.utils import parse_path, findClassFromDottedName, parse_privacy_tuple, error from pydoctor._configparser import CompositeConfigParser, IniConfigParser, TomlConfigParser, ValidatorParser @@ -175,11 +175,18 @@ def get_parser() -> ArgumentParser: parser.add_argument( '--intersphinx', action='append', dest='intersphinx', - metavar='[INVENTORY_NAME:]URL_OR_PATH[:BASE_URL]', default=[], + metavar='[INVENTORY_NAME:]URL[:BASE_URL]', default=[], help=( - "Use Sphinx objects inventory to generate links to external " + "Load Sphinx objects inventory from URLs to generate links to external " "documentation. Can be repeated.")) + parser.add_argument( + '--intersphinx-file', action='append', dest='intersphinx_files', + metavar='[INVENTORY_NAME:]PATH:BASE_URL', default=[], + help=( + "Load Sphinx objects inventory from a local file to generate links to external " + "documentation. Can be repeated. Note that the base URL must be given.")) + parser.add_argument( '--enable-intersphinx-cache', dest='enable_intersphinx_cache_deprecated', @@ -288,17 +295,6 @@ def _convert_htmlwriter(s: str) -> Type['IWriter']: def _convert_privacy(l: List[str]) -> List[Tuple['model.PrivacyClass', str]]: return list(map(functools.partial(parse_privacy_tuple, opt='--privacy'), l)) -# def _intersphinx_source(url_or_path:str) -> object: -# """ -# Infer the kind source this string refers to. - -# """ -# if url_or_path.startswith(HTTP_SCHEMES): -# return URL -# if not url_or_path.endswith('.inv'): -# return URL -# return FILE - def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: """ Given a base url OR an url to objects.inv file. @@ -337,9 +333,10 @@ def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: # --intersphinx=pydoctor:inventories/pack.inv:http://something.org/ # --intersphinx=pydoctor:c:/data/inventories/pack.inv:http://something.org/ -def _split_intersphinx_parts(s:str) -> List[str]: +def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: """ This replies on the fact the filename does not contain a colon. + # TODO: find a manner to lift this limitation. """ parts = [''] part_nb = 0 @@ -355,27 +352,63 @@ def _split_intersphinx_parts(s:str) -> List[str]: # Still not a separator, a windows drive :c:/ pass elif len(parts) == 3: - raise ValueError(f'Malformed --intersphinx option, too many parts, beware that colons in filenames are not supported') + raise ValueError(f'Malformed {option} option, too many parts, beware that colons in filenames are not supported') elif not parts[part_nb]: - raise ValueError(f'Malformed --intersphinx option, two consecutive colons is not valid') + raise ValueError(f'Malformed {option} option, two consecutive colons is not valid') else: parts.append('') part_nb += 1 continue parts[part_nb] += c - return parts _RE_DRIVE_LIKE = re.compile(r':[a-z]:(\\|\/)', re.IGNORECASE) -HTTP_SCHEMES = ('http://', 'https://') + +IntersphinxOption: TypeAlias = Tuple[Optional[str], str, str] + +def _parse_intersphinx_file(s:str) -> IntersphinxOption: + """ + Given a string like:: + + [INVENTORY_NAME:]PATH:BASE_URL + + Returns a L{IntersphinxOption} tuple. + """ + try: + parts = _split_intersphinx_parts(s, '--intersphinx-file') + except ValueError as e: + error(str(e)) + + nb_parts = len(parts) + + if nb_parts == 1: + error('Invalid --intesphinx-file option: Missing base URL') + elif nb_parts == 2: + path, base_url = parts + _, base_url = _object_inv_url_and_base_url(base_url) + invname = None + + elif nb_parts == 3: + invname, path, base_url = parts + _, base_url = _object_inv_url_and_base_url(base_url) + + else: + assert False + + if invname and not invname.replace('-', '_').isidentifier(): + error('Invalid --intersphinx option: The inventory name must be an indentifier-like name') + + return invname, path, base_url + + def _parse_intersphinx(s:str) -> IntersphinxOption: """ Given a string like:: - [INVENTORY_NAME:]URL_OR_FILEPATH_TO_OBJECTS.INV[:BASE_URL] + [INVENTORY_NAME:]URL[:BASE_URL] - Returns a L{IntersphinxOption} instance. + Returns a L{IntersphinxOption} tuple. """ try: @@ -386,53 +419,36 @@ def _parse_intersphinx(s:str) -> IntersphinxOption: if nb_parts == 1: # Just URL_TO_OBJECTS.INV, it cannot be a filepath because a filepath must be # followed by a base URL. - objects_inv_url, base_url = _object_inv_url_and_base_url(*parts) - return IntersphinxOption( - invname=None, - source=URL, - url_or_path=objects_inv_url, - base_url=base_url - ) + url, base_url = _object_inv_url_and_base_url(*parts) + invname = None elif nb_parts == 2: # If there is only one ':', the first part might - # been either the invname or the url or file path. + # be either the invname or the url, so we need to use some heuristics p1, p2 = parts - if p1.endswith('.inv') or p1.startswith(HTTP_SCHEMES): - # So at this point we have: URL_OR_FILEPATH_TO_OBJECTS.INV:BASE_URL - invname, url_or_path, base_url = None, p1, p2 + if p1.startswith(('http://', 'https://')): + # So at this point we have: URL:BASE_URL + invname, url, base_url = None, p1, p2 _, base_url = _object_inv_url_and_base_url(base_url) - source = _intersphinx_source(url_or_path) - if source is URL: - url_or_path, _ = _object_inv_url_and_base_url(url_or_path) + url, _ = _object_inv_url_and_base_url(url) else: - # At this point we have: INVENTORY_NAME:URL_TO_OBJECTS.INV - invname, url_or_path = p1, p2 - url_or_path, base_url = _object_inv_url_and_base_url(url_or_path) - source = URL - - return IntersphinxOption( - invname=invname, - source=source, - url_or_path=url_or_path, - base_url=base_url - ) + # At this point we have: INVENTORY_NAME:URL + invname, url = p1, p2 + url, base_url = _object_inv_url_and_base_url(url) elif nb_parts == 3: - # we have INVENTORY_NAME:URL_OR_FILEPATH_TO_OBJECTS.INV:BASE_URL - invname, url_or_path, base_url = parts - source = _intersphinx_source(url_or_path) - if source is URL: - url_or_path, _ = _object_inv_url_and_base_url(url_or_path) - _, base_url = _object_inv_url_and_base_url(base_url) - return IntersphinxOption( - invname=invname, - source=source, - url_or_path=url_or_path, - base_url=base_url - ) + # we have INVENTORY_NAME:URL:BASE_URL + invname, url, base_url = parts + url, _ = _object_inv_url_and_base_url(url) + _, base_url = _object_inv_url_and_base_url(base_url) + else: assert False + + if invname and not invname.replace('-', '_').isidentifier(): + error('Invalid --intersphinx option: The inventory name must be an indentifier-like name') + + return invname, url, base_url except ValueError as e: error(str(e)) @@ -443,6 +459,11 @@ def _convert_intersphinx(l: List[str]) -> List[IntersphinxOption]: """ return list(map(_parse_intersphinx, l)) +def _convert_intersphinx_files(l: List[str]) -> List[IntersphinxOption]: + """ + Returns list of tuples: (INVENTORY_NAME, URL_OR_FILEPATH_TO_OBJECTS.INV, BASE_URL) + """ + return list(map(_parse_intersphinx_file, l)) _RECOGNIZED_SOURCE_HREF = { # Sourceforge @@ -513,6 +534,7 @@ class Options: quietness: int = attr.ib() introspect_c_modules: bool = attr.ib() intersphinx: List[IntersphinxOption] = attr.ib(converter=_convert_intersphinx) + intersphinx_files: List[IntersphinxOption] = attr.ib(converter=_convert_intersphinx_files) enable_intersphinx_cache: bool = attr.ib() intersphinx_cache_path: str = attr.ib() clear_intersphinx_cache: bool = attr.ib() diff --git a/pydoctor/sphinx.py b/pydoctor/sphinx.py index 23b1d5134..02d99cb9c 100644 --- a/pydoctor/sphinx.py +++ b/pydoctor/sphinx.py @@ -26,6 +26,7 @@ if TYPE_CHECKING: from pydoctor.model import Documentable from typing_extensions import Protocol + from pydoctor.options import IntersphinxOption class CacheT(Protocol): def get(self, url: str) -> Optional[bytes]: ... @@ -36,19 +37,6 @@ def close(self) -> None: ... logger = logging.getLogger(__name__) -FILE = 1 -URL = 2 - -@attr.s(auto_attribs=True) -class IntersphinxOption: - """ - Represent a single, parsed C{--intersphinx} option. - """ - invname: Optional[str] - source: object - url_or_path: str - base_url: str - def parse_domain_reftype(name: str) -> Tuple[Optional[str], str]: """ Given a string like C{class} or C{py:class} or C{rst:directive:option}, @@ -110,48 +98,24 @@ def error(self, where: str, message: str) -> None: def message(self, where: str, message: str) -> None: self._logger(where, message, thresh=1) - - def update(self, cache: CacheT, intersphinx: IntersphinxOption) -> None: - """ - Update inventory from an L{IntersphinxOption} instance. - """ - url_or_path = intersphinx.url_or_path - invname = intersphinx.invname - base_url = intersphinx.base_url - - if intersphinx.source is URL: - # That's an URL. - data = cache.get(url_or_path) - if not data: - self.error('sphinx', f'Failed to get object inventory from url {url_or_path}') - return - elif intersphinx.source is FILE: - # That's a file. - try: - data = Path(url_or_path).read_bytes() - except FileNotFoundError as e: - self.error('sphinx', - f'Inventory file {url_or_path} does not exist. ' - 'To load inventory from an URL, please include the http(s) scheme.') - return - except Exception as e: - self.error('sphinx', - f'Failed to read inventory file {url_or_path}: {e}') - return - else: - assert False - + + def _add_inventory(self, invname:str|None, url_or_path: str) -> str|None: inventory_name = invname or str(hash(url_or_path)) if inventory_name in self._inventories: + # We now trigger warning when the same inventory has been loaded twice. if invname: self.error('sphinx', f'Duplicate inventory {invname!r} from {url_or_path}') else: self.error('sphinx', f'Duplicate inventory from {url_or_path}') - return - + return None self._inventories.add(inventory_name) + return inventory_name + + def _update(self, data: bytes, + base_url:str, + inventory_name: str, ) -> None: payload = self._getPayload(base_url, data) invdata = self._parseInventory(base_url, payload, invname=inventory_name) @@ -159,6 +123,40 @@ def update(self, cache: CacheT, intersphinx: IntersphinxOption) -> None: for k,v in invdata.items(): self._links[k].extend(v) + def update(self, cache: CacheT, intersphinx: IntersphinxOption) -> None: + """ + Update inventory from an L{IntersphinxOption} tuple that is URL-based. + """ + invname, url, base_url = intersphinx + + # That's an URL. + data = cache.get(url) + if not data: + self.error('sphinx', f'Failed to get object inventory from url {url}') + return + + inventory_name = self._add_inventory(invname, url) + if inventory_name: + self._update(data, base_url, inventory_name) + + def update_from_file(self, intersphinx: IntersphinxOption) -> None: + """ + Update inventory from an L{IntersphinxOption} tuple that is File-based. + """ + invname, path, base_url = intersphinx + + # That's a file. + try: + data = Path(path).read_bytes() + except Exception as e: + self.error('sphinx', + f'Failed to read inventory file {path}: {e}') + return + + inventory_name = self._add_inventory(invname, path) + if inventory_name: + self._update(data, base_url, inventory_name) + def _getPayload(self, base_url: str, data: bytes) -> str: """ Parse inventory and return clear text payload without comments. diff --git a/pydoctor/test/test_options.py b/pydoctor/test/test_options.py index 66ac6e0f2..ad921518d 100644 --- a/pydoctor/test/test_options.py +++ b/pydoctor/test/test_options.py @@ -6,7 +6,7 @@ from pydoctor import model from pydoctor.options import (PydoctorConfigParser, Options, _split_intersphinx_parts, - _object_inv_url_and_base_url, IntersphinxOption, FILE, URL) + _object_inv_url_and_base_url ) from pydoctor.test import FixtureRequest, TempPathFactory @@ -170,15 +170,13 @@ def test_config_parsers(project_conf:str, pydoctor_conf:str, tempDir:Path) -> No assert options.warnings_as_errors == True assert options.privacy == [(model.PrivacyClass.HIDDEN, 'pydoctor.test')] - assert options.intersphinx[0] == IntersphinxOption( - invname=None, source=URL, - url_or_path='https://docs.python.org/3/objects.inv', - base_url='https://docs.python.org/3') + assert options.intersphinx[0] == (None, + 'https://docs.python.org/3/objects.inv', + 'https://docs.python.org/3') - assert options.intersphinx[-1] == IntersphinxOption( - invname=None, source=URL, - url_or_path='https://tristanlatr.github.io/apidocs/docutils/objects.inv', - base_url='https://tristanlatr.github.io/apidocs/docutils') + assert options.intersphinx[-1] == (None, + 'https://tristanlatr.github.io/apidocs/docutils/objects.inv', + 'https://tristanlatr.github.io/apidocs/docutils') def test_repeatable_options_multiple_configs_and_args(tempDir:Path) -> None: config1 = """ @@ -213,22 +211,18 @@ def test_repeatable_options_multiple_configs_and_args(tempDir:Path) -> None: options = Options.defaults() assert options.verbosity == 1 - assert options.intersphinx == [IntersphinxOption( - invname=None, - source=URL, - url_or_path='https://docs.python.org/3/objects.inv', - base_url='https://docs.python.org/3'),] + assert options.intersphinx == [(None, + 'https://docs.python.org/3/objects.inv', + 'https://docs.python.org/3'),] assert options.projectname == "Hello World!" assert options.projectversion == "2050.4C" options = Options.from_args(['-vv']) assert options.verbosity == 3 - assert options.intersphinx == [IntersphinxOption( - invname=None, - source=URL, - url_or_path='https://docs.python.org/3/objects.inv', - base_url='https://docs.python.org/3'),] + assert options.intersphinx == [(None, + 'https://docs.python.org/3/objects.inv', + 'https://docs.python.org/3'),] assert options.projectname == "Hello World!" assert options.projectversion == "2050.4C" @@ -237,17 +231,13 @@ def test_repeatable_options_multiple_configs_and_args(tempDir:Path) -> None: assert options.verbosity == 3 assert options.intersphinx == [ - IntersphinxOption( - invname=None, - source=URL, - url_or_path='https://twistedmatrix.com/documents/current/api/objects.inv', - base_url='https://twistedmatrix.com/documents/current/api'), - - IntersphinxOption( - invname=None, - source=URL, - url_or_path='https://urllib3.readthedocs.io/en/latest/objects.inv', - base_url='https://urllib3.readthedocs.io/en/latest'), + (None, + 'https://twistedmatrix.com/documents/current/api/objects.inv', + 'https://twistedmatrix.com/documents/current/api'), + + (None, + 'https://urllib3.readthedocs.io/en/latest/objects.inv', + 'https://urllib3.readthedocs.io/en/latest'), ] assert options.projectname == "Hello World!" assert options.projectversion == "2050.4C" @@ -290,7 +280,7 @@ def test_validations(tempDir:Path) -> None: assert options.warnings_as_errors == False assert options.htmloutput == '1' -def test_intersphinx_split_on_colon(): +def test_intersphinx_split_on_colon() -> None: assert _split_intersphinx_parts('http://something.org/')==['http://something.org/'] assert _split_intersphinx_parts('something.org')==['something.org'] @@ -309,7 +299,7 @@ def test_intersphinx_split_on_colon(): with pytest.raises(ValueError, match='Malformed --intersphinx option, too many parts'): _split_intersphinx_parts('pydoctor:a:b:c:d') -def test_intersphinx_base_url_deductions(): +def test_intersphinx_base_url_deductions() -> None: assert _object_inv_url_and_base_url('http://some.url/api/objects.inv')==('http://some.url/api/objects.inv', 'http://some.url/api') assert _object_inv_url_and_base_url('http://some.url/api')==('http://some.url/api/objects.inv', 'http://some.url/api') assert _object_inv_url_and_base_url('http://some.url/api/')==('http://some.url/api/objects.inv', 'http://some.url/api') diff --git a/pydoctor/test/test_sphinx.py b/pydoctor/test/test_sphinx.py index f9f153bb4..7d5a3727a 100644 --- a/pydoctor/test/test_sphinx.py +++ b/pydoctor/test/test_sphinx.py @@ -403,8 +403,7 @@ def test_update_bad_url(inv_reader: InvReader) -> None: assert inv_reader._links == {} expected_log = [( - 'sphinx', ('Failed to get object inventory from url really.bad.url/objects.inv. ' - 'To load inventory from a file, the BASE_URL part of the intersphinx option is required.'), -1 + 'sphinx', ('Failed to get object inventory from url really.bad.url/objects.inv'), -1 )] assert expected_log == inv_reader._logger.messages @@ -417,10 +416,7 @@ def test_update_fail(inv_reader: InvReader) -> None: url = 'http://some.tld/o.inv' inv_reader.update(cast('sphinx.CacheT', {}), - options.IntersphinxOption( - None, options.URL, - url, 'http://some.tld/', - )) + (None, url, 'http://some.tld/', )) assert inv_reader._links == {} expected_log = [( From 323a98ab76c8b0b87b040084f84c110ad0346c53 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 12:31:45 -0500 Subject: [PATCH 06/20] Small adjustments --- README.rst | 11 +++++++---- pydoctor/options.py | 37 ++++++++++++++++++++++++----------- pydoctor/test/test_options.py | 4 ++-- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/README.rst b/README.rst index d331f91d8..20c75cda7 100644 --- a/README.rst +++ b/README.rst @@ -84,13 +84,16 @@ This is the last major release to support Python 3.7. Highest priority callables will be called first during post-processing. * Fix too noisy ``--verbose`` mode (suppres some ambiguous annotations warnings). * Major improvements of the intersphinx integration: - - The ``--intersphinx`` option now supports the following format: ``[INVENTORY_NAME:]URL_OR_PATH[:BASE_URL]``. - Where ``INVENTORY_NAME`` is a an arbitrary name used to filter ``:external:`` references, - ``URL_OR_PATH`` is an URL pointing to a ``objects.inv`` file OR a file path pointing to a local ``.inv`` file, - ``BASE_URL`` is the base for the generated links, it is mandatory if loading the inventory from a file. - Pydoctor now supports linking to arbitrary intersphinx references with Sphinx role ``:external:``. - Other common Sphinx reference roles like ``:ref:``, ``:any:``, ``:class:``, ``py:*``, etc are now properly interpreted (instead of being simply stripping from the docstring). + - The ``--intersphinx`` option now supports the following format: ``[INVENTORY_NAME:]URL[:BASE_URL]``. + Where ``INVENTORY_NAME`` is a an arbitrary name used to filter ``:external:`` references, + ``URL`` is an URL pointing to a ``objects.inv`` file (it can also be the base URL, ``/objects.inv`` will be added to the URL in this case). + It is recommended to always include the HTTP scheme in the intersphinx URLs. + - The ``--intersphinx-file`` option has been added in order to load a local inventory file, this option + support the following format: ``[INVENTORY_NAME:]PATH:BASE_URL``. + ``BASE_URL`` is the base for the generated links, it is mandatory if loading the inventory from a file. pydoctor 23.9.1 ^^^^^^^^^^^^^^^ diff --git a/pydoctor/options.py b/pydoctor/options.py index b3edbc1a2..492701d9e 100644 --- a/pydoctor/options.py +++ b/pydoctor/options.py @@ -315,6 +315,17 @@ def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: url += 'objects.inv' return url, base_url +def _is_identifier_like(s:str) -> bool: + """ + Examples of identifier-like strings: + + identifier + identifier_thing + zope.interface + identifier-like + """ + return s.replace('-', '_').replace('.', '_').isidentifier() + # So these are the cases that we should handle: # --intersphinx=http://something.org/ # --intersphinx=something.org @@ -333,6 +344,7 @@ def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: # --intersphinx=pydoctor:inventories/pack.inv:http://something.org/ # --intersphinx=pydoctor:c:/data/inventories/pack.inv:http://something.org/ +_RE_DRIVE_LIKE = re.compile(r':[a-z]:(\\|\/)', re.IGNORECASE) def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: """ This replies on the fact the filename does not contain a colon. @@ -352,9 +364,9 @@ def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: # Still not a separator, a windows drive :c:/ pass elif len(parts) == 3: - raise ValueError(f'Malformed {option} option, too many parts, beware that colons in filenames are not supported') + raise ValueError(f'Malformed {option} option {s!r}: too many parts, beware that colons in filenames are not supported') elif not parts[part_nb]: - raise ValueError(f'Malformed {option} option, two consecutive colons is not valid') + raise ValueError(f'Malformed {option} option {s!r}: two consecutive colons is not valid') else: parts.append('') part_nb += 1 @@ -362,8 +374,6 @@ def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: parts[part_nb] += c return parts -_RE_DRIVE_LIKE = re.compile(r':[a-z]:(\\|\/)', re.IGNORECASE) - IntersphinxOption: TypeAlias = Tuple[Optional[str], str, str] def _parse_intersphinx_file(s:str) -> IntersphinxOption: @@ -382,7 +392,8 @@ def _parse_intersphinx_file(s:str) -> IntersphinxOption: nb_parts = len(parts) if nb_parts == 1: - error('Invalid --intesphinx-file option: Missing base URL') + error(f'Malformed --intesphinx-file option {s!r}: Missing base URL') + elif nb_parts == 2: path, base_url = parts _, base_url = _object_inv_url_and_base_url(base_url) @@ -395,8 +406,8 @@ def _parse_intersphinx_file(s:str) -> IntersphinxOption: else: assert False - if invname and not invname.replace('-', '_').isidentifier(): - error('Invalid --intersphinx option: The inventory name must be an indentifier-like name') + if invname and not _is_identifier_like(invname): + error(f'Malformed --intersphinx option {s!r}: The inventory name must be an indentifier-like name') return invname, path, base_url @@ -417,8 +428,7 @@ def _parse_intersphinx(s:str) -> IntersphinxOption: nb_parts = len(parts) if nb_parts == 1: - # Just URL_TO_OBJECTS.INV, it cannot be a filepath because a filepath must be - # followed by a base URL. + # Just URL url, base_url = _object_inv_url_and_base_url(*parts) invname = None @@ -431,6 +441,11 @@ def _parse_intersphinx(s:str) -> IntersphinxOption: invname, url, base_url = None, p1, p2 _, base_url = _object_inv_url_and_base_url(base_url) url, _ = _object_inv_url_and_base_url(url) + + elif not p2.startswith(('http://', 'https://')): + # This is ambiguous, so raise an error. + error(f'Ambiguous --intersphinx option {s!r}: Please include the HTTP scheme on all URLs') + else: # At this point we have: INVENTORY_NAME:URL invname, url = p1, p2 @@ -445,8 +460,8 @@ def _parse_intersphinx(s:str) -> IntersphinxOption: else: assert False - if invname and not invname.replace('-', '_').isidentifier(): - error('Invalid --intersphinx option: The inventory name must be an indentifier-like name') + if invname and not _is_identifier_like(invname): + error(f'Malformed --intersphinx option {s!r}: The inventory name must be an indentifier-like name') return invname, url, base_url diff --git a/pydoctor/test/test_options.py b/pydoctor/test/test_options.py index ad921518d..667a03cc4 100644 --- a/pydoctor/test/test_options.py +++ b/pydoctor/test/test_options.py @@ -294,9 +294,9 @@ def test_intersphinx_split_on_colon() -> None: assert _split_intersphinx_parts('pydoctor:inventories/pack.inv:http://something.org/')==['pydoctor', 'inventories/pack.inv', 'http://something.org/'] assert _split_intersphinx_parts('pydoctor:c:/data/inventories/pack.inv:http://something.org/')==['pydoctor', 'c:/data/inventories/pack.inv', 'http://something.org/'] - with pytest.raises(ValueError, match='Malformed --intersphinx option, two consecutive colons is not valid'): + with pytest.raises(ValueError, match='Malformed --intersphinx option \'pydoctor::\': two consecutive colons is not valid'): _split_intersphinx_parts('pydoctor::') - with pytest.raises(ValueError, match='Malformed --intersphinx option, too many parts'): + with pytest.raises(ValueError, match='Malformed --intersphinx option \'pydoctor:a:b:c:d\': too many parts'): _split_intersphinx_parts('pydoctor:a:b:c:d') def test_intersphinx_base_url_deductions() -> None: From 8e3244dc9be11eb1616bdb3347426ab234a63d3f Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 13:30:44 -0500 Subject: [PATCH 07/20] Use an implicit narrative documentation link from epytext docstring in our own docstring. --- pydoctor/astbuilder.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 218bb5da9..96a7ff9c1 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -439,17 +439,10 @@ def _importNames(self, modname: str, names: Iterable[ast.alias]) -> None: _localNameToFullName[asname] = f'{modname}.{orgname}' def visit_Import(self, node: ast.Import) -> None: - """Process an import statement. - - The grammar for the statement is roughly: - - mod_as := DOTTEDNAME ['as' NAME] - import_stmt := 'import' mod_as (',' mod_as)* + """ + Process an import statement. - and this is translated into a node which is an instance of Import wih - an attribute 'names', which is in turn a list of 2-tuples - (dotted_name, as_name) where as_name is None if there was no 'as foo' - part of the statement. + See L{import}. """ if not isinstance(self.builder.current, model.CanContainImportsDocumentable): # processing import statement in odd context From 13a9314bc68000172ffbb31b3eced360d3fd0d8f Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 16:17:32 -0500 Subject: [PATCH 08/20] Relax the epytext syntax over link targets: Now a target can be anything. --- pydoctor/epydoc/markup/epytext.py | 10 +++++----- pydoctor/linker.py | 15 +++++++++++++++ pydoctor/test/epydoc/test_epytext.py | 14 ++++++++++++++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/pydoctor/epydoc/markup/epytext.py b/pydoctor/epydoc/markup/epytext.py index 0026614d2..111542b0a 100644 --- a/pydoctor/epydoc/markup/epytext.py +++ b/pydoctor/epydoc/markup/epytext.py @@ -1181,8 +1181,10 @@ def _colorize_link(link: Element, token: Token, end: int, errors: List[ParseErro # Clean up the target. For URIs, assume http or mailto if they # don't specify (no relative urls) - target = re.sub(r'\s', '', target) + # we used to stip spaces from the target here but that's no good + # since intersphinx targets can contain spaces. if link.tag=='uri': + target = re.sub(r'\s', '', target) if not re.match(r'\w+:', target): if re.match(r'\w+@(\w+)(\.\w+)*', target): target = 'mailto:' + target @@ -1191,10 +1193,8 @@ def _colorize_link(link: Element, token: Token, end: int, errors: List[ParseErro elif link.tag=='link': # Remove arg lists for functions (e.g., L{_colorize_link()}) target = re.sub(r'\(.*\)$', '', target) - if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target): - estr = "Bad link target." - errors.append(ColorizingError(estr, token, end)) - return + # We used to validate the link target against the following regex: r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$' + # but we don;t do that anymore since the intersphinx taget names can contain any kind of text. # Construct the target element. target_elt = Element('target', target, lineno=str(token.startline)) diff --git a/pydoctor/linker.py b/pydoctor/linker.py index 595077a31..cb3b96c44 100644 --- a/pydoctor/linker.py +++ b/pydoctor/linker.py @@ -3,6 +3,7 @@ """ from __future__ import annotations +import re import contextlib from twisted.web.template import Tag, tags from typing import ( @@ -54,6 +55,9 @@ def intersphinx_link(label:"Flattenable", url:str) -> Tag: """ return tags.a(label, href=url, class_='intersphinx-link') + +_CONTAINS_SPACE_RE = re.compile(r'.*\s.*') +_SPACE_RE = re.compile(r'\s') class _EpydocLinker(DocstringLinker): """ This linker implements the xref lookup logic. @@ -273,6 +277,17 @@ def _resolve_identifier_xref(self, if intersphinx_target_url_unfiltered: message += f' (your link role filters {intersphinx_target_url_unfiltered!r}, is it by design?)' + # To cope with the fact that we're not striping spaces from epytext parsed target anymore, + # some target that span over multiple lines will be misinterpreted with having a space + # So we check if the taget has spaces, and if it does we try again without the spaces. + if _CONTAINS_SPACE_RE.match(identifier): + try: + return self._resolve_identifier_xref(_SPACE_RE.sub('', identifier), + lineno, invname=invname, domain=domain, + reftype=reftype, external=external) + except LookupError: + pass + if self.reporting_obj: self.reporting_obj.report(message, 'resolve_identifier_xref', lineno) raise LookupError(identifier) diff --git a/pydoctor/test/epydoc/test_epytext.py b/pydoctor/test/epydoc/test_epytext.py index c3a1ef452..fa4485a10 100644 --- a/pydoctor/test/epydoc/test_epytext.py +++ b/pydoctor/test/epydoc/test_epytext.py @@ -21,6 +21,20 @@ def parse(s: str) -> str: # this strips off the ... return ''.join(str(n) for n in element.children) +def test_links() -> None: + L1 = 'L{link}' + L2 = 'L{something.link}' + L3 = 'L{any kind of text since intersphinx name can contain spaces}' + L4 = 'L{looks-like-identifier}' + L5 = 'L{this stuff }' + L6 = 'L{this stuff }' + + assert parse(L1) == "linklink" + assert parse(L2) == "something.linksomething.link" + assert parse(L3) == "any kind of text since intersphinx name can contain spacesany kind of text since intersphinx name can contain spaces" + assert parse(L4) == "looks-like-identifierlooks-like-identifier" + assert parse(L5) == "this stuffany kind of text" + assert parse(L6) == "this stufflooks-like-identifier" def test_basic_list() -> None: P1 = "This is a paragraph." From 4af7de804dc158d4e9ebf518313e6e2fce320aab Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 16:29:34 -0500 Subject: [PATCH 09/20] Add a test for the no-space lookup --- pydoctor/test/test_epydoc2stan.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pydoctor/test/test_epydoc2stan.py b/pydoctor/test/test_epydoc2stan.py index 24b1bedc2..fc08005cc 100644 --- a/pydoctor/test/test_epydoc2stan.py +++ b/pydoctor/test/test_epydoc2stan.py @@ -1080,6 +1080,29 @@ def test_EpydocLinker_look_for_intersphinx_no_link() -> None: assert None is result +def test_EpydocLinker_look_for_intersphinx_with_spaces() -> None: + """ + Return the link from inventory based on first package name. + """ + system = model.System() + inventory = SphinxInventory(system.msg) + inventory._links['base.module.other'] = [ + InventoryObject( + invname='xxx', + name='base.module.other', + base_url='http://tm.tld', + location='some.html', + domain='py', + reftype='mod', + display='-')] + system.intersphinx = inventory + target = model.Module(system, 'ignore-name') + sut = target.docstring_linker + assert isinstance(sut, linker._EpydocLinker) + + result = sut.look_for_intersphinx('base .module .other') + + assert 'http://tm.tld/some.html' == result def test_EpydocLinker_look_for_intersphinx_hit() -> None: """ From c2b2339da2ee413ab742a8a7b3bf21619de44e05 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 16:29:42 -0500 Subject: [PATCH 10/20] Fix docstring --- pydoctor/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydoctor/options.py b/pydoctor/options.py index 492701d9e..e40109694 100644 --- a/pydoctor/options.py +++ b/pydoctor/options.py @@ -317,7 +317,7 @@ def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: def _is_identifier_like(s:str) -> bool: """ - Examples of identifier-like strings: + Examples of identifier-like strings:: identifier identifier_thing From 8bc7c186c4daef9fd216803b8ccb75931284f8f2 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 16:31:31 -0500 Subject: [PATCH 11/20] Select the first matching item when we're not dealing with the 'py' domain. --- pydoctor/sphinx.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pydoctor/sphinx.py b/pydoctor/sphinx.py index 02d99cb9c..7f28a3fbf 100644 --- a/pydoctor/sphinx.py +++ b/pydoctor/sphinx.py @@ -258,12 +258,13 @@ def _filter(inv: InventoryObject) -> bool: py_options = list(filter(_filter, options)) if py_options: # TODO: We might want to leave a message in the case where several objects - # are still in consideration. But for now, we just pick the last one. + # are still in consideration. But for now, we just pick the last one because our old version + # of the inventory (that only dealing with the 'py' domain) would override names as they were parsed. return py_options[-1] - # Last, we fall back to the last matching object because our old version - # of the inventory would override names as they were parsed. - return options[-1] + # If it hasn't been found in the 'py' domain, then we use the first mathing object because it makes + # more sens in the case of the `std` domain of the standard library. + return options[0] def getLink(self, target: str, invname:Optional[str]=None, From 92c98d85e072f0e3c2c46e89608200b2b8d5c124 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 22:06:35 -0500 Subject: [PATCH 12/20] Small improvments --- pydoctor/epydoc/markup/restructuredtext.py | 172 ++++++++------------- pydoctor/linker.py | 41 +++-- pydoctor/options.py | 13 +- pydoctor/sphinx.py | 140 +++++++++++++++-- pydoctor/test/test_epydoc2stan.py | 5 +- 5 files changed, 226 insertions(+), 145 deletions(-) diff --git a/pydoctor/epydoc/markup/restructuredtext.py b/pydoctor/epydoc/markup/restructuredtext.py index baee389e1..e550cecea 100644 --- a/pydoctor/epydoc/markup/restructuredtext.py +++ b/pydoctor/epydoc/markup/restructuredtext.py @@ -62,7 +62,9 @@ from pydoctor.epydoc.markup.plaintext import ParsedPlaintextDocstring from pydoctor.epydoc.docutils import new_document, set_node_attributes from pydoctor.model import Documentable -from pydoctor.sphinx import parse_domain_reftype +from pydoctor.sphinx import (ALL_SUPPORTED_ROLES, SUPPORTED_DEFAULT_REFTYPES, + SUPPORTED_DOMAINS, SUPPORTED_EXTERNAL_DOMAINS, + SUPPORTED_EXTERNAL_STD_REFTYPES, parse_domain_reftype) #: A dictionary whose keys are the "consolidated fields" that are #: recognized by epydoc; and whose values are the corresponding epydoc @@ -99,7 +101,7 @@ def parse_docstring(docstring: str, """ writer = _DocumentPseudoWriter() reader = _EpydocReader(errors) # Outputs errors to the list. - with patch_docutils_role_function(): + with patch_docutils_role_function(errors): publish_string(docstring, writer=writer, reader=reader, settings_overrides={'report_level':10000, 'halt_level':10000, @@ -529,115 +531,66 @@ def parse_external(name: str) -> Tuple[Optional[str], Optional[str]]: raise ValueError(msg) return None, None -# roles._RoleFn -def link_role(role: str, rawtext: str, text: str, lineno: int, - inliner: docutils.parsers.rst.states.Inliner, - options:Any=None, content:Any=None) -> 'tuple[list[nodes.Node], list[nodes.Node]]': +class LinkRole: + def __init__(self, errors: List[ParseError]) -> None: + self.errors = errors - # See https://www.sphinx-doc.org/en/master/usage/referencing.htm - # and https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html - invname: Optional[str] = None - domain: Optional[str] = None - reftype: Optional[str] = None - external: bool = False - if role.startswith('external'): - try: - invname, suffix = parse_external(role) - if suffix is not None: - domain, reftype = parse_domain_reftype(suffix) - except ValueError as e: - print(f'{lineno}: {e}') - return [], [] # TODO: report the error - else: - external = True - elif role: - try: - domain, reftype = parse_domain_reftype(role) - except ValueError as e: - print(f'{lineno}: {e}') + # roles._RoleFn + def __call__(self, role: str, rawtext: str, text: str, lineno: int, + inliner: docutils.parsers.rst.states.Inliner, + options:Any=None, content:Any=None) -> 'tuple[list[nodes.Node], list[nodes.Node]]': + + # See https://www.sphinx-doc.org/en/master/usage/referencing.html + # and https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html + invname: Optional[str] = None + domain: Optional[str] = None + reftype: Optional[str] = None + external: bool = False + if role.startswith('external'): + try: + invname, suffix = parse_external(role) + if suffix is not None: + domain, reftype = parse_domain_reftype(suffix) + except ValueError as e: + self.errors.append(ParseError(str(e), lineno, is_fatal=False)) + return [], [] + else: + external = True + elif role: + try: + domain, reftype = parse_domain_reftype(role) + except ValueError as e: + self.errors.append(ParseError(str(e), lineno, is_fatal=False)) + return [], [] + + if reftype in SUPPORTED_DOMAINS and domain is None: + self.errors.append(ParseError('Malformed role name, domain is missing reference type', + lineno, is_fatal=False)) return [], [] - - if reftype in SUPPORTED_DOMAINS and domain is None: - print(f'{lineno}: Malformed role name, domain is missing reference type') - return [], [] - if reftype in SUPPORTED_DEFAULT_REFTYPES: - reftype = None - - if reftype in SUPPORTED_EXTERNAL_STD_REFTYPES and domain is None: - external = True - domain = 'std' - - if domain in SUPPORTED_EXTERNAL_DOMAINS: - external = True - - text_node = nodes.Text(text) - node = nodes.title_reference(rawtext, '', - invname=invname, - domain=domain, - reftype=reftype, - external=external, - lineno=lineno) - set_node_attributes(node, children=[text_node], document=inliner.document) # type: ignore - return [node], [] - -SUPPORTED_LOCAL_DOMAINS = set(( - # When using a domain specification, one must also give the reftype. - # links like :py:`something` will trigger an error. - # python domain references - 'py', -)) - -SUPPORTED_EXTERNAL_DOMAINS = set(( - # domain of other languages, complement this list as necessary - 'c', 'cpp', 'js', 'rust', - 'erl', 'php', 'rb', 'go', - # the standard domain - 'std', -)) - -SUPPORTED_DOMAINS = SUPPORTED_LOCAL_DOMAINS | SUPPORTED_EXTERNAL_DOMAINS - -SUPPORTED_PY_REFTYPES = set(( - # Specific objects types in the 'py' domains. - 'mod', 'module', - 'func', 'function', - 'meth', 'method', - 'data', - 'const', 'constant', - 'class', 'cls', - 'attr', 'attrib', 'attribute', - 'exc', 'exception', - # py:obj doesn't exists in cpython sphinx docs, - # so it's not listed here: it's listed down there. -)) - -SUPPORTED_EXTERNAL_STD_REFTYPES = set(( - # Narrative documentation refs and other standard domain - # present in cpython documentation. These roles are always - # implicitely external. Stuff not explicitely listed here - # might still be linked to with an :external: role. - # These reftypes also implicitely belong to the 'std' domain. - 'doc', 'cmdoption', 'option', 'envvar', - 'label', 'opcode', 'term', 'token' -)) - -SUPPORTED_DEFAULT_REFTYPES = set(( - # equivalent to None. - 'ref', 'any', 'obj', 'object', -)) - -ALL_SUPPORTED_ROLES = set(( - # external references - 'external', - *SUPPORTED_DOMAINS, - *SUPPORTED_PY_REFTYPES, - *SUPPORTED_EXTERNAL_STD_REFTYPES, - *SUPPORTED_DEFAULT_REFTYPES - )) + if reftype in SUPPORTED_DEFAULT_REFTYPES: + reftype = None + + if reftype in SUPPORTED_EXTERNAL_STD_REFTYPES and domain is None: + external = True + domain = 'std' + + if domain in SUPPORTED_EXTERNAL_DOMAINS: + external = True + + text_node = nodes.Text(text) + node = nodes.title_reference(rawtext, '', + invname=invname, + domain=domain, + reftype=reftype, + external=external, + lineno=lineno) + + set_node_attributes(node, children=[text_node], document=inliner.document) # type: ignore + return [node], [] @contextmanager -def patch_docutils_role_function() -> Iterator[None]: +def patch_docutils_role_function(errors:List[ParseError]) -> Iterator[None]: r""" Like sphinx, we are patching the L{docutils.parsers.rst.roles.role} function. This function is a factory for role handlers functions. In order to handle any kind @@ -658,7 +611,8 @@ def new_role(role_name: str, language_module: ModuleType, if role_name in ALL_SUPPORTED_ROLES or any( role_name.startswith(f'{n}:') for n in ALL_SUPPORTED_ROLES) or \ role_name.startswith('external+'): # 'external+' is a special case - return link_role, [] + return LinkRole(errors), [] + return old_role(role_name, language_module, lineno, reporter) # type: ignore roles.role = new_role @@ -666,7 +620,9 @@ def new_role(role_name: str, language_module: ModuleType, roles.role = old_role # https://docutils.sourceforge.io/docs/ref/rst/directives.html#default-role -roles.register_local_role('default-role', link_role) +# there is no possible code path that triggers messages from the default role, +# so that's ok to use an anonymous list here +roles.register_local_role('default-role', LinkRole([])) directives.register_directive('python', PythonCodeDirective) directives.register_directive('code', DocutilsAndSphinxCodeBlockAdapter) diff --git a/pydoctor/linker.py b/pydoctor/linker.py index cb3b96c44..4a5590604 100644 --- a/pydoctor/linker.py +++ b/pydoctor/linker.py @@ -12,6 +12,7 @@ ) from pydoctor.epydoc.markup import DocstringLinker +from pydoctor.sphinx import SUPPORTED_PY_DOMAINS, SUPPORTED_EXTERNAL_STD_REFTYPES if TYPE_CHECKING: from twisted.web.template import Flattenable @@ -130,15 +131,25 @@ def look_for_name(self, def look_for_intersphinx(self, name: str, *, invname: Optional[str] = None, domain: Optional[str] = None, - reftype: Optional[str] = None) -> Optional[str]: + reftype: Optional[str] = None, + lineno: Optional[int] = None) -> Optional[str]: """ Return link for `name` based on intersphinx inventory. Return None if link is not found. """ - return self.obj.system.intersphinx.getLink(name, + try: + return self.obj.system.intersphinx.getLink(name, invname=invname, domain=domain, - reftype=reftype) + reftype=reftype, strict=True) + + except ValueError as e: + link = self.obj.system.intersphinx.getLink(name, + invname=invname, domain=domain, + reftype=reftype) + if lineno is not None: + self.reporting_obj.report(str(e), 'resolve_identifier_xref', lineno, thresh=1) + return link def link_to(self, identifier: str, label: "Flattenable") -> Tag: fullID = self.obj.expandName(identifier) @@ -200,8 +211,12 @@ def _resolve_identifier_xref(self, an external target (found via Intersphinx). @raise LookupError: If C{identifier} could not be resolved. """ - if invname: assert external - might_be_local_python_ref = not external and domain in ('py', None) and reftype not in ('doc', ) + if invname: + assert external + + # Wether to try to resolve the target as a local python object + might_be_local_python_ref = (not external + and domain in (*SUPPORTED_PY_DOMAINS, None)) # There is a lot of DWIM here. Look for a global match first, # to reduce the chance of a false positive. @@ -214,17 +229,25 @@ def _resolve_identifier_xref(self, # Check if the fullID exists in an intersphinx inventory. fullID = self.obj.expandName(identifier) - target_url = self.look_for_intersphinx(fullID, invname=invname, - domain=domain, reftype=reftype) + target_url = self.look_for_intersphinx(fullID, + invname=invname, + domain=domain, + reftype=reftype, + # passing lineno here enabled the reporting of the ambiguous intersphinx ref + lineno=lineno) intersphinx_target_url_unfiltered = self.look_for_intersphinx(fullID) if not target_url: # FIXME: https://github.com/twisted/pydoctor/issues/125 # expandName is unreliable so in the case fullID fails, we # try our luck with 'identifier'. - target_url = self.look_for_intersphinx(identifier, invname=invname, - domain=domain, reftype=reftype) + target_url = self.look_for_intersphinx(identifier, + invname=invname, + domain=domain, + reftype=reftype, + lineno=lineno) if not intersphinx_target_url_unfiltered: intersphinx_target_url_unfiltered = self.look_for_intersphinx(identifier) + if target_url: return target_url diff --git a/pydoctor/options.py b/pydoctor/options.py index e40109694..4f8b8962e 100644 --- a/pydoctor/options.py +++ b/pydoctor/options.py @@ -331,18 +331,13 @@ def _is_identifier_like(s:str) -> bool: # --intersphinx=something.org # --intersphinx=http://something.org/objects.inv # --intersphinx=http://cnd.abc.something.org/objects.inv:http://something.org/ -# --intersphinx=inventories/pack.inv:http://something.org/ - -# --intersphinx=file.inv:http://something.org/ -# ok so this one and the next one are hard to differenciate... -# we have to require the file to have the extension '.inv' but what about a project -# called 'project.inv' ? So for these cases we should check if the file exists or not. - # --intersphinx=pydoctor:http://something.org/ # --intersphinx=pydoctor:http://something.org/objects.inv # --intersphinx=pydoctor:http://cnd.abc.something.org/objects.inv:http://something.org/ -# --intersphinx=pydoctor:inventories/pack.inv:http://something.org/ -# --intersphinx=pydoctor:c:/data/inventories/pack.inv:http://something.org/ +# --intersphinx-file=inventories/pack.inv:http://something.org/ +# --intersphinx-file=file.inv:http://something.org/ +# --intersphinx-file=pydoctor:inventories/pack.inv:http://something.org/ +# --intersphinx-file=pydoctor:c:/data/inventories/pack.inv:http://something.org/ _RE_DRIVE_LIKE = re.compile(r':[a-z]:(\\|\/)', re.IGNORECASE) def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: diff --git a/pydoctor/sphinx.py b/pydoctor/sphinx.py index 7f28a3fbf..9eb1ca49c 100644 --- a/pydoctor/sphinx.py +++ b/pydoctor/sphinx.py @@ -4,6 +4,7 @@ from __future__ import annotations from collections import defaultdict import enum +from itertools import product import logging import os @@ -13,7 +14,7 @@ import zlib from typing import ( TYPE_CHECKING, Callable, ContextManager, Dict, IO, Iterable, List, Mapping, - Optional, Set, Tuple + Optional, Sequence, Set, Tuple ) import appdirs @@ -37,6 +38,70 @@ def close(self) -> None: ... logger = logging.getLogger(__name__) + +SUPPORTED_PY_DOMAINS = set(( + # When using a domain specification, one must also give the reftype. + # links like :py:`something` will trigger an error. + # python domain references + 'py', +)) + +SUPPORTED_EXTERNAL_DOMAINS = set(( + # domain of other languages, complement this list as necessary + 'c', 'cpp', 'js', 'rust', + 'erl', 'php', 'rb', 'go', + # the standard domain + 'std', +)) + +SUPPORTED_DOMAINS = SUPPORTED_PY_DOMAINS | SUPPORTED_EXTERNAL_DOMAINS + +SUPPORTED_PY_REFTYPES = set(( + # Specific objects types in the 'py' domains. + 'mod', 'module', + 'func', 'function', + 'meth', 'method', + 'data', + 'const', 'constant', + 'class', 'cls', + 'attr', 'attrib', 'attribute', + 'exc', 'exception', + # py:obj doesn't exists in cpython sphinx docs, + # so it's not listed here: it's listed down there. +)) + +SUPPORTED_EXTERNAL_STD_REFTYPES = set(( + # Narrative documentation refs and other standard domain + # present in cpython documentation. These roles are always + # implicitely external. Stuff not explicitely listed here + # might still be linked to with an :external: role. + # These reftypes also implicitely belong to the 'std' domain. + 'doc', 'cmdoption', 'option', 'envvar', + 'label', 'opcode', 'term', 'token' +)) + +SUPPORTED_DEFAULT_REFTYPES = set(( + # equivalent to None. + 'ref', 'any', 'obj', 'object', +)) + +for i,j in product(*[(SUPPORTED_PY_DOMAINS, + SUPPORTED_EXTERNAL_DOMAINS, + SUPPORTED_PY_REFTYPES, + SUPPORTED_EXTERNAL_STD_REFTYPES, + SUPPORTED_DEFAULT_REFTYPES),]*2): + if i!=j: + assert not i.intersection(j) + +ALL_SUPPORTED_ROLES = set(( + # external references + 'external', + *SUPPORTED_DOMAINS, + *SUPPORTED_PY_REFTYPES, + *SUPPORTED_EXTERNAL_STD_REFTYPES, + *SUPPORTED_DEFAULT_REFTYPES + )) + def parse_domain_reftype(name: str) -> Tuple[Optional[str], str]: """ Given a string like C{class} or C{py:class} or C{rst:directive:option}, @@ -96,9 +161,6 @@ def __init__(self, logger: Callable[..., None],): def error(self, where: str, message: str) -> None: self._logger(where, message, thresh=-1) - def message(self, where: str, message: str) -> None: - self._logger(where, message, thresh=1) - def _add_inventory(self, invname:str|None, url_or_path: str) -> str|None: inventory_name = invname or str(hash(url_or_path)) if inventory_name in self._inventories: @@ -219,10 +281,40 @@ def _parseInventory( return result + def _raise_ambiguous_ref(self, target: str, + invname:Optional[str], + domain:Optional[str], + reftype:Optional[str], + options: Sequence[InventoryObject]): + + # Build the taget string + target_str = target + if invname or domain or reftype: + parts = [] + for name, value in zip(['invname', 'domain', 'reftype'], + [invname, domain, reftype]): + if value: + parts.append(f'{name}={value}') + target_str += f' ({", ".join(parts)})' + + missing_filters = list(filter(None, [invname, domain, reftype])) + options_urls = [self._getLinkFromObj(o) for o in options] + + if not missing_filters: + # there is a problem with this inventory... + msg = f'complete intersphinx ref is still ambiguous {target_str}, could be {",".join(options_urls)}' + msg += ', this is likely an issue with the inventory' + else: + msg = f'ambiguous intersphinx ref to {target_str}, could be {",".join(options_urls)}' + msg += f', try adding one of {", ".join(missing_filters)} filter to your RST role' + raise ValueError(msg) + def getInv(self, target: str, invname:Optional[str]=None, domain:Optional[str]=None, - reftype:Optional[str]=None) -> Optional[InventoryObject]: + reftype:Optional[str]=None, + *, + strict:bool=False) -> Optional[InventoryObject]: """ Get the inventory object instance matching the criteria. """ @@ -245,8 +337,10 @@ def _filter(inv: InventoryObject) -> bool: options = list(filter(_filter, options)) if len(options) == 1: + # Exact match return options[0] elif not options: + # No match return None # We still have several options under consideration... @@ -257,34 +351,46 @@ def _filter(inv: InventoryObject) -> bool: domain = 'py' py_options = list(filter(_filter, options)) if py_options: - # TODO: We might want to leave a message in the case where several objects + if len(py_options)>1 and strict: + self._raise_ambiguous_ref(target, invname, domain, reftype, options=options) + # are still in consideration. But for now, we just pick the last one because our old version # of the inventory (that only dealing with the 'py' domain) would override names as they were parsed. return py_options[-1] # If it hasn't been found in the 'py' domain, then we use the first mathing object because it makes # more sens in the case of the `std` domain of the standard library. + if strict: + self._raise_ambiguous_ref(target, invname, domain, reftype, options=options) return options[0] + + def _getLinkFromObj(self, inv: InventoryObject) -> str: + base_url = inv.base_url + relative_link = inv.location + + # For links ending with $, replace it with full name. + if relative_link.endswith('$'): + relative_link = relative_link[:-1] + inv.name + + return f'{base_url}/{relative_link}' def getLink(self, target: str, invname:Optional[str]=None, domain:Optional[str]=None, - reftype:Optional[str]=None) -> Optional[str]: + reftype:Optional[str]=None, + *, + strict:bool=False) -> Optional[str]: """ Return link for ``target`` or None if no link is found. """ - invobj = self.getInv(target, invname, domain, reftype) + invobj = self.getInv(target, + invname, + domain, + reftype, + strict=strict) if not invobj: return None - - base_url = invobj.base_url - relative_link = invobj.location - - # For links ending with $, replace it with full name. - if relative_link.endswith('$'): - relative_link = relative_link[:-1] + target - - return f'{base_url}/{relative_link}' + return self._getLinkFromObj(invobj) def _parseInventoryLine(line: str) -> Tuple[str, str, int, str, str]: diff --git a/pydoctor/test/test_epydoc2stan.py b/pydoctor/test/test_epydoc2stan.py index fc08005cc..79ab56d0c 100644 --- a/pydoctor/test/test_epydoc2stan.py +++ b/pydoctor/test/test_epydoc2stan.py @@ -1100,9 +1100,10 @@ def test_EpydocLinker_look_for_intersphinx_with_spaces() -> None: sut = target.docstring_linker assert isinstance(sut, linker._EpydocLinker) - result = sut.look_for_intersphinx('base .module .other') + result = sut.link_xref('base .module .other', 'thing', 0) + assert 'http://tm.tld/some.html' in flatten(result) - assert 'http://tm.tld/some.html' == result +# TODO: Test filtering look_for_intersphinx(name, invname, domain, reftype) def test_EpydocLinker_look_for_intersphinx_hit() -> None: """ From 1dad5f6f7b783a743f0f4dbe4cfaf0c031658f44 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 1 Mar 2024 22:17:51 -0500 Subject: [PATCH 13/20] Fix TypeAlias import --- pydoctor/options.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pydoctor/options.py b/pydoctor/options.py index 4f8b8962e..13d9609ce 100644 --- a/pydoctor/options.py +++ b/pydoctor/options.py @@ -5,7 +5,7 @@ import enum import re -from typing import Sequence, List, Optional, Type, Tuple, TYPE_CHECKING, TypeAlias +from typing import Sequence, List, Optional, Type, Tuple, TYPE_CHECKING import sys import functools from pathlib import Path @@ -22,6 +22,7 @@ from pydoctor._configparser import CompositeConfigParser, IniConfigParser, TomlConfigParser, ValidatorParser if TYPE_CHECKING: + from typing import TypeAlias from pydoctor import model from pydoctor.templatewriter import IWriter @@ -369,7 +370,7 @@ def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: parts[part_nb] += c return parts -IntersphinxOption: TypeAlias = Tuple[Optional[str], str, str] +IntersphinxOption: 'TypeAlias' = Tuple[Optional[str], str, str] def _parse_intersphinx_file(s:str) -> IntersphinxOption: """ From a30353ed34b080d9c2800b93d73c168e6a2f3315 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 2 Mar 2024 09:55:09 -0500 Subject: [PATCH 14/20] Fix duplicated warnings issue --- pydoctor/linker.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pydoctor/linker.py b/pydoctor/linker.py index 4a5590604..7849bcb04 100644 --- a/pydoctor/linker.py +++ b/pydoctor/linker.py @@ -192,7 +192,8 @@ def _resolve_identifier_xref(self, invname: Optional[str] = None, domain: Optional[str] = None, reftype: Optional[str] = None, - external: bool = False + external: bool = False, + no_warnings: bool = False, ) -> Union[str, 'model.Documentable']: """ Resolve a crossreference link to a Python identifier. @@ -307,12 +308,13 @@ def _resolve_identifier_xref(self, try: return self._resolve_identifier_xref(_SPACE_RE.sub('', identifier), lineno, invname=invname, domain=domain, - reftype=reftype, external=external) + reftype=reftype, external=external, no_warnings=True) except LookupError: pass - if self.reporting_obj: + if self.reporting_obj and no_warnings is False: self.reporting_obj.report(message, 'resolve_identifier_xref', lineno) + raise LookupError(identifier) From e3bcda6d2b3b9a0658368e66604f9e9b96334f78 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 2 Mar 2024 10:05:50 -0500 Subject: [PATCH 15/20] Fix test --- pydoctor/test/test_commandline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydoctor/test/test_commandline.py b/pydoctor/test/test_commandline.py index 7634b2138..d41949883 100644 --- a/pydoctor/test/test_commandline.py +++ b/pydoctor/test/test_commandline.py @@ -186,7 +186,7 @@ def test_main_return_zero_on_warnings() -> None: assert exit_code == 0 assert "__init__.py:8: Unknown field 'bad_field'" in stream.getvalue() - assert 'report_module.py:9: Cannot find link target for "BadLink"' in stream.getvalue() + assert 'report_module.py:9: Cannot find link target for "Bad Link"' in stream.getvalue() def test_main_return_non_zero_on_warnings() -> None: @@ -203,7 +203,7 @@ def test_main_return_non_zero_on_warnings() -> None: assert exit_code == 3 assert "__init__.py:8: Unknown field 'bad_field'" in stream.getvalue() - assert 'report_module.py:9: Cannot find link target for "BadLink"' in stream.getvalue() + assert 'report_module.py:9: Cannot find link target for "Bad Link"' in stream.getvalue() def test_main_symlinked_paths(tmp_path: Path) -> None: From 3da2cd34098c3380934309897be7151541f07b19 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 2 Mar 2024 11:24:10 -0500 Subject: [PATCH 16/20] Fix mypy --- pydoctor/linker.py | 2 +- pydoctor/sphinx.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pydoctor/linker.py b/pydoctor/linker.py index 7849bcb04..755bee59b 100644 --- a/pydoctor/linker.py +++ b/pydoctor/linker.py @@ -147,7 +147,7 @@ def look_for_intersphinx(self, name: str, *, link = self.obj.system.intersphinx.getLink(name, invname=invname, domain=domain, reftype=reftype) - if lineno is not None: + if self.reporting_obj is not None and lineno is not None: self.reporting_obj.report(str(e), 'resolve_identifier_xref', lineno, thresh=1) return link diff --git a/pydoctor/sphinx.py b/pydoctor/sphinx.py index 9eb1ca49c..74b89204a 100644 --- a/pydoctor/sphinx.py +++ b/pydoctor/sphinx.py @@ -285,7 +285,7 @@ def _raise_ambiguous_ref(self, target: str, invname:Optional[str], domain:Optional[str], reftype:Optional[str], - options: Sequence[InventoryObject]): + options: Sequence[InventoryObject]) -> None: # Build the taget string target_str = target From 3f8dbd8df004c1b65e78bad9ff1d979d49919e5d Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 2 Mar 2024 14:01:28 -0500 Subject: [PATCH 17/20] Relac again epytext parsing to take care of it in node2stan --- pydoctor/epydoc/markup/epytext.py | 8 ++++---- pydoctor/node2stan.py | 16 +++++++++++----- pydoctor/test/epydoc/test_epytext2node.py | 2 +- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pydoctor/epydoc/markup/epytext.py b/pydoctor/epydoc/markup/epytext.py index 111542b0a..3a7497dee 100644 --- a/pydoctor/epydoc/markup/epytext.py +++ b/pydoctor/epydoc/markup/epytext.py @@ -1191,10 +1191,10 @@ def _colorize_link(link: Element, token: Token, end: int, errors: List[ParseErro else: target = 'http://'+target elif link.tag=='link': - # Remove arg lists for functions (e.g., L{_colorize_link()}) - target = re.sub(r'\(.*\)$', '', target) - # We used to validate the link target against the following regex: r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$' - # but we don;t do that anymore since the intersphinx taget names can contain any kind of text. + # Here we used to process the target in order to remove arg lists for functions + # and validate it. But now this happens in node2stan.parse_reference(). + # The target is not validated anymore since the intersphinx taget names can contain any kind of text. + pass # Construct the target element. target_elt = Element('target', target, lineno=str(token.startline)) diff --git a/pydoctor/node2stan.py b/pydoctor/node2stan.py index 9bb6bc85d..4457c8a4c 100644 --- a/pydoctor/node2stan.py +++ b/pydoctor/node2stan.py @@ -58,6 +58,8 @@ def gettext(node: Union[nodes.Node, List[nodes.Node]]) -> List[str]: _TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$') _VALID_IDENTIFIER_RE = re.compile('[^0-9a-zA-Z_]') +_CALLABLE_RE = re.compile(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$') +_CALLABLE_ARGS_RE = re.compile(r'\(.*\)$') @attr.s(auto_attribs=True) class Reference: @@ -68,7 +70,6 @@ class Reference: reftype: Optional[str] = None external: bool = False - def parse_reference(node:nodes.Node) -> Reference: """ Split a reference into (label, target). @@ -85,10 +86,15 @@ def parse_reference(node:nodes.Node) -> Reference: else: label = target = node.astext() # Support linking to functions and methods with parameters - if target.endswith(')'): - # Remove arg lists for functions (e.g., L{_colorize_link()}) - target = re.sub(r'\(.*\)$', '', target) - + try: + begin_parameters = target.index('(') + except ValueError: + pass + else: + if target.endswith(')') and _CALLABLE_RE.match(target, endpos=begin_parameters): + # Remove arg lists for functions (e.g., L{_colorize_link()}) + target = _CALLABLE_ARGS_RE.sub('', target) + return Reference(label, target, invname=node.attributes.get('invname'), domain=node.attributes.get('domain'), diff --git a/pydoctor/test/epydoc/test_epytext2node.py b/pydoctor/test/epydoc/test_epytext2node.py index fbd0c9d56..49984893a 100644 --- a/pydoctor/test/epydoc/test_epytext2node.py +++ b/pydoctor/test/epydoc/test_epytext2node.py @@ -39,7 +39,7 @@ def test_nested_markup() -> None: expected = ''' It becomes a little bit complicated with - + custom links From 6971745cbcdc76b32f343df6a615e3d2efede382 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 2 Mar 2024 14:04:11 -0500 Subject: [PATCH 18/20] Use python3 .10 in order to type check pydoctor so we can use type aliases --- .github/workflows/static.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static.yaml b/.github/workflows/static.yaml index 66335344c..f36d6d182 100644 --- a/.github/workflows/static.yaml +++ b/.github/workflows/static.yaml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.8' + python-version: '3.10' - name: Install tox run: | From ef7f528adeceaa4a47d30fe105d928823d9c808f Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 2 Mar 2024 14:21:55 -0500 Subject: [PATCH 19/20] Fix bug with link spanning on several lines --- pydoctor/epydoc/markup/epytext.py | 3 ++- pydoctor/node2stan.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pydoctor/epydoc/markup/epytext.py b/pydoctor/epydoc/markup/epytext.py index 3a7497dee..721fb7f23 100644 --- a/pydoctor/epydoc/markup/epytext.py +++ b/pydoctor/epydoc/markup/epytext.py @@ -1194,7 +1194,8 @@ def _colorize_link(link: Element, token: Token, end: int, errors: List[ParseErro # Here we used to process the target in order to remove arg lists for functions # and validate it. But now this happens in node2stan.parse_reference(). # The target is not validated anymore since the intersphinx taget names can contain any kind of text. - pass + # We simply normalize it. + target = re.sub(r'\s', ' ', target) # Construct the target element. target_elt = Element('target', target, lineno=str(token.startline)) diff --git a/pydoctor/node2stan.py b/pydoctor/node2stan.py index 4457c8a4c..9bd34d75c 100644 --- a/pydoctor/node2stan.py +++ b/pydoctor/node2stan.py @@ -80,11 +80,13 @@ def parse_reference(node:nodes.Node) -> Reference: label, target = node.children, node.attributes['refuri'] else: # RST parsed. - m = _TARGET_RE.match(node.astext()) + # Sanitize links spaning over multiple lines + node_text = re.sub(r'\s', ' ', node.astext()) + m = _TARGET_RE.match(node_text) if m: label, target = m.groups() else: - label = target = node.astext() + label = target = node_text # Support linking to functions and methods with parameters try: begin_parameters = target.index('(') From fb20cad4ad90802a25ba36f576bf84195c9ad465 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 31 Aug 2024 13:01:31 -0400 Subject: [PATCH 20/20] Add some tests declaration missing implementations. Tries doctests but looks like we're not running them so this will need to be moved to the actual test directory. --- pydoctor/options.py | 44 +++++++++++++++++++++++++--------- pydoctor/sphinx.py | 8 ++++--- pydoctor/test/test_options.py | 2 ++ pydoctor/test/test_sphinx.py | 45 ++++++++++++++++++++++++++++++++++- 4 files changed, 84 insertions(+), 15 deletions(-) diff --git a/pydoctor/options.py b/pydoctor/options.py index e46c26361..33df72bcc 100644 --- a/pydoctor/options.py +++ b/pydoctor/options.py @@ -305,8 +305,16 @@ def _convert_privacy(l: List[str]) -> List[Tuple['model.PrivacyClass', str]]: def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: """ - Given a base url OR an url to objects.inv file. + Given a base url OR an url to .inv file. Returns a tuple: (URL_TO_OBJECTS.INV, BASE_URL) + + >>> _object_inv_url_and_base_url('thing.inv') # error + >>> _object_inv_url_and_base_url('hello.com/thing.inv') + ('hello.com/thing.inv', 'hello.com') + >>> _object_inv_url_and_base_url('hello.com') + ('hello.com/objects.inv', 'hello.com') + >>> _object_inv_url_and_base_url('hello.com/') + ('hello.com/objects.inv', 'hello.com') """ if url.endswith('.inv'): parts = url.rsplit('/', 1) @@ -325,12 +333,12 @@ def _object_inv_url_and_base_url(url:str) -> Tuple[str, str]: def _is_identifier_like(s:str) -> bool: """ - Examples of identifier-like strings:: + True if C{s} is an identifier-like strings - identifier - identifier_thing - zope.interface - identifier-like + >>> assert _is_identifier_like('identifier') + >>> assert _is_identifier_like('identifier_thing') + >>> assert _is_identifier_like('zope.interface') + >>> assert _is_identifier_like('identifier-like') """ return s.replace('-', '_').replace('.', '_').isidentifier() @@ -350,8 +358,7 @@ def _is_identifier_like(s:str) -> bool: _RE_DRIVE_LIKE = re.compile(r':[a-z]:(\\|\/)', re.IGNORECASE) def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: """ - This replies on the fact the filename does not contain a colon. - # TODO: find a manner to lift this limitation. + Colons in filenames must be escaped with a backslash. """ parts = [''] part_nb = 0 @@ -366,10 +373,14 @@ def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: elif _RE_DRIVE_LIKE.match(s[i-2:i+2]): # Still not a separator, a windows drive :c:/ pass + elif s[i-1] == '\\': + # An escaped colon, remove the backslash and keep the colon + parts[part_nb] = parts[part_nb][:-1] + pass elif len(parts) == 3: - raise ValueError(f'Malformed {option} option {s!r}: too many parts, beware that colons in filenames are not supported') + raise ValueError(f'Malformed {option} option {s!r}: too many parts, beware that colons in filenames must be escaped with a backslash') elif not parts[part_nb]: - raise ValueError(f'Malformed {option} option {s!r}: two consecutive colons is not valid') + raise ValueError(f'Malformed {option} option {s!r}: two consecutive colons is not valid, beware that colons in filenames must be escaped with a backslash') else: parts.append('') part_nb += 1 @@ -377,7 +388,7 @@ def _split_intersphinx_parts(s:str, option:str='--intersphinx') -> List[str]: parts[part_nb] += c return parts -IntersphinxOption: 'TypeAlias' = Tuple[Optional[str], str, str] +IntersphinxOption: TypeAlias = Tuple[Optional[str], str, str] def _parse_intersphinx_file(s:str) -> IntersphinxOption: """ @@ -386,6 +397,14 @@ def _parse_intersphinx_file(s:str) -> IntersphinxOption: [INVENTORY_NAME:]PATH:BASE_URL Returns a L{IntersphinxOption} tuple. + + >>> _parse_intersphinx_file('privpackage:./shinx inventories/privpackage.inv:https://myprivatehost/apidocs/privpackage/') + >>> _parse_intersphinx_file('./shinx inventories/privpackage.inv:https://myprivatehost/apidocs/privpackage/') + >>> _parse_intersphinx_file('privpackage:d:/shinx inventories/privpackage.inv:https://myprivatehost/apidocs/privpackage/') + >>> _parse_intersphinx_file('./shinx inventories/privpackage.inv') # error + >>> _parse_intersphinx_file('https://myprivatehost/apidocs/privpackage/') # error + >>> _parse_intersphinx_file(r'shinx inventories\\:aka intersphinx/privpackage.inv:https://myprivatehost/apidocs/privpackage/') + """ try: parts = _split_intersphinx_parts(s, '--intersphinx-file') @@ -423,6 +442,9 @@ def _parse_intersphinx(s:str) -> IntersphinxOption: [INVENTORY_NAME:]URL[:BASE_URL] Returns a L{IntersphinxOption} tuple. + + >>> _parse_intersphinx('docs.stuff.org') + >>> _parse_intersphinx('https://docs.stuff.org') """ try: diff --git a/pydoctor/sphinx.py b/pydoctor/sphinx.py index 94e9b5ddf..8835a58d4 100644 --- a/pydoctor/sphinx.py +++ b/pydoctor/sphinx.py @@ -301,11 +301,13 @@ def _raise_ambiguous_ref(self, target: str, if not missing_filters: # there is a problem with this inventory... - msg = f'complete intersphinx ref is still ambiguous {target_str}, could be {",".join(options_urls)}' - msg += ', this is likely an issue with the inventory' + # TODO: should we even report such an issue ? + # probably mnot since the dev cannot do anything about it + msg = (f'there is an issue with the inventory {invname}: ' + f' {target_str}, could be {",".join(options_urls)}') else: msg = f'ambiguous intersphinx ref to {target_str}, could be {",".join(options_urls)}' - msg += f', try adding one of {", ".join(missing_filters)} filter to your RST role' + msg += f', try adding one of {", ".join(missing_filters)} filter to your role' raise ValueError(msg) def getInv(self, target: str, diff --git a/pydoctor/test/test_options.py b/pydoctor/test/test_options.py index 667a03cc4..5cd797bf2 100644 --- a/pydoctor/test/test_options.py +++ b/pydoctor/test/test_options.py @@ -294,6 +294,8 @@ def test_intersphinx_split_on_colon() -> None: assert _split_intersphinx_parts('pydoctor:inventories/pack.inv:http://something.org/')==['pydoctor', 'inventories/pack.inv', 'http://something.org/'] assert _split_intersphinx_parts('pydoctor:c:/data/inventories/pack.inv:http://something.org/')==['pydoctor', 'c:/data/inventories/pack.inv', 'http://something.org/'] + assert _split_intersphinx_parts('file with a colon\\: in the name.inv:http://something.org/')==['file with a colon: in the name.inv', 'http://something.org/'] + with pytest.raises(ValueError, match='Malformed --intersphinx option \'pydoctor::\': two consecutive colons is not valid'): _split_intersphinx_parts('pydoctor::') with pytest.raises(ValueError, match='Malformed --intersphinx option \'pydoctor:a:b:c:d\': too many parts'): diff --git a/pydoctor/test/test_sphinx.py b/pydoctor/test/test_sphinx.py index 7d5a3727a..a22a6bd25 100644 --- a/pydoctor/test/test_sphinx.py +++ b/pydoctor/test/test_sphinx.py @@ -848,4 +848,47 @@ def test_inv_object_reftyp() -> None: location='library/stdtypes.html#$', domain='py', reftype='class', - display='-') \ No newline at end of file + display='-') + +def test_get_inventory_filtered_by_invname() -> None: + ... + +def test_get_inventory_filtered_by_domain() -> None: + ... + +def test_get_inventory_filtered_by_reftype() -> None: + ... + +def test_get_inventory_filtered_by_both_domain_and_reftype() -> None: + ... + +def test_get_inventory_filtered_by_invname_and_domain() -> None: + ... + +def test_get_inventory_py_domain_has_precedence() -> None: + ... + +def test_get_inventory_ambigous_ref_in_std_domain() -> None: + ... + +def test_duplicate_inventory_from_url() -> None: + ... + +def test_duplicate_inventory_from_file() -> None: + ... + +def test_inventory_from_file_with_colon_in_the_filename() -> None: + ... + +def test_duplicate_inventory_from_both_file_and_url() -> None: + ... + +def test_inventory_from_file_fails_because_of_io_error() -> None: + ... + +def test_get_inventory_object_ambiguous_missing_some_filters() -> None: + ... + +def test_get_inventory_object_truly_ambiguous() -> None: + ... +