Skip to content

Commit

Permalink
Support multiple documentation items with the same name
Browse files Browse the repository at this point in the history
The architecture of doxylink was based on a name:item dict, which
doesn't support storing multiple items with the same name. This led to
several workarounds:
* To support overloaded functions, function items were turned to
  name:FunctionList;
* To prevent friend declarations from conflicting with classes, friends
  were skipped.

Issues remained nevertheless: C++ allows declaring a class, a function,
and a variable with the same name:

    int a;
    void a();
    class a;

In addition, a common pattern with Qt is to declare properties with the
same name as the getter (Qt itself does this all the time):

    class Foo: public QObject
    {
        Q_OBJECT
        Q_PROPERTY(int bar READ bar)
    public:
        int bar() const;
    };

In all the above examples, doxylink was just crashing when parsing the
Doxygen index.

Instead of adding new exceptions, this commit changes the architecture
to allow multiple symbols with the same name. Duplicate names may still
raise an error, but only when one tries to refer to such an entry. This
means that all other symbols in the project can now be used instead of
failing during initialisation.

With this PR, it should be possible to create a domain in the future,
supporting much of the same syntax as the built-in [cpp
domain](https://www.sphinx-doc.org/en/master/usage/domains/cpp.html#cross-referencing)

Supersedes #31.
Closes #54.
  • Loading branch information
lmoureaux committed Dec 29, 2024
1 parent adb807f commit c67a45b
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 121 deletions.
5 changes: 4 additions & 1 deletion examples/my_lib.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <string>
#include <QObject>

/**
* Example documented function
Expand All @@ -18,8 +19,10 @@ int my_func(int b, std::string a);
/// @{
namespace my_namespace
{
class MyClass
class MyClass: public QObject
{
Q_OBJECT
Q_PROPERTY(double my_method READ my_method);
public:
MyClass();

Expand Down
263 changes: 154 additions & 109 deletions sphinxcontrib/doxylink/doxylink.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import bisect
import os
import re
import requests
Expand All @@ -6,7 +7,7 @@
import xml.etree.ElementTree as ET
import urllib.parse
from collections import namedtuple
from typing import Dict, Iterable, MutableMapping, Set, Union
from typing import Optional

from dateutil.parser import parse as parsedate
from docutils import nodes, utils
Expand All @@ -20,7 +21,48 @@
from . import __version__
from .parsing import normalise, ParseException

Entry = namedtuple('Entry', ['kind', 'file'])

class Entry(namedtuple('_Entry', ['name', 'kind', 'file', 'arglist'])):
    '''
    Represents a documentation entry produced by Doxygen.

    Fields:
        name: symbol name of the entry (e.g. ``my_namespace::MyClass``)
        kind: Doxygen kind of the entry (e.g. ``"class"`` or ``"function"``)
        file: file (optionally with an anchor) documenting the entry
        arglist: normalised argument list, or ``None`` when the entry has none
    '''

    def matches(self, name: str, kind: Optional[str], arglist: Optional[str]) -> bool:
        '''
        Checks whether this entry has the specified name, kind, and argument list.

        The name may be under-qualified: ``"bar"`` matches ``"foo::bar"``, but it
        never matches in the middle of an identifier (``"foo"`` does not match
        ``"do_foo"``).

        Args:
            name (str): symbol name
            kind (Optional[str]): restrict to symbols of this kind
            arglist (Optional[str]): normalized argument list for overload resolution

        Returns:
            bool: True if this entry is a valid match for the request
        '''

        # An empty name identifies nothing. Without this guard the suffix
        # test below succeeds for every entry and the prefix comes out empty,
        # so an empty lookup would match the whole index.
        if not name:
            return False

        # Are we of the correct kind?
        if kind and self.kind != kind:
            return False

        # Ensure the name matches
        if not self.name.endswith(name):
            return False

        # "do_foo" doesn't match "foo": the character right before the
        # matched suffix must not be part of an identifier.
        prefix = self.name[:len(self.name) - len(name)]
        if prefix and (prefix[-1].isidentifier() or prefix[-1].isnumeric()):
            return False

        if not arglist:
            # If no argument list is provided, anything matches
            return True

        return self.arglist == arglist

    def is_class(self) -> bool:
        '''Returns true if this is a class entry (``kind`` is ``"class"``)'''
        return self.kind == 'class'

    def is_template(self) -> bool:
        '''Returns true if this is a template entry (its name contains ``<``)'''
        return '<' in self.name


def report_info(env, msg, docname=None, lineno=None):
Expand Down Expand Up @@ -77,110 +119,142 @@ def is_url(str_to_validate: str) -> bool:
return bool(re.match(regex, str_to_validate))


class FunctionList:
"""A FunctionList maps argument lists to specific entries"""
def __init__(self):
self.kind = 'function_list'
self._arglist: MutableMapping[str, str] = {}
class SymbolMap:
"""A SymbolMap maps symbols to Entries."""
def __init__(self, xml_doc: ET.ElementTree) -> None:
mapping = parse_tag_file(xml_doc)

def __getitem__(self, arglist: str) -> Entry:
# If the user has requested a specific function through specifying an arglist then get the right anchor
if arglist:
try:
filename = self._arglist[arglist]
except KeyError:
# TODO Offer fuzzy suggestion
raise LookupError('Argument list match not found')
else:
# Otherwise just return the first entry (if they don't care they get whatever comes first)
filename = list(self._arglist.values())[0]
# Sort the mapping for use with bisect
self._mapping = sorted(mapping, key=self._mapping_key)

return Entry(kind='function', file=filename)

def add_overload(self, arglist: str, file: str) -> None:
self._arglist[arglist] = file
@staticmethod
def _mapping_key(entry: Entry) -> str:
'''
Sorting key for the internal mapping. We sort by the reversed entry name so we
can bisect names based on the last element. This allows us to match "foo::bar"
when searching for "bar".
def __repr__(self):
return f"FunctionList({self._arglist})"
Args:
entry (Entry): the entry to generate a key for
Returns:
str: the reversed entry name
'''

class SymbolMap:
"""A SymbolMap maps symbols to Entries or FunctionLists"""
def __init__(self, xml_doc: ET.ElementTree) -> None:
self._mapping = parse_tag_file(xml_doc)
return entry.name[::-1]


def _find_entries(self, name: str, kind: Optional[str], arglist: Optional[str]) -> list[Entry]:
'''
Finds all potentially matching entries in the symbol list.
Args:
name (str): the symbol name to search for
kind (Optional[str]): the kind of symbols to search for
arglist (str): normalised function argument list
Returns:
list[Entry]: all entries whose name ends with 'name'
'''

matches = []

# Thanks to the sorting, all we need to do is iterate from the first to
# the last matching entry.
start = bisect.bisect_left(self._mapping, name[::-1], key=self._mapping_key)
for candidate in self._mapping[start:]:
if not candidate.name.endswith(name):
# Reached the end of entries that end in 'name'
break

def _get_symbol_match(self, symbol: str) -> str:
if self._mapping.get(symbol):
return symbol
if candidate.matches(name, kind, arglist):
# Found one
matches.append(candidate)

piecewise_list = match_piecewise(self._mapping.keys(), symbol)
return matches

# If there is only one match, return it.
if len(piecewise_list) == 1:
return list(piecewise_list)[0]

# If there is more than one item in piecewise_list then there is an ambiguity
def _disambiguate(self, name: str, candidates: list[Entry]) -> Entry:
'''
Returns the best-fitting candidate for the given symbol name. All
candidates are expected to be valid.
Args:
name (str): symbol name
candidates (list[Entry]): list of candidates to choose from
Returns:
Entry: the best candidate
'''

if not candidates:
raise LookupError(f'No documentation entry matching "{name}"')

# An exact match would appear at the beginning of the list.
if len(candidates) == 1 or candidates[0].name == name:
return candidates[0]

# If there is more than one candidate then there is an ambiguity
# Often this is due to the symbol matching the name of the constructor as well as the class name itself
# We will prefer the class
classes_list = {s for s in piecewise_list if self._mapping[s].kind == 'class'}
classes = [c for c in candidates if c.is_class()]

# If there is only one by here we return it.
if len(classes_list) == 1:
return list(classes_list)[0]
if len(classes) == 1:
return classes[0]

# Now, to disambiguate between ``PolyVox::Array< 1, ElementType >::operator[]`` and ``PolyVox::Array::operator[]`` matching ``operator[]``,
# we will ignore templated (as in C++ templates) tag names by removing names containing ``<``
no_templates_list = {s for s in piecewise_list if '<' not in s}
no_templates = [c for c in candidates if not c.is_template()]

if len(no_templates) == 1:
return no_templates[0]

if len(no_templates_list) == 1:
return list(no_templates_list)[0]
def pretty_entry(entry):
return entry.kind + ' ' + entry.name + (entry.arglist or '')

# If not found by now, return the shortest match, assuming that's the most specific
if no_templates_list:
if no_templates:
# TODO return a warning here?
return min(no_templates_list, key=len)
return min(no_templates, key=lambda entry: len(entry.name))

# TODO Offer fuzzy suggestion
raise LookupError('Could not find a match')


def __getitem__(self, item: str) -> Entry:
symbol, normalised_arglist = normalise(item)

matched_symbol = self._get_symbol_match(symbol)
entry = self._mapping[matched_symbol]

if isinstance(entry, FunctionList):
entry = entry[normalised_arglist]

return entry
# Restrict to functions when given an argument list
kind = 'function' if normalised_arglist else None
candidates = self._find_entries(symbol, kind, normalised_arglist)
return self._disambiguate(symbol, candidates)


def parse_tag_file(doc: ET.ElementTree) -> Dict[str, Union[Entry, FunctionList]]:
def parse_tag_file(doc: ET.ElementTree) -> list[Entry]:
"""
Takes in an XML tree from a Doxygen tag file and returns a dictionary that looks something like:
Takes in an XML tree from a Doxygen tag file and returns a list that looks something like:
.. code-block:: python
{'PolyVox': Entry(...),
'PolyVox::Array': Entry(...),
'PolyVox::Array1DDouble': Entry(...),
'PolyVox::Array1DFloat': Entry(...),
'PolyVox::Array1DInt16': Entry(...),
'QScriptContext::throwError': FunctionList(...),
'QScriptContext::toString': FunctionList(...)
}
Note the different form for functions. This is required to allow for 'overloading by argument type'.
[Entry('PolyVox', ...),
Entry('PolyVox::Array', ...),
Entry('PolyVox::Array1DDouble'),
Entry('PolyVox::Array1DFloat'),
Entry('PolyVox::Array1DInt16'),
Entry('QScriptContext::throwError'),
Entry('QScriptContext::toString'),
]
:Parameters:
doc : xml.etree.ElementTree
The XML DOM object
:return: a dictionary mapping fully qualified symbols to files
:return: a list of entries mapping fully qualified symbols to files
"""

mapping: Dict[str, Union[Entry, FunctionList]] = {}
function_list = [] # This is a list of function to be parsed and inserted into mapping at the end of the function.
mapping: list[Entry] = []
for compound in doc.findall('./compound'):
compound_kind = compound.get('kind')
if compound_kind not in {'namespace', 'class', 'struct', 'file', 'define', 'group', 'page'}:
Expand All @@ -201,7 +275,7 @@ def parse_tag_file(doc: ET.ElementTree) -> Dict[str, Union[Entry, FunctionList]]
compound_filename = compound_filename + '.html'

# If it's a compound we can simply add it
mapping[compound_name] = Entry(kind=compound_kind, file=compound_filename)
mapping.append(Entry(compound_name, kind=compound_kind, file=compound_filename, arglist=None))

for member in compound.findall('member'):
# If the member doesn't have an <anchorfile> element, use the parent compounds <filename> instead
Expand All @@ -212,54 +286,25 @@ def parse_tag_file(doc: ET.ElementTree) -> Dict[str, Union[Entry, FunctionList]]
raise KeyError(f"Member of {compound_name} does not have a name")
member_symbol = compound_name + '::' + member_name
member_kind = member.get('kind')
arglist_text = member.findtext('./arglist') # If it has an <arglist> then we assume it's a function. Empty <arglist> returns '', not None. Things like typedefs and enums can have empty arglists

if member_kind == "friend": # ignore friend class definitions because it results in double class entries that will throw a RuntimeError (see below at the end of this function)
continue
if arglist_text and member_kind not in {'variable', 'typedef', 'enumeration', "enumvalue"}:
function_list.append((member_symbol, arglist_text, member_kind, join(anchorfile, '#', member.findtext('anchor'))))
arglist = member.findtext('./arglist') # If it has an <arglist> then we assume it's a function. Empty <arglist> returns '', not None. Things like typedefs and enums can have empty arglists

member_file = join(anchorfile, '#', member.findtext('anchor'))

if arglist and member_kind not in {'variable', 'typedef', 'enumeration', "enumvalue"}:
try:
# Parse arguments to do overload resolution later
normalised_arglist = normalise(member_symbol + arglist)[1]
mapping.append(
Entry(name=member_symbol, kind=member_kind, file=member_file, arglist=normalised_arglist))
except ParseException as e:
print(f'Skipping {member_kind} {member_symbol}{arglist}. Error reported from parser was: {e}')
else:
# Put the simple things directly into the mapping
mapping[member_symbol] = Entry(kind=member.get('kind'), file=join(anchorfile, '#', member.findtext('anchor')))

for member_symbol, arglist, kind, anchor_link in function_list:
try:
normalised_arglist = normalise(member_symbol + arglist)[1]
except ParseException as e:
print(f'Skipping {kind} {member_symbol}{arglist}. Error reported from parser was: {e}')
else:
if member_symbol not in mapping:
mapping[member_symbol] = FunctionList()
member_mapping = mapping[member_symbol]
if not isinstance(member_mapping, FunctionList):
raise RuntimeError(f"Cannot add override to non-function '{member_symbol}'")
member_mapping.add_overload(normalised_arglist, anchor_link)
# Put the simple things directly into the list
mapping.append(Entry(name=member_symbol, kind=member_kind, file=member_file, arglist=None))

return mapping


def match_piecewise(candidates: Iterable[str], symbol: str, sep: str='::') -> set[str]:
    """
    Match the requested symbol against the candidates.

    It is allowed to under-specify the base namespace so that ``"MyClass"`` can match ``my_namespace::MyClass``

    A candidate matches when it is exactly ``symbol`` or when it ends with
    ``sep + symbol``, so matches always start on an identifier boundary
    (``"MyClass"`` does not match ``"NotMyClass"``).

    Args:
        candidates: set of possible matches for symbol
        symbol: the symbol to match against
        sep: the separator between identifier elements

    Returns:
        set of matches
    """
    qualified_suffix = sep + symbol
    return {
        item
        for item in candidates
        # The ``symbol and`` guard keeps an empty symbol from matching every
        # candidate that merely ends with the separator; an empty symbol can
        # only match an empty candidate.
        if item == symbol or (symbol and item.endswith(qualified_suffix))
    }


def join(*args):
    """Concatenate all positional string arguments, in order, without a separator."""
    glue = ''
    return glue.join(args)

Expand Down
Loading

0 comments on commit c67a45b

Please sign in to comment.