From bc455472a61f7a7cc8b9ab26a755be8d550e8db6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com>
Date: Sun, 1 Dec 2024 12:14:00 +0100
Subject: [PATCH] Add tool for linting `Doc/data/refcounts.dat`

---
 Tools/refcounts/.ruff.toml |  15 +++
 Tools/refcounts/lint.py    | 264 +++++++++++++++++++++++++++++++++++++
 Tools/refcounts/mypy.ini   |  10 ++
 3 files changed, 289 insertions(+)
 create mode 100644 Tools/refcounts/.ruff.toml
 create mode 100644 Tools/refcounts/lint.py
 create mode 100644 Tools/refcounts/mypy.ini

diff --git a/Tools/refcounts/.ruff.toml b/Tools/refcounts/.ruff.toml
new file mode 100644
index 000000000000000..588a07a51181721
--- /dev/null
+++ b/Tools/refcounts/.ruff.toml
@@ -0,0 +1,15 @@
+target-version = "py312"
+line-length = 80
+fix = true
+
+[lint]
+select = [
+    "ALL"
+]
+ignore = [
+    "D", # docstrings
+    "I001", # split imports
+    "Q00", # prefer double quotes over single quotes
+    "T201", # print() found
+    "PLR2004", # magic values
+]
diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py
new file mode 100644
index 000000000000000..2b7c73f9c8f7e77
--- /dev/null
+++ b/Tools/refcounts/lint.py
@@ -0,0 +1,264 @@
+"""Lint Doc/data/refcounts.dat."""
+
+from __future__ import annotations
+
+import itertools
+import re
+import tomllib
+from argparse import ArgumentParser
+from dataclasses import dataclass, field
+from enum import auto as _auto, Enum
+from pathlib import Path
+from typing import TYPE_CHECKING, LiteralString, NamedTuple
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterable, Mapping
+
+# Parameter name that check() accepts although it is not an identifier.
+C_ELLIPSIS: LiteralString = '...'
+
+# Match a '# TODO: <name>' comment line and capture <name>.
+MATCH_TODO: Callable[[str], re.Match[str] | None]
+MATCH_TODO = re.compile(r'^#\s*TODO:\s*(\w+)$').match
+
+# Parameter C types for which check() requires a reference count value.
+OBJECT_TYPES: frozenset[str] = frozenset()
+
+for qualifier, object_type, suffix in itertools.product(
+    ('const ', ''),
+    (
+        'PyObject',
+        'PyLongObject', 'PyTypeObject',
+        'PyCodeObject', 'PyFrameObject',
+        'PyModuleObject', 'PyVarObject',
+    ),
+    ('*', '**', '* const *', '* const*'),
+):
+    OBJECT_TYPES |= {
+        f'{qualifier}{object_type}{suffix}',
+        f'{qualifier}{object_type} {suffix}',
+    }
+del suffix, object_type, qualifier
+
+# Names never reported as missing by check_structure().
+IGNORE_LIST: frozenset[str] = frozenset((
+    # part of the stable ABI but should not be used at all
+    'PyUnicode_GetSize',
+    # part of the stable ABI but completely removed
+    '_PyState_AddModule',
+))
+
+def flno_(lineno: int) -> str:
+    # Format the line so that users can C/C from the terminal
+    # the line number and jump with their editor using Ctrl+G.
+    return f'{lineno:>5} '
+
+class RefType(Enum):
+    """Interpretation of the reference count field of an entry."""
+
+    UNKNOWN = _auto()  # any value not listed below
+    UNUSED = _auto()  # empty field
+    DECREF = _auto()  # '-1'
+    BORROW = _auto()  # '0'
+    INCREF = _auto()  # '+1' or '1'
+    STEALS = _auto()  # '$'
+    NULL = _auto()  # 'null' (any case); for return values only
+
+class LineInfo(NamedTuple):
+    """Fields of one colon-separated entry, as returned by parse_line()."""
+
+    func: str
+    ctype: str | None
+    name: str | None
+    reftype: RefType | None
+    comment: str
+
+@dataclass(slots=True)
+class Return:
+    """Return value description of a function."""
+
+    ctype: str | None
+    reftype: RefType | None
+    comment: str
+
+@dataclass(slots=True)
+class Param:
+    """Description of one function parameter and the line declaring it."""
+
+    name: str
+    lineno: int
+
+    ctype: str | None
+    reftype: RefType | None
+    comment: str
+
+@dataclass(slots=True)
+class Signature:
+    """A function entry: its return value followed by its parameters."""
+
+    name: str
+    lineno: int
+    rparam: Return
+    params: dict[str, Param] = field(default_factory=dict)
+
+class FileView(NamedTuple):
+    """Result of parse(): parsed signatures and names marked as TODO."""
+
+    signatures: Mapping[str, Signature]
+    incomplete: frozenset[str]
+
+def parse_line(line: str) -> LineInfo | None:
+    """Split a 'func:ctype:name:reftype:comment' line into a LineInfo.
+
+    Return None if the line has fewer than five fields or no function name.
+    """
+    parts = line.split(':', maxsplit=4)
+    if len(parts) != 5:
+        return None
+
+    func, raw_ctype, raw_name, raw_reftype, comment = parts
+    if not func:
+        return None
+
+    ctype = raw_ctype.strip() or None
+    name = raw_name.strip() or None
+
+    raw_reftype = raw_reftype.strip()
+    if raw_reftype == '-1':
+        reftype = RefType.DECREF
+    elif raw_reftype == '0':
+        reftype = RefType.BORROW
+    elif raw_reftype in {'+1', '1'}:
+        reftype = RefType.INCREF
+    elif raw_reftype == '$':
+        reftype = RefType.STEALS
+    elif raw_reftype.lower() == 'null':
+        reftype = RefType.NULL
+    elif not raw_reftype:
+        reftype = RefType.UNUSED
+    else:
+        reftype = RefType.UNKNOWN
+
+    comment = comment.strip()
+    return LineInfo(func, ctype, name, reftype, comment)
+
+def parse(lines: Iterable[str]) -> FileView:
+    """Build a FileView from the lines of the file, printing bad lines.
+
+    The first entry seen for a function describes its return value; the
+    following entries for the same function describe its parameters.
+    """
+    signatures: dict[str, Signature] = {}
+    incomplete: set[str] = set()
+
+    for lineno, line in enumerate(map(str.strip, lines), 1):
+        if not line:
+            continue
+        if line.startswith('#'):
+            if match := MATCH_TODO(line):
+                incomplete.add(match.group(1))
+            continue
+
+        info = parse_line(line)
+        if info is None:
+            print(f"{flno_(lineno)} cannot parse: {line}")
+            continue
+
+        func, ctype, name, reftype, comment = info
+
+        if func not in signatures:
+            # process return value
+            if name is not None:
+                print(f'{flno_(lineno)} named return value in {line!r}')
+            ret_param = Return(ctype, reftype, comment)
+            signatures[func] = Signature(func, lineno, ret_param)
+        else:
+            # process parameter
+            if name is None:
+                print(f'{flno_(lineno)} missing parameter name in {line!r}')
+                continue
+            sig: Signature = signatures[func]
+            if name in sig.params:
+                print(f'{flno_(lineno)} duplicated parameter name in {line!r}')
+                continue
+            sig.params[name] = Param(name, lineno, ctype, reftype, comment)
+
+    return FileView(signatures, frozenset(incomplete))
+
+class Warnings:
+    """Print warnings and count how many were emitted."""
+
+    def __init__(self) -> None:
+        self.count = 0
+
+    def block(self, sig: Signature, message: str) -> None:
+        self.count += 1
+        print(f'{flno_(sig.lineno)} {sig.name:50} {message}')
+
+    def param(self, sig: Signature, param: Param, message: str) -> None:
+        self.count += 1
+        fullname = f'{sig.name}[{param.name}]'
+        print(f'{flno_(param.lineno)} {fullname:50} {message}')
+
+def check(view: FileView) -> None:
+    """Print warnings about return values, parameters and entry order."""
+    w = Warnings()
+
+    for sig in view.signatures.values():  # type: Signature
+        # check the return value
+        rparam = sig.rparam
+        if not rparam.ctype:
+            w.block(sig, "missing return value type")
+        if rparam.reftype is RefType.UNKNOWN:
+            w.block(sig, "unknown return value type")
+        # check the parameters
+        for name, param in sig.params.items():  # type: (str, Param)
+            ctype, reftype = param.ctype, param.reftype
+            if ctype in OBJECT_TYPES and reftype is RefType.UNUSED:
+                w.param(sig, param, "missing reference count management")
+            if ctype not in OBJECT_TYPES and reftype is not RefType.UNUSED:
+                w.param(sig, param, "unused reference count management")
+            if name != C_ELLIPSIS and not name.isidentifier():
+                # Python accepts the same identifiers as in C
+                w.param(sig, param, "invalid parameter name")
+
+    print()
+    if w.count:
+        print(f"Found {w.count} issues")
+    names = view.signatures.keys()
+    if sorted(names) != list(names):
+        print("Entries are not sorted")
+
+def check_structure(view: FileView, stable_abi_file: str) -> None:
+    """Print the stable ABI functions that have no entry in the file."""
+    stable_abi_str = Path(stable_abi_file).read_text(encoding='utf-8')
+    stable_abi = tomllib.loads(stable_abi_str)
+    expect = stable_abi['function'].keys()
+    # check if there are missing entries (those marked as "TODO" are ignored)
+    actual = IGNORE_LIST | view.incomplete | view.signatures.keys()
+    if missing := (expect - actual):
+        print('[!] missing stable ABI entries:')
+        for name in sorted(missing):
+            print(name)
+
+def _create_parser() -> ArgumentParser:
+    parser = ArgumentParser(prog='lint.py')
+    parser.add_argument('file', help="the file to check")
+    parser.add_argument('--stable-abi', help="the stable ABI TOML file to use")
+    return parser
+
+def main() -> None:
+    """Parse the command line, then parse and check the given file."""
+    parser = _create_parser()
+    args = parser.parse_args()
+    lines = Path(args.file).read_text(encoding='utf-8').splitlines()
+    print(" PARSING ".center(80, '-'))
+    view = parse(lines)
+    print(" CHECKING ".center(80, '-'))
+    check(view)
+    if args.stable_abi:
+        print(" CHECKING STABLE ABI ".center(80, '-'))
+        check_structure(view, args.stable_abi)
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/refcounts/mypy.ini b/Tools/refcounts/mypy.ini
new file mode 100644
index 000000000000000..b8cd5db8727bd88
--- /dev/null
+++ b/Tools/refcounts/mypy.ini
@@ -0,0 +1,10 @@
+[mypy]
+files = Tools/refcounts/lint.py
+pretty = True
+show_traceback = True
+python_version = 3.12
+
+strict = True
+warn_unreachable = True
+enable_error_code = all
+warn_return_any = False