From 183388f267540884743dbe4d5aabce231a2a1e23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 1 Dec 2024 12:14:00 +0100 Subject: [PATCH 01/31] Add tool for linting `Doc/data/refcounts.dat` --- Tools/refcounts/.ruff.toml | 17 +++ Tools/refcounts/lint.py | 283 +++++++++++++++++++++++++++++++++++++ Tools/refcounts/mypy.ini | 10 ++ 3 files changed, 310 insertions(+) create mode 100644 Tools/refcounts/.ruff.toml create mode 100644 Tools/refcounts/lint.py create mode 100644 Tools/refcounts/mypy.ini diff --git a/Tools/refcounts/.ruff.toml b/Tools/refcounts/.ruff.toml new file mode 100644 index 00000000000000..ca1534365debec --- /dev/null +++ b/Tools/refcounts/.ruff.toml @@ -0,0 +1,17 @@ +target-version = "py312" +line-length = 80 +fix = true + +[lint] +select = [ + "ALL" +] +ignore = [ + "D", # docstrings + "I001", # split imports + "Q00", # prefer double quotes over single quotes + "T201", # print() found + "PLR2004", # magic values + "C901", # too complex + "PLR0912", # too many branches +] diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py new file mode 100644 index 00000000000000..56aa27d5b7acd1 --- /dev/null +++ b/Tools/refcounts/lint.py @@ -0,0 +1,283 @@ +"""Lint Doc/data/refcounts.dat.""" + +from __future__ import annotations + +import itertools +import re +import tomllib +from argparse import ArgumentParser +from dataclasses import dataclass, field +from enum import auto as _auto, Enum +from pathlib import Path +from typing import TYPE_CHECKING, LiteralString, NamedTuple + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping + +C_ELLIPSIS: LiteralString = '...' + +MATCH_TODO: Callable[[str], re.Match | None] +MATCH_TODO = re.compile(r'^#\s*TODO:\s*(\w+)$').match + +OBJECT_TYPES: frozenset[str] = frozenset() + +for qualifier, object_type, suffix in itertools.product( + ('const ', ''), + ( + 'PyObject', + 'PyLongObject', 'PyTypeObject', + 'PyCodeObject', 'PyFrameObject', + 'PyModuleObject', 'PyVarObject', + ), + ('*', '**', '* const *', '* const*'), +): + OBJECT_TYPES |= { + f'{qualifier}{object_type}{suffix}', + f'{qualifier}{object_type} {suffix}', + } +del suffix, object_type, qualifier + +IGNORE_LIST: frozenset[str] = frozenset(( + # part of the stable ABI but should not be used at all + 'PyUnicode_GetSize', + # part of the stable ABI but completely removed + '_PyState_AddModule', +)) + +def flno_(lineno: int) -> str: + # Format the line so that users can C/C from the terminal + # the line number and jump with their editor using Ctrl+G. + return f'{lineno:>5} ' + +class RefType(Enum): + UNKNOWN = _auto() + UNUSED = _auto() + DECREF = _auto() + BORROW = _auto() + INCREF = _auto() + STEALS = _auto() + NULL = _auto() # for return values only + +class LineInfo(NamedTuple): + func: str + ctype: str | None + name: str | None + reftype: RefType | None + comment: str + + raw_func: str + raw_ctype: str + raw_name: str + raw_reftype: str + + strip_func: bool + strip_ctype: bool + strip_name: bool + strip_reftype: bool + +@dataclass(slots=True) +class Return: + ctype: str | None + reftype: RefType | None + comment: str + +@dataclass(slots=True) +class Param: + name: str + lineno: int + + ctype: str | None + reftype: RefType | None + comment: str + +@dataclass(slots=True) +class Signature: + name: str + lineno: int + rparam: Return + params: dict[str, Param] = field(default_factory=dict) + +class FileView(NamedTuple): + signatures: Mapping[str, Signature] + incomplete: frozenset[str] + +def parse_line(line: str) -> LineInfo | None: + parts = line.split(':', maxsplit=4) + if len(parts) != 5: + return None + + raw_func, raw_ctype, raw_name, raw_reftype, comment = parts + + func = raw_func.strip() + strip_func = func != raw_func + if not func: + return None + + clean_ctype = raw_ctype.strip() + ctype = clean_ctype or None + strip_ctype = clean_ctype != raw_ctype + + clean_name = raw_name.strip() + name = clean_name or None + strip_name = clean_name != raw_name + + clean_reftype = raw_reftype.strip() + strip_reftype = clean_reftype != raw_reftype + + if clean_reftype == '-1': + reftype = RefType.DECREF + elif clean_reftype == '0': + reftype = RefType.BORROW + elif clean_reftype == '+1': + reftype = RefType.INCREF + elif clean_reftype == '$': + reftype = RefType.STEALS + elif clean_reftype == 'null': + reftype = RefType.NULL + elif not clean_reftype: + reftype = RefType.UNUSED + else: + reftype = RefType.UNKNOWN + + comment = comment.strip() + return LineInfo(func, ctype, name, reftype, comment, + raw_func, raw_ctype, raw_name, raw_reftype, + strip_func, strip_ctype, strip_name, strip_reftype) + +class ParserReporter: + def __init__(self) -> None: + self.count = 0 + + def info(self, lineno: int, message: str) -> None: + self.count += 1 + print(f'{flno_(lineno)} {message}') + + warn = error = info + +def parse(lines: Iterable[str]) -> FileView: + signatures: dict[str, Signature] = {} + incomplete: set[str] = set() + + w = ParserReporter() + + for lineno, line in enumerate(map(str.strip, lines), 1): + if not line: + continue + if line.startswith('#'): + if match := MATCH_TODO(line): + incomplete.add(match.group(1)) + continue + + e = parse_line(line) + if e is None: + w.error(lineno, f'cannot parse: {line!r}') + continue + + if e.strip_func: + w.warn(lineno, f'[func] whitespaces around {e.raw_func!r}') + if e.strip_ctype: + w.warn(lineno, f'[type] whitespaces around {e.raw_ctype!r}') + if e.strip_name: + w.warn(lineno, f'[name] whitespaces around {e.raw_name!r}') + if e.strip_reftype: + w.warn(lineno, f'[ref] whitespaces around {e.raw_reftype!r}') + + func, name = e.func, e.name + ctype, reftype = e.ctype, e.reftype + comment = e.comment + + if func not in signatures: + # process return value + if name is not None: + w.warn(lineno, f'named return value in {line!r}') + ret_param = Return(ctype, reftype, comment) + signatures[func] = Signature(func, lineno, ret_param) + else: + # process parameter + if name is None: + w.error(lineno, f'missing parameter name in {line!r}') + continue + sig: Signature = signatures[func] + if name in sig.params: + w.error(lineno, f'duplicated parameter name in {line!r}') + continue + sig.params[name] = Param(name, lineno, ctype, reftype, comment) + + if w.count: + print() + print(f"Found {w.count} issue(s)") + + return FileView(signatures, frozenset(incomplete)) + +class CheckerWarnings: + def __init__(self) -> None: + self.count = 0 + + def block(self, sig: Signature, message: str) -> None: + self.count += 1 + print(f'{flno_(sig.lineno)} {sig.name:50} {message}') + + def param(self, sig: Signature, param: Param, message: str) -> None: + self.count += 1 + fullname = f'{sig.name}[{param.name}]' + print(f'{flno_(param.lineno)} {fullname:50} {message}') + +def check(view: FileView) -> None: + w = CheckerWarnings() + + for sig in view.signatures.values(): # type: Signature + # check the return value + rparam = sig.rparam + if not rparam.ctype: + w.block(sig, "missing return value type") + if rparam.reftype is RefType.UNKNOWN: + w.block(sig, "unknown return value type") + # check the parameters + for name, param in sig.params.items(): # type: (str, Param) + ctype, reftype = param.ctype, param.reftype + if ctype in OBJECT_TYPES and reftype is RefType.UNUSED: + w.param(sig, param, "missing reference count management") + if ctype not in OBJECT_TYPES and reftype is not RefType.UNUSED: + w.param(sig, param, "unused reference count management") + if name != C_ELLIPSIS and not name.isidentifier(): + # Python accepts the same identifiers as in C + w.param(sig, param, "invalid parameter name") + + if w.count: + print() + print(f"Found {w.count} issue(s)") + names = view.signatures.keys() + if sorted(names) != list(names): + print("Entries are not sorted") + +def check_structure(view: FileView, stable_abi_file: str) -> None: + stable_abi_str = Path(stable_abi_file).read_text() + stable_abi = tomllib.loads(stable_abi_str) + expect = stable_abi['function'].keys() + # check if there are missing entries (those marked as "TODO" are ignored) + actual = IGNORE_LIST | view.incomplete | view.signatures.keys() + if missing := (expect - actual): + print('[!] missing stable ABI entries:') + for name in sorted(missing): + print(name) + +def _create_parser() -> ArgumentParser: + parser = ArgumentParser(prog='lint.py') + parser.add_argument('file', help="the file to check") + parser.add_argument('--stable-abi', help="the stable ABI TOML file to use") + return parser + +def main() -> None: + parser = _create_parser() + args = parser.parse_args() + lines = Path(args.file).read_text().splitlines() + print(" PARSING ".center(80, '-')) + view = parse(lines) + print(" CHECKING ".center(80, '-')) + check(view) + if args.stable_abi: + print(" CHECKING STABLE ABI ".center(80, '-')) + check_structure(view, args.stable_abi) + +if __name__ == "__main__": + main() diff --git a/Tools/refcounts/mypy.ini b/Tools/refcounts/mypy.ini new file mode 100644 index 00000000000000..b8cd5db8727bd8 --- /dev/null +++ b/Tools/refcounts/mypy.ini @@ -0,0 +1,10 @@ +[mypy] +files = Tools/refcounts/lint.py +pretty = True +show_traceback = True +python_version = 3.12 + +strict = True +warn_unreachable = True +enable_error_code = all +warn_return_any = False From 9c2a109b060d9297a6b3f022055bafef442a7d48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 1 Dec 2024 13:32:30 +0100 Subject: [PATCH 02/31] use single-quotes --- Tools/refcounts/.ruff.toml | 35 ++++++++++++++++++++++++++--------- Tools/refcounts/lint.py | 28 ++++++++++++++-------------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/Tools/refcounts/.ruff.toml b/Tools/refcounts/.ruff.toml index ca1534365debec..dc4324660200a7 100644 --- a/Tools/refcounts/.ruff.toml +++ b/Tools/refcounts/.ruff.toml @@ -1,17 +1,34 @@ target-version = "py312" -line-length = 80 +line-length = 79 fix = true +[format] +quote-style = "single" + [lint] select = [ - "ALL" + "ALL", + "TCH", ] ignore = [ - "D", # docstrings - "I001", # split imports - "Q00", # prefer double quotes over single quotes - "T201", # print() found - "PLR2004", # magic values - "C901", # too complex - "PLR0912", # too many branches + # isort + 'I001', # unsorted or unformatted import + # mccabbe + "C901", + # pydocstyle + "D", + # flake8-quotes (Q) + 'Q000', # double quotes found but single quotes preferred + 'Q001', # single quote docstring found but double quotes preferred + # flake8-print (T20) + 'T201', # print found + # pylint (PL) + 'PLR0912', # too many branches + 'PLR2004', # avoid magic values ] + +[lint.isort] +required-imports = ["from __future__ import annotations"] + +[lint.pycodestyle] +max-doc-length = 79 diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 56aa27d5b7acd1..b7f8d7441d19d5 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -205,7 +205,7 @@ def parse(lines: Iterable[str]) -> FileView: if w.count: print() - print(f"Found {w.count} issue(s)") + print(f'Found {w.count} issue(s)') return FileView(signatures, frozenset(incomplete)) @@ -229,26 +229,26 @@ def check(view: FileView) -> None: # check the return value rparam = sig.rparam if not rparam.ctype: - w.block(sig, "missing return value type") + w.block(sig, 'missing return value type') if rparam.reftype is RefType.UNKNOWN: - w.block(sig, "unknown return value type") + w.block(sig, 'unknown return value type') # check the parameters for name, param in sig.params.items(): # type: (str, Param) ctype, reftype = param.ctype, param.reftype if ctype in OBJECT_TYPES and reftype is RefType.UNUSED: - w.param(sig, param, "missing reference count management") + w.param(sig, param, 'missing reference count management') if ctype not in OBJECT_TYPES and reftype is not RefType.UNUSED: - w.param(sig, param, "unused reference count management") + w.param(sig, param, 'unused reference count management') if name != C_ELLIPSIS and not name.isidentifier(): # Python accepts the same identifiers as in C - w.param(sig, param, "invalid parameter name") + w.param(sig, param, 'invalid parameter name') if w.count: print() - print(f"Found {w.count} issue(s)") + print(f'Found {w.count} issue(s)') names = view.signatures.keys() if sorted(names) != list(names): - print("Entries are not sorted") + print('Entries are not sorted') def check_structure(view: FileView, stable_abi_file: str) -> None: stable_abi_str = Path(stable_abi_file).read_text() @@ -263,21 +263,21 @@ def check_structure(view: FileView, stable_abi_file: str) -> None: def _create_parser() -> ArgumentParser: parser = ArgumentParser(prog='lint.py') - parser.add_argument('file', help="the file to check") - parser.add_argument('--stable-abi', help="the stable ABI TOML file to use") + parser.add_argument('file', help='the file to check') + parser.add_argument('--stable-abi', help='the stable ABI TOML file to use') return parser def main() -> None: parser = _create_parser() args = parser.parse_args() lines = Path(args.file).read_text().splitlines() - print(" PARSING ".center(80, '-')) + print(' PARSING '.center(80, '-')) view = parse(lines) - print(" CHECKING ".center(80, '-')) + print(' CHECKING '.center(80, '-')) check(view) if args.stable_abi: - print(" CHECKING STABLE ABI ".center(80, '-')) + print(' CHECKING STABLE ABI '.center(80, '-')) check_structure(view, args.stable_abi) -if __name__ == "__main__": +if __name__ == '__main__': main() From 6c1a19e6cbb9e40b6acb6a4b0e87d442892cdef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 1 Dec 2024 14:00:41 +0100 Subject: [PATCH 03/31] add default paths --- Tools/refcounts/lint.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index b7f8d7441d19d5..59f762db25152a 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -5,7 +5,7 @@ import itertools import re import tomllib -from argparse import ArgumentParser +from argparse import ArgumentParser, RawDescriptionHelpFormatter from dataclasses import dataclass, field from enum import auto as _auto, Enum from pathlib import Path @@ -14,6 +14,10 @@ if TYPE_CHECKING: from collections.abc import Callable, Iterable, Mapping +ROOT = Path(__file__).parent.parent.parent.resolve() +DEFAULT_REFCOUNT_DAT_PATH: str = str(ROOT / 'Doc/data/refcounts.dat') +DEFAULT_STABLE_ABI_TOML_PATH: str = str(ROOT / 'Misc/stable_abi.toml') + C_ELLIPSIS: LiteralString = '...' MATCH_TODO: Callable[[str], re.Match | None] @@ -251,20 +255,33 @@ def check(view: FileView) -> None: print('Entries are not sorted') def check_structure(view: FileView, stable_abi_file: str) -> None: + print(f"Stable ABI file: {stable_abi_file}") + print() stable_abi_str = Path(stable_abi_file).read_text() stable_abi = tomllib.loads(stable_abi_str) expect = stable_abi['function'].keys() # check if there are missing entries (those marked as "TODO" are ignored) actual = IGNORE_LIST | view.incomplete | view.signatures.keys() if missing := (expect - actual): - print('[!] missing stable ABI entries:') + print(f'Missing {len(missing)} stable ABI entries:') for name in sorted(missing): print(name) +STABLE_ABI_FILE_SENTINEL = object() + def _create_parser() -> ArgumentParser: - parser = ArgumentParser(prog='lint.py') - parser.add_argument('file', help='the file to check') - parser.add_argument('--stable-abi', help='the stable ABI TOML file to use') + parser = ArgumentParser( + prog='lint.py', + formatter_class=RawDescriptionHelpFormatter, + description='Lint the refcounts.dat file.\n\n' + 'Use --abi or --abi=FILE to check against the stable ABI.', + ) + parser.add_argument('file', nargs='?', default=DEFAULT_REFCOUNT_DAT_PATH, + help='the refcounts.dat file to check ' + '(default: %(default)s)') + parser.add_argument('--abi', nargs='?', default=STABLE_ABI_FILE_SENTINEL, + help='check against the given stable_abi.toml file ' + '(default: %s)' % DEFAULT_STABLE_ABI_TOML_PATH) return parser def main() -> None: @@ -275,9 +292,10 @@ def main() -> None: view = parse(lines) print(' CHECKING '.center(80, '-')) check(view) - if args.stable_abi: + if args.abi is not STABLE_ABI_FILE_SENTINEL: + abi = args.abi or DEFAULT_STABLE_ABI_TOML_PATH print(' CHECKING STABLE ABI '.center(80, '-')) - check_structure(view, args.stable_abi) + check_structure(view, abi) if __name__ == '__main__': main() From 419197bfbd907a9c8852c73f154d47b5f857ae0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 1 Dec 2024 15:37:50 +0100 Subject: [PATCH 04/31] use NamedTuple whenever possible --- Tools/refcounts/lint.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 59f762db25152a..3d5e0a5caf2b63 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -79,14 +79,12 @@ class LineInfo(NamedTuple): strip_name: bool strip_reftype: bool -@dataclass(slots=True) -class Return: +class Return(NamedTuple): ctype: str | None reftype: RefType | None comment: str -@dataclass(slots=True) -class Param: +class Param(NamedTuple): name: str lineno: int @@ -94,7 +92,7 @@ class Param: reftype: RefType | None comment: str -@dataclass(slots=True) +@dataclass(slots=True, frozen=True) class Signature: name: str lineno: int From 58ffbeb5851d27eb0c2168ed6cba103b5226996b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 1 Dec 2024 15:51:29 +0100 Subject: [PATCH 05/31] address Peter's review --- Tools/refcounts/lint.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 3d5e0a5caf2b63..bd0f25a18af414 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -9,14 +9,14 @@ from dataclasses import dataclass, field from enum import auto as _auto, Enum from pathlib import Path -from typing import TYPE_CHECKING, LiteralString, NamedTuple +from typing import TYPE_CHECKING, LiteralString if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Mapping + from collections.abc import Callable, Final, Iterable, Mapping ROOT = Path(__file__).parent.parent.parent.resolve() -DEFAULT_REFCOUNT_DAT_PATH: str = str(ROOT / 'Doc/data/refcounts.dat') -DEFAULT_STABLE_ABI_TOML_PATH: str = str(ROOT / 'Misc/stable_abi.toml') +DEFAULT_REFCOUNT_DAT_PATH: Final[str] = str(ROOT / 'Doc/data/refcounts.dat') +DEFAULT_STABLE_ABI_TOML_PATH: Final[str] = str(ROOT / 'Misc/stable_abi.toml') C_ELLIPSIS: LiteralString = '...' @@ -62,7 +62,8 @@ class RefType(Enum): STEALS = _auto() NULL = _auto() # for return values only -class LineInfo(NamedTuple): +@dataclass(slots=True, frozen=True) +class LineInfo: func: str ctype: str | None name: str | None @@ -79,12 +80,14 @@ class LineInfo(NamedTuple): strip_name: bool strip_reftype: bool -class Return(NamedTuple): +@dataclass(slots=True, frozen=True) +class Return: ctype: str | None reftype: RefType | None comment: str -class Param(NamedTuple): +@dataclass(slots=True, frozen=True) +class Param: name: str lineno: int @@ -99,7 +102,8 @@ class Signature: rparam: Return params: dict[str, Param] = field(default_factory=dict) -class FileView(NamedTuple): +@dataclass(slots=True, frozen=True) +class FileView: signatures: Mapping[str, Signature] incomplete: frozenset[str] @@ -255,7 +259,7 @@ def check(view: FileView) -> None: def check_structure(view: FileView, stable_abi_file: str) -> None: print(f"Stable ABI file: {stable_abi_file}") print() - stable_abi_str = Path(stable_abi_file).read_text() + stable_abi_str = Path(stable_abi_file).read_text(encoding='utf-8') stable_abi = tomllib.loads(stable_abi_str) expect = stable_abi['function'].keys() # check if there are missing entries (those marked as "TODO" are ignored) @@ -265,7 +269,7 @@ def check_structure(view: FileView, stable_abi_file: str) -> None: for name in sorted(missing): print(name) -STABLE_ABI_FILE_SENTINEL = object() +_STABLE_ABI_FILE_SENTINEL: Final = object() def _create_parser() -> ArgumentParser: parser = ArgumentParser( @@ -277,7 +281,7 @@ def _create_parser() -> ArgumentParser: parser.add_argument('file', nargs='?', default=DEFAULT_REFCOUNT_DAT_PATH, help='the refcounts.dat file to check ' '(default: %(default)s)') - parser.add_argument('--abi', nargs='?', default=STABLE_ABI_FILE_SENTINEL, + parser.add_argument('--abi', nargs='?', default=_STABLE_ABI_FILE_SENTINEL, help='check against the given stable_abi.toml file ' '(default: %s)' % DEFAULT_STABLE_ABI_TOML_PATH) return parser @@ -285,7 +289,7 @@ def _create_parser() -> ArgumentParser: def main() -> None: parser = _create_parser() args = parser.parse_args() - lines = Path(args.file).read_text().splitlines() + lines = Path(args.file).read_text(encoding='utf-8').splitlines() print(' PARSING '.center(80, '-')) view = parse(lines) print(' CHECKING '.center(80, '-')) From 9a46f1064b2923cebc72e231833f5f4448b2dc3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 1 Dec 2024 15:54:33 +0100 Subject: [PATCH 06/31] fix typos --- Tools/refcounts/lint.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index bd0f25a18af414..7f554cbb8480b2 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -9,10 +9,11 @@ from dataclasses import dataclass, field from enum import auto as _auto, Enum from pathlib import Path -from typing import TYPE_CHECKING, LiteralString +from typing import TYPE_CHECKING if TYPE_CHECKING: - from collections.abc import Callable, Final, Iterable, Mapping + from collections.abc import Callable, Iterable, Mapping + from typing import Final, LiteralString ROOT = Path(__file__).parent.parent.parent.resolve() DEFAULT_REFCOUNT_DAT_PATH: Final[str] = str(ROOT / 'Doc/data/refcounts.dat') @@ -283,7 +284,7 @@ def _create_parser() -> ArgumentParser: '(default: %(default)s)') parser.add_argument('--abi', nargs='?', default=_STABLE_ABI_FILE_SENTINEL, help='check against the given stable_abi.toml file ' - '(default: %s)' % DEFAULT_STABLE_ABI_TOML_PATH) + f'(default: {DEFAULT_STABLE_ABI_TOML_PATH})') return parser def main() -> None: @@ -294,7 +295,7 @@ def main() -> None: view = parse(lines) print(' CHECKING '.center(80, '-')) check(view) - if args.abi is not STABLE_ABI_FILE_SENTINEL: + if args.abi is not _STABLE_ABI_FILE_SENTINEL: abi = args.abi or DEFAULT_STABLE_ABI_TOML_PATH print(' CHECKING STABLE ABI '.center(80, '-')) check_structure(view, abi) From 8372de68168c9ea962096269995f7e04c0a2154c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:50:57 +0100 Subject: [PATCH 07/31] address Alex's review --- .github/workflows/mypy.yml | 2 + Tools/refcounts/.ruff.toml | 47 +++++++++---------- Tools/refcounts/lint.py | 95 +++++++++++++++++++++++--------------- Tools/refcounts/mypy.ini | 2 +- 4 files changed, 85 insertions(+), 61 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index e5b05302b5ac27..f9e458a6d4b4e6 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -16,6 +16,7 @@ on: - "Tools/jit/**" - "Tools/peg_generator/**" - "Tools/requirements-dev.txt" + - "Tools/refcounts/lint.py" - "Tools/wasm/**" workflow_dispatch: @@ -43,6 +44,7 @@ jobs: "Tools/cases_generator", "Tools/clinic", "Tools/jit", + - "Tools/refcounts", "Tools/peg_generator", "Tools/wasm", ] diff --git a/Tools/refcounts/.ruff.toml b/Tools/refcounts/.ruff.toml index dc4324660200a7..0da2e0d03ed3e8 100644 --- a/Tools/refcounts/.ruff.toml +++ b/Tools/refcounts/.ruff.toml @@ -1,34 +1,33 @@ target-version = "py312" line-length = 79 fix = true - -[format] -quote-style = "single" +preview = true [lint] select = [ - "ALL", - "TCH", -] -ignore = [ - # isort - 'I001', # unsorted or unformatted import - # mccabbe - "C901", - # pydocstyle - "D", - # flake8-quotes (Q) - 'Q000', # double quotes found but single quotes preferred - 'Q001', # single quote docstring found but double quotes preferred - # flake8-print (T20) - 'T201', # print found - # pylint (PL) - 'PLR0912', # too many branches - 'PLR2004', # avoid magic values + "I", # isort + "F", # Enable all pyflakes rules + "E", # Enable all pycodestyle error rules + "W", # Enable all pycodestyle warning rules + "UP", # Enable all pyupgrade rules + "RUF100", # Ban unused `# noqa` comments + "C4", # flake8-comprehensions + "ISC", # flake8-implicit-str-concat + "TCH", # flake8-type-checking + "PTH", # flake8-use-pathlib + "PERF", # perflint + "FURB101", # read-whole-file + "FURB103", # write-whole-file + "FURB110", # if-exp-instead-of-operator + "FURB113", # repeated-append + "FURB116", # f-string-number-format + "FURB129", # readlines-in-for + "FURB132", # check-and-remove-from-set + "FURB136", # if-expr-min-max + "FURB142", # for-loop-set-mutations + "FURB167", # regex-flag-alias + "FURB177", # implicit-cwd ] -[lint.isort] -required-imports = ["from __future__ import annotations"] - [lint.pycodestyle] max-doc-length = 79 diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 7f554cbb8480b2..a460b994c5b8a1 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -2,12 +2,12 @@ from __future__ import annotations +import enum import itertools import re import tomllib from argparse import ArgumentParser, RawDescriptionHelpFormatter from dataclasses import dataclass, field -from enum import auto as _auto, Enum from pathlib import Path from typing import TYPE_CHECKING @@ -21,7 +21,7 @@ C_ELLIPSIS: LiteralString = '...' -MATCH_TODO: Callable[[str], re.Match | None] +MATCH_TODO: Callable[[str], re.Match[str] | None] MATCH_TODO = re.compile(r'^#\s*TODO:\s*(\w+)$').match OBJECT_TYPES: frozenset[str] = frozenset() @@ -49,21 +49,24 @@ '_PyState_AddModule', )) + def flno_(lineno: int) -> str: # Format the line so that users can C/C from the terminal # the line number and jump with their editor using Ctrl+G. return f'{lineno:>5} ' -class RefType(Enum): - UNKNOWN = _auto() - UNUSED = _auto() - DECREF = _auto() - BORROW = _auto() - INCREF = _auto() - STEALS = _auto() - NULL = _auto() # for return values only -@dataclass(slots=True, frozen=True) +class RefType(enum.Enum): + UNKNOWN = enum.auto() + UNUSED = enum.auto() + DECREF = enum.auto() + BORROW = enum.auto() + INCREF = enum.auto() + STEALS = enum.auto() + NULL = enum.auto() # for return values only + + +@dataclass(slots=True, frozen=True, kw_only=True) class LineInfo: func: str ctype: str | None @@ -81,12 +84,14 @@ class LineInfo: strip_name: bool strip_reftype: bool + @dataclass(slots=True, frozen=True) class Return: ctype: str | None reftype: RefType | None comment: str + @dataclass(slots=True, frozen=True) class Param: name: str @@ -96,6 +101,7 @@ class Param: reftype: RefType | None comment: str + @dataclass(slots=True, frozen=True) class Signature: name: str @@ -103,11 +109,13 @@ class Signature: rparam: Return params: dict[str, Param] = field(default_factory=dict) + @dataclass(slots=True, frozen=True) class FileView: signatures: Mapping[str, Signature] incomplete: frozenset[str] + def parse_line(line: str) -> LineInfo | None: parts = line.split(':', maxsplit=4) if len(parts) != 5: @@ -131,29 +139,36 @@ def parse_line(line: str) -> LineInfo | None: clean_reftype = raw_reftype.strip() strip_reftype = clean_reftype != raw_reftype - if clean_reftype == '-1': - reftype = RefType.DECREF - elif clean_reftype == '0': - reftype = RefType.BORROW - elif clean_reftype == '+1': - reftype = RefType.INCREF - elif clean_reftype == '$': - reftype = RefType.STEALS - elif clean_reftype == 'null': - reftype = RefType.NULL - elif not clean_reftype: - reftype = RefType.UNUSED - else: - reftype = RefType.UNKNOWN + match clean_reftype: + case '-1': + reftype = RefType.DECREF + case '0': + reftype = RefType.BORROW + case '+1': + reftype = RefType.INCREF + case '$': + reftype = RefType.STEALS + case 'null': + reftype = RefType.NULL + case '': + reftype = RefType.UNUSED + case _: + reftype = RefType.UNKNOWN comment = comment.strip() - return LineInfo(func, ctype, name, reftype, comment, - raw_func, raw_ctype, raw_name, raw_reftype, - strip_func, strip_ctype, strip_name, strip_reftype) + return LineInfo( + func=func, ctype=ctype, name=name, + reftype=reftype, comment=comment, + raw_func=raw_func, raw_ctype=raw_ctype, + raw_name=raw_name, raw_reftype=raw_reftype, + strip_func=strip_func, strip_ctype=strip_ctype, + strip_name=strip_name, strip_reftype=strip_reftype, + ) + +@dataclass(slots=True) class ParserReporter: - def __init__(self) -> None: - self.count = 0 + count: int = 0 def info(self, lineno: int, message: str) -> None: self.count += 1 @@ -161,6 +176,7 @@ def info(self, lineno: int, message: str) -> None: warn = error = info + def parse(lines: Iterable[str]) -> FileView: signatures: dict[str, Signature] = {} incomplete: set[str] = set() @@ -216,9 +232,10 @@ def parse(lines: Iterable[str]) -> FileView: return FileView(signatures, frozenset(incomplete)) + +@dataclass(slots=True) class CheckerWarnings: - def __init__(self) -> None: - self.count = 0 + count: int = 0 def block(self, sig: Signature, message: str) -> None: self.count += 1 @@ -229,6 +246,7 @@ def param(self, sig: Signature, param: Param, message: str) -> None: fullname = f'{sig.name}[{param.name}]' print(f'{flno_(param.lineno)} {fullname:50} {message}') + def check(view: FileView) -> None: w = CheckerWarnings() @@ -247,7 +265,7 @@ def check(view: FileView) -> None: if ctype not in OBJECT_TYPES and reftype is not RefType.UNUSED: w.param(sig, param, 'unused reference count management') if name != C_ELLIPSIS and not name.isidentifier(): - # Python accepts the same identifiers as in C + # C accepts the same identifiers as in Python w.param(sig, param, 'invalid parameter name') if w.count: @@ -257,6 +275,7 @@ def check(view: FileView) -> None: if sorted(names) != list(names): print('Entries are not sorted') + def check_structure(view: FileView, stable_abi_file: str) -> None: print(f"Stable ABI file: {stable_abi_file}") print() @@ -270,7 +289,9 @@ def check_structure(view: FileView, stable_abi_file: str) -> None: for name in sorted(missing): print(name) -_STABLE_ABI_FILE_SENTINEL: Final = object() + +_STABLE_ABI_PATH_SENTINEL: Final = object() + def _create_parser() -> ArgumentParser: parser = ArgumentParser( @@ -282,11 +303,12 @@ def _create_parser() -> ArgumentParser: parser.add_argument('file', nargs='?', default=DEFAULT_REFCOUNT_DAT_PATH, help='the refcounts.dat file to check ' '(default: %(default)s)') - parser.add_argument('--abi', nargs='?', default=_STABLE_ABI_FILE_SENTINEL, + parser.add_argument('--abi', nargs='?', default=_STABLE_ABI_PATH_SENTINEL, help='check against the given stable_abi.toml file ' f'(default: {DEFAULT_STABLE_ABI_TOML_PATH})') return parser + def main() -> None: parser = _create_parser() args = parser.parse_args() @@ -295,10 +317,11 @@ def main() -> None: view = parse(lines) print(' CHECKING '.center(80, '-')) check(view) - if args.abi is not _STABLE_ABI_FILE_SENTINEL: + if args.abi is not _STABLE_ABI_PATH_SENTINEL: abi = args.abi or DEFAULT_STABLE_ABI_TOML_PATH print(' CHECKING STABLE ABI '.center(80, '-')) check_structure(view, abi) + if __name__ == '__main__': main() diff --git a/Tools/refcounts/mypy.ini b/Tools/refcounts/mypy.ini index b8cd5db8727bd8..3380e61b11bc43 100644 --- a/Tools/refcounts/mypy.ini +++ b/Tools/refcounts/mypy.ini @@ -6,5 +6,5 @@ python_version = 3.12 strict = True warn_unreachable = True -enable_error_code = all +enable_error_code = ignore-without-code,redundant-expr,truthy-bool warn_return_any = False From 4bcf719c5435b56d50735e16c8bcb58ec181839a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:52:06 +0100 Subject: [PATCH 08/31] format using a mix of black/ruff/picnixz format --- Tools/refcounts/lint.py | 133 +++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index a460b994c5b8a1..82d2c51ec9db6a 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -16,44 +16,43 @@ from typing import Final, LiteralString ROOT = Path(__file__).parent.parent.parent.resolve() -DEFAULT_REFCOUNT_DAT_PATH: Final[str] = str(ROOT / 'Doc/data/refcounts.dat') -DEFAULT_STABLE_ABI_TOML_PATH: Final[str] = str(ROOT / 'Misc/stable_abi.toml') +DEFAULT_REFCOUNT_DAT_PATH: Final[str] = str(ROOT / "Doc/data/refcounts.dat") +DEFAULT_STABLE_ABI_TOML_PATH: Final[str] = str(ROOT / "Misc/stable_abi.toml") -C_ELLIPSIS: LiteralString = '...' +C_ELLIPSIS: LiteralString = "..." MATCH_TODO: Callable[[str], re.Match[str] | None] -MATCH_TODO = re.compile(r'^#\s*TODO:\s*(\w+)$').match +MATCH_TODO = re.compile(r"^#\s*TODO:\s*(\w+)$").match OBJECT_TYPES: frozenset[str] = frozenset() for qualifier, object_type, suffix in itertools.product( - ('const ', ''), + ("const ", ""), ( - 'PyObject', - 'PyLongObject', 'PyTypeObject', - 'PyCodeObject', 'PyFrameObject', - 'PyModuleObject', 'PyVarObject', + "PyObject", "PyLongObject", "PyTypeObject", + "PyCodeObject", "PyFrameObject", "PyModuleObject", + "PyVarObject", ), - ('*', '**', '* const *', '* const*'), + ("*", "**", "* const *", "* const*"), ): OBJECT_TYPES |= { - f'{qualifier}{object_type}{suffix}', - f'{qualifier}{object_type} {suffix}', + f"{qualifier}{object_type}{suffix}", + f"{qualifier}{object_type} {suffix}", } del suffix, object_type, qualifier IGNORE_LIST: frozenset[str] = frozenset(( # part of the stable ABI but should not be used at all - 'PyUnicode_GetSize', + "PyUnicode_GetSize", # part of the stable ABI but completely removed - '_PyState_AddModule', + "_PyState_AddModule", )) def flno_(lineno: int) -> str: # Format the line so that users can C/C from the terminal # the line number and jump with their editor using Ctrl+G. - return f'{lineno:>5} ' + return f"{lineno:>5} " class RefType(enum.Enum): @@ -117,7 +116,7 @@ class FileView: def parse_line(line: str) -> LineInfo | None: - parts = line.split(':', maxsplit=4) + parts = line.split(":", maxsplit=4) if len(parts) != 5: return None @@ -140,25 +139,24 @@ def parse_line(line: str) -> LineInfo | None: strip_reftype = clean_reftype != raw_reftype match clean_reftype: - case '-1': + case "-1": reftype = RefType.DECREF - case '0': + case "0": reftype = RefType.BORROW - case '+1': + case "+1": reftype = RefType.INCREF - case '$': + case "$": reftype = RefType.STEALS - case 'null': + case "null": reftype = RefType.NULL - case '': + case "": reftype = RefType.UNUSED case _: reftype = RefType.UNKNOWN comment = comment.strip() return LineInfo( - func=func, ctype=ctype, name=name, - reftype=reftype, comment=comment, + func=func, ctype=ctype, name=name, reftype=reftype, comment=comment, raw_func=raw_func, raw_ctype=raw_ctype, raw_name=raw_name, raw_reftype=raw_reftype, strip_func=strip_func, strip_ctype=strip_ctype, @@ -172,9 +170,15 @@ class ParserReporter: def info(self, lineno: int, message: str) -> None: self.count += 1 - print(f'{flno_(lineno)} {message}') + print(f"{flno_(lineno)} {message}") - warn = error = info + def warn(self, lineno: int, message: str) -> None: + # same as info() but semantically different + self.info(lineno, message) + + def error(self, lineno: int, message: str) -> None: + # same as info() but semantically different + self.info(lineno, message) def parse(lines: Iterable[str]) -> FileView: @@ -186,24 +190,24 @@ def parse(lines: Iterable[str]) -> FileView: for lineno, line in enumerate(map(str.strip, lines), 1): if not line: continue - if line.startswith('#'): + if line.startswith("#"): if match := MATCH_TODO(line): incomplete.add(match.group(1)) continue e = parse_line(line) if e is None: - w.error(lineno, f'cannot parse: {line!r}') + w.error(lineno, f"cannot parse: {line!r}") continue if e.strip_func: - w.warn(lineno, f'[func] whitespaces around {e.raw_func!r}') + w.warn(lineno, f"[func] whitespaces around {e.raw_func!r}") if e.strip_ctype: - w.warn(lineno, f'[type] whitespaces around {e.raw_ctype!r}') + w.warn(lineno, f"[type] whitespaces around {e.raw_ctype!r}") if e.strip_name: - w.warn(lineno, f'[name] whitespaces around {e.raw_name!r}') + w.warn(lineno, f"[name] whitespaces around {e.raw_name!r}") if e.strip_reftype: - w.warn(lineno, f'[ref] whitespaces around {e.raw_reftype!r}') + w.warn(lineno, f"[ref] whitespaces around {e.raw_reftype!r}") func, name = e.func, e.name ctype, reftype = e.ctype, e.reftype @@ -212,23 +216,23 @@ def parse(lines: Iterable[str]) -> FileView: if func not in signatures: # process return value if name is not None: - w.warn(lineno, f'named return value in {line!r}') + w.warn(lineno, f"named return value in {line!r}") ret_param = Return(ctype, reftype, comment) signatures[func] = Signature(func, lineno, ret_param) else: # process parameter if name is None: - w.error(lineno, f'missing parameter name in {line!r}') + w.error(lineno, f"missing parameter name in {line!r}") continue sig: Signature = signatures[func] if name in sig.params: - w.error(lineno, f'duplicated parameter name in {line!r}') + w.error(lineno, f"duplicated parameter name in {line!r}") continue sig.params[name] = Param(name, lineno, ctype, reftype, comment) if w.count: print() - print(f'Found {w.count} issue(s)') + print(f"Found {w.count} issue(s)") return FileView(signatures, frozenset(incomplete)) @@ -239,12 +243,12 @@ class CheckerWarnings: def block(self, sig: Signature, message: str) -> None: self.count += 1 - print(f'{flno_(sig.lineno)} {sig.name:50} {message}') + print(f"{flno_(sig.lineno)} {sig.name:50} {message}") def param(self, sig: Signature, param: Param, message: str) -> None: self.count += 1 - fullname = f'{sig.name}[{param.name}]' - print(f'{flno_(param.lineno)} {fullname:50} {message}') + fullname = f"{sig.name}[{param.name}]" + print(f"{flno_(param.lineno)} {fullname:50} {message}") def check(view: FileView) -> None: @@ -254,38 +258,38 @@ def check(view: FileView) -> None: # check the return value rparam = sig.rparam if not rparam.ctype: - w.block(sig, 'missing return value type') + w.block(sig, "missing return value type") if rparam.reftype is RefType.UNKNOWN: - w.block(sig, 'unknown return value type') + w.block(sig, "unknown return value type") # check the parameters for name, param in sig.params.items(): # type: (str, Param) ctype, reftype = param.ctype, param.reftype if ctype in OBJECT_TYPES and reftype is RefType.UNUSED: - w.param(sig, param, 'missing reference count management') + w.param(sig, param, "missing reference count management") if ctype not in OBJECT_TYPES and reftype is not RefType.UNUSED: - w.param(sig, param, 'unused reference count management') + w.param(sig, param, "unused reference count management") if name != C_ELLIPSIS and not name.isidentifier(): # C accepts the same identifiers as in Python - w.param(sig, param, 'invalid parameter name') + w.param(sig, param, "invalid parameter name") if w.count: print() - print(f'Found {w.count} issue(s)') + print(f"Found {w.count} issue(s)") names = view.signatures.keys() if sorted(names) != list(names): - print('Entries are not sorted') + print("Entries are not sorted") def check_structure(view: FileView, stable_abi_file: str) -> None: print(f"Stable ABI file: {stable_abi_file}") print() - stable_abi_str = Path(stable_abi_file).read_text(encoding='utf-8') + stable_abi_str = Path(stable_abi_file).read_text(encoding="utf-8") stable_abi = tomllib.loads(stable_abi_str) - expect = stable_abi['function'].keys() + expect = stable_abi["function"].keys() # check if there are missing entries (those marked as "TODO" are ignored) actual = IGNORE_LIST | view.incomplete | view.signatures.keys() if missing := (expect - actual): - print(f'Missing {len(missing)} stable ABI entries:') + print(f"Missing {len(missing)} stable ABI entries:") for name in sorted(missing): print(name) @@ -295,33 +299,34 @@ def check_structure(view: FileView, stable_abi_file: str) -> None: def _create_parser() -> ArgumentParser: parser = ArgumentParser( - prog='lint.py', - formatter_class=RawDescriptionHelpFormatter, - description='Lint the refcounts.dat file.\n\n' - 'Use --abi or --abi=FILE to check against the stable ABI.', + prog="lint.py", formatter_class=RawDescriptionHelpFormatter, + description=( + "Lint the refcounts.dat file.\n\n" + "Use --abi or --abi=FILE to check against the stable ABI." + ), ) - parser.add_argument('file', nargs='?', default=DEFAULT_REFCOUNT_DAT_PATH, - help='the refcounts.dat file to check ' - '(default: %(default)s)') - parser.add_argument('--abi', nargs='?', default=_STABLE_ABI_PATH_SENTINEL, - help='check against the given stable_abi.toml file ' - f'(default: {DEFAULT_STABLE_ABI_TOML_PATH})') + _ = parser.add_argument + _("file", nargs="?", default=DEFAULT_REFCOUNT_DAT_PATH, + help="the refcounts.dat file to check (default: %(default)s)") + _("--abi", nargs="?", default=_STABLE_ABI_PATH_SENTINEL, + help=(f"check against the given stable_abi.toml file " + f"(default: {DEFAULT_STABLE_ABI_TOML_PATH})")) return parser def main() -> None: parser = _create_parser() args = parser.parse_args() - lines = Path(args.file).read_text(encoding='utf-8').splitlines() - print(' PARSING '.center(80, '-')) + lines = Path(args.file).read_text(encoding="utf-8").splitlines() + print(" PARSING ".center(80, "-")) view = parse(lines) - print(' CHECKING '.center(80, '-')) + print(" CHECKING ".center(80, "-")) check(view) if args.abi is not _STABLE_ABI_PATH_SENTINEL: abi = args.abi or DEFAULT_STABLE_ABI_TOML_PATH - print(' CHECKING STABLE ABI '.center(80, '-')) + print(" CHECKING STABLE ABI ".center(80, "-")) check_structure(view, abi) -if __name__ == '__main__': +if __name__ == "__main__": main() From 841a4b121271d1052805a10746558d6543893957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:07:53 +0100 Subject: [PATCH 09/31] rename STEALS -> STEAL --- Tools/refcounts/lint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 82d2c51ec9db6a..4c540a05079876 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -61,7 +61,7 @@ class RefType(enum.Enum): DECREF = enum.auto() BORROW = enum.auto() INCREF = enum.auto() - STEALS = enum.auto() + STEAL = enum.auto() NULL = enum.auto() # for return values only @@ -146,7 +146,7 @@ def parse_line(line: str) -> LineInfo | None: case "+1": reftype = RefType.INCREF case "$": - reftype = RefType.STEALS + reftype = RefType.STEAL case "null": reftype = RefType.NULL case "": From 970cbc70822b60cb0e47ac3d623c9c2bb10e4330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:08:22 +0100 Subject: [PATCH 10/31] detect more ref. count manageable objects --- Doc/data/refcounts.dat | 6 +++--- Tools/refcounts/lint.py | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 6bfcc191b2270b..64e8c0cd7a6532 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -833,11 +833,11 @@ PyEval_EvalCodeEx:PyObject*::+1: PyEval_EvalCodeEx:PyObject*:co:0: PyEval_EvalCodeEx:PyObject*:globals:0: PyEval_EvalCodeEx:PyObject*:locals:0: -PyEval_EvalCodeEx:PyObject*const*:args:: +PyEval_EvalCodeEx:PyObject*const*:args:0: PyEval_EvalCodeEx:int:argcount:: -PyEval_EvalCodeEx:PyObject*const*:kws:: +PyEval_EvalCodeEx:PyObject*const*:kws:0: PyEval_EvalCodeEx:int:kwcount:: -PyEval_EvalCodeEx:PyObject*const*:defs:: +PyEval_EvalCodeEx:PyObject*const*:defs:0: PyEval_EvalCodeEx:int:defcount:: PyEval_EvalCodeEx:PyObject*:kwdefs:0: PyEval_EvalCodeEx:PyObject*:closure:0: diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 4c540a05079876..03cbf82725ecc3 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -33,7 +33,11 @@ "PyCodeObject", "PyFrameObject", "PyModuleObject", "PyVarObject", ), - ("*", "**", "* const *", "* const*"), + ( + "*", + "**", "* *", + "*const*", "*const *", "* const*", "* const *", + ), ): OBJECT_TYPES |= { f"{qualifier}{object_type}{suffix}", From 45584830a048e49abe1f719c5be7ea981f401b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:09:42 +0100 Subject: [PATCH 11/31] add `lineno` to RETURN values and use kw-only --- Tools/refcounts/lint.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 03cbf82725ecc3..251368053076c3 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -93,24 +93,26 @@ class Return: ctype: str | None reftype: RefType | None comment: str + lineno: int = field(kw_only=True) @dataclass(slots=True, frozen=True) class Param: name: str - lineno: int ctype: str | None reftype: RefType | None comment: str + lineno: int = field(kw_only=True) + @dataclass(slots=True, frozen=True) class Signature: name: str - lineno: int rparam: Return params: dict[str, Param] = field(default_factory=dict) + lineno: int = field(kw_only=True) @dataclass(slots=True, frozen=True) @@ -221,18 +223,19 @@ def parse(lines: Iterable[str]) -> FileView: # process return value if name is not None: w.warn(lineno, f"named return value in {line!r}") - ret_param = Return(ctype, reftype, comment) - signatures[func] = Signature(func, lineno, ret_param) + ret_param = Return(ctype, reftype, comment, lineno=lineno) + signatures[func] = Signature(func, ret_param, lineno=lineno) else: # process parameter if name is None: w.error(lineno, f"missing parameter name in {line!r}") continue sig: Signature = signatures[func] - if name in sig.params: + params = sig.params + if name in params: w.error(lineno, f"duplicated parameter name in {line!r}") continue - sig.params[name] = Param(name, lineno, ctype, reftype, comment) + params[name] = Param(name, ctype, reftype, comment, lineno=lineno) if w.count: print() From b8f6090527dbccaf456be6b90569c9c7be61b527 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:11:07 +0100 Subject: [PATCH 12/31] use helper for C identifier detection --- Tools/refcounts/lint.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 251368053076c3..51db2157daa172 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -59,6 +59,12 @@ def flno_(lineno: int) -> str: return f"{lineno:>5} " +def is_c_parameter_name(name: str) -> bool: + """Check if *name* is a valid C parameter name.""" + # C accepts the same identifiers as in Python + return name == C_ELLIPSIS or name.isidentifier() + + class RefType(enum.Enum): UNKNOWN = enum.auto() UNUSED = enum.auto() @@ -269,14 +275,13 @@ def check(view: FileView) -> None: if rparam.reftype is RefType.UNKNOWN: w.block(sig, "unknown return value type") # check the parameters - for name, param in sig.params.items(): # type: (str, Param) + for param in sig.params.values(): # type: Param ctype, reftype = param.ctype, param.reftype if ctype in OBJECT_TYPES and reftype is RefType.UNUSED: w.param(sig, param, "missing reference count management") if ctype not in OBJECT_TYPES and reftype is not RefType.UNUSED: w.param(sig, param, "unused reference count management") - if name != C_ELLIPSIS and not name.isidentifier(): - # C accepts the same identifiers as in Python + if not is_c_parameter_name(param.name): w.param(sig, param, "invalid parameter name") if w.count: From db9b6e6968c23abea033a6a2ae033271c0ea2b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:12:18 +0100 Subject: [PATCH 13/31] `RefType` -> `RefEffect` --- Tools/refcounts/lint.py | 62 ++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 51db2157daa172..ec4e6fad560891 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -65,12 +65,12 @@ def is_c_parameter_name(name: str) -> bool: return name == C_ELLIPSIS or name.isidentifier() -class RefType(enum.Enum): +class RefEffect(enum.Enum): UNKNOWN = enum.auto() UNUSED = enum.auto() DECREF = enum.auto() - BORROW = enum.auto() INCREF = enum.auto() + BORROW = enum.auto() STEAL = enum.auto() NULL = enum.auto() # for return values only @@ -80,24 +80,24 @@ class LineInfo: func: str ctype: str | None name: str | None - reftype: RefType | None + effect: RefEffect comment: str raw_func: str raw_ctype: str raw_name: str - raw_reftype: str + raw_effect: str strip_func: bool strip_ctype: bool strip_name: bool - strip_reftype: bool + strip_effect: bool @dataclass(slots=True, frozen=True) class Return: ctype: str | None - reftype: RefType | None + effect: RefEffect comment: str lineno: int = field(kw_only=True) @@ -107,7 +107,7 @@ class Param: name: str ctype: str | None - reftype: RefType | None + effect: RefEffect comment: str lineno: int = field(kw_only=True) @@ -132,7 +132,7 @@ def parse_line(line: str) -> LineInfo | None: if len(parts) != 5: return None - raw_func, raw_ctype, raw_name, raw_reftype, comment = parts + raw_func, raw_ctype, raw_name, raw_effect, comment = parts func = raw_func.strip() strip_func = func != raw_func @@ -147,32 +147,32 @@ def parse_line(line: str) -> LineInfo | None: name = clean_name or None strip_name = clean_name != raw_name - clean_reftype = raw_reftype.strip() - strip_reftype = clean_reftype != raw_reftype + clean_effect = raw_effect.strip() + strip_effect = clean_effect != raw_effect - match clean_reftype: + match clean_effect: case "-1": - reftype = RefType.DECREF - case "0": - reftype = RefType.BORROW + effect = RefEffect.DECREF case "+1": - reftype = RefType.INCREF + effect = RefEffect.INCREF + case "0": + effect = RefEffect.BORROW case "$": - reftype = RefType.STEAL + effect = RefEffect.STEAL case "null": - reftype = RefType.NULL + effect = RefEffect.NULL case "": - reftype = RefType.UNUSED + effect = RefEffect.UNUSED case _: - reftype = RefType.UNKNOWN + effect = RefEffect.UNKNOWN comment = comment.strip() return LineInfo( - func=func, ctype=ctype, name=name, reftype=reftype, comment=comment, + func=func, ctype=ctype, name=name, effect=effect, comment=comment, raw_func=raw_func, raw_ctype=raw_ctype, - raw_name=raw_name, raw_reftype=raw_reftype, + raw_name=raw_name, raw_effect=raw_effect, strip_func=strip_func, strip_ctype=strip_ctype, - strip_name=strip_name, strip_reftype=strip_reftype, + strip_name=strip_name, strip_effect=strip_effect, ) @@ -218,18 +218,18 @@ def parse(lines: Iterable[str]) -> FileView: w.warn(lineno, f"[type] whitespaces around {e.raw_ctype!r}") if e.strip_name: w.warn(lineno, f"[name] whitespaces around {e.raw_name!r}") - if e.strip_reftype: - w.warn(lineno, f"[ref] whitespaces around {e.raw_reftype!r}") + if e.strip_effect: + w.warn(lineno, f"[ref] whitespaces around {e.raw_effect!r}") func, name = e.func, e.name - ctype, reftype = e.ctype, e.reftype + ctype, effect = e.ctype, e.effect comment = e.comment if func not in signatures: # process return value if name is not None: w.warn(lineno, f"named return value in {line!r}") - ret_param = Return(ctype, reftype, comment, lineno=lineno) + ret_param = Return(ctype, effect, comment, lineno=lineno) signatures[func] = Signature(func, ret_param, lineno=lineno) else: # process parameter @@ -241,7 +241,7 @@ def parse(lines: Iterable[str]) -> FileView: if name in params: w.error(lineno, f"duplicated parameter name in {line!r}") continue - params[name] = Param(name, ctype, reftype, comment, lineno=lineno) + params[name] = Param(name, ctype, effect, comment, lineno=lineno) if w.count: print() @@ -272,14 +272,14 @@ def check(view: FileView) -> None: rparam = sig.rparam if not rparam.ctype: w.block(sig, "missing return value type") - if rparam.reftype is RefType.UNKNOWN: + if rparam.effect is RefEffect.UNKNOWN: w.block(sig, "unknown return value type") # check the parameters for param in sig.params.values(): # type: Param - ctype, reftype = param.ctype, param.reftype - if ctype in OBJECT_TYPES and reftype is RefType.UNUSED: + ctype, effect = param.ctype, param.effect + if ctype in OBJECT_TYPES and effect is RefEffect.UNUSED: w.param(sig, param, "missing reference count management") - if ctype not in OBJECT_TYPES and reftype is not RefType.UNUSED: + if ctype not in OBJECT_TYPES and effect is not RefEffect.UNUSED: w.param(sig, param, "unused reference count management") if not is_c_parameter_name(param.name): w.param(sig, param, "invalid parameter name") From 480f500af06dfa2db354d9dbc956326506605b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:14:27 +0100 Subject: [PATCH 14/31] improve `ParserReporter` --- Tools/refcounts/lint.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index ec4e6fad560891..1b53643d08fe4e 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -178,18 +178,23 @@ def parse_line(line: str) -> LineInfo | None: @dataclass(slots=True) class ParserReporter: - count: int = 0 + + warnings_count: int = 0 + errors_count: int = 0 + + @property + def issues_count(self) -> int: + return self.warnings_count + self.errors_count def info(self, lineno: int, message: str) -> None: - self.count += 1 print(f"{flno_(lineno)} {message}") def warn(self, lineno: int, message: str) -> None: - # same as info() but semantically different + self.warnings_count += 1 self.info(lineno, message) def error(self, lineno: int, message: str) -> None: - # same as info() but semantically different + self.errors_count += 1 self.info(lineno, message) @@ -197,7 +202,7 @@ def parse(lines: Iterable[str]) -> FileView: signatures: dict[str, Signature] = {} incomplete: set[str] = set() - w = ParserReporter() + r = ParserReporter() for lineno, line in enumerate(map(str.strip, lines), 1): if not line: @@ -209,17 +214,17 @@ def parse(lines: Iterable[str]) -> FileView: e = parse_line(line) if e is None: - w.error(lineno, f"cannot parse: {line!r}") + r.error(lineno, f"cannot parse: {line!r}") continue if e.strip_func: - w.warn(lineno, f"[func] whitespaces around {e.raw_func!r}") + r.warn(lineno, f"[func] whitespaces around {e.raw_func!r}") if e.strip_ctype: - w.warn(lineno, f"[type] whitespaces around {e.raw_ctype!r}") + r.warn(lineno, f"[type] whitespaces around {e.raw_ctype!r}") if e.strip_name: - w.warn(lineno, f"[name] whitespaces around {e.raw_name!r}") + r.warn(lineno, f"[name] whitespaces around {e.raw_name!r}") if e.strip_effect: - w.warn(lineno, f"[ref] whitespaces around {e.raw_effect!r}") + r.warn(lineno, f"[ref] whitespaces around {e.raw_effect!r}") func, name = e.func, e.name ctype, effect = e.ctype, e.effect @@ -228,24 +233,24 @@ def parse(lines: Iterable[str]) -> FileView: if func not in signatures: # process return value if name is not None: - w.warn(lineno, f"named return value in {line!r}") + r.warn(lineno, f"named return value in {line!r}") ret_param = Return(ctype, effect, comment, lineno=lineno) signatures[func] = Signature(func, ret_param, lineno=lineno) else: # process parameter if name is None: - w.error(lineno, f"missing parameter name in {line!r}") + r.error(lineno, f"missing parameter name in {line!r}") continue sig: Signature = signatures[func] params = sig.params if name in params: - w.error(lineno, f"duplicated parameter name in {line!r}") + r.error(lineno, f"duplicated parameter name in {line!r}") continue params[name] = Param(name, ctype, effect, comment, lineno=lineno) - if w.count: + if r.issues_count: print() - print(f"Found {w.count} issue(s)") + print(f"Found {r.issues_count} issue(s)") return FileView(signatures, frozenset(incomplete)) From 9814dd7f0ed72d07960f705e980ab1989ca411d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:15:50 +0100 Subject: [PATCH 15/31] disallow stolen ref in return values --- Tools/refcounts/lint.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 1b53643d08fe4e..2870ceb93fce1c 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -277,8 +277,11 @@ def check(view: FileView) -> None: rparam = sig.rparam if not rparam.ctype: w.block(sig, "missing return value type") - if rparam.effect is RefEffect.UNKNOWN: - w.block(sig, "unknown return value type") + match rparam.effect: + case RefEffect.UNKNOWN: + w.block(sig, "unknown return value type") + case RefEffect.STEAL: + w.block(sig, "stolen reference on return value") # check the parameters for param in sig.params.values(): # type: Param ctype, effect = param.ctype, param.effect From 82766b35d63da8b74ca6b7410ec2de27b5f4e062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:19:47 +0100 Subject: [PATCH 16/31] add doc --- Tools/refcounts/lint.py | 97 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 2870ceb93fce1c..983ddf8fe9ae1a 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -24,6 +24,7 @@ MATCH_TODO: Callable[[str], re.Match[str] | None] MATCH_TODO = re.compile(r"^#\s*TODO:\s*(\w+)$").match +#: Set of declarations whose variable has a reference count. OBJECT_TYPES: frozenset[str] = frozenset() for qualifier, object_type, suffix in itertools.product( @@ -45,6 +46,12 @@ } del suffix, object_type, qualifier +#: Set of functions part of the stable ABI that are not +#: in the refcounts.dat file if: +#: +#: - They are ABI-only (either fully deprecated or even removed) +#: - They are so internal that they should not be used at all because +#: they raise a fatal error. IGNORE_LIST: frozenset[str] = frozenset(( # part of the stable ABI but should not be used at all "PyUnicode_GetSize", @@ -66,28 +73,69 @@ def is_c_parameter_name(name: str) -> bool: class RefEffect(enum.Enum): + """The reference count effect of a parameter or a return value.""" + UNKNOWN = enum.auto() + """Indicate an unparsable reference count effect. + + Such annotated entities are reported during the checking phase. + """ + UNUSED = enum.auto() + """Indicate that the entity has no reference count.""" + DECREF = enum.auto() + """Indicate that the reference count is decremented.""" + INCREF = enum.auto() + """Indicate that the reference count is incremented.""" + BORROW = enum.auto() + """Indicate that the reference count is left unchanged. + + Parameters annotated with this effect do not steal a reference. + """ + STEAL = enum.auto() + """Indicate that the reference count is left unchanged. + + Parameters annotated with this effect steal a reference. + """ + NULL = enum.auto() # for return values only + """Only used for a NULL return value. + + This is used by functions that return always return NULL as a "PyObject *". + """ @dataclass(slots=True, frozen=True, kw_only=True) class LineInfo: func: str + """The cleaned function name.""" + ctype: str | None + """"The cleaned parameter or return C type, or None if missing.""" + name: str | None + """The cleaned parameter name or None for the return parameter.""" + effect: RefEffect + """The reference count effect.""" + comment: str + """Optional comment, stripped from surrounding whitespaces.""" + # The raw_* attributes contain the strings before they are cleaned, + # and are only used when reporting an issue during the parsing phase. raw_func: str raw_ctype: str raw_name: str raw_effect: str + # The strip_* attributes indicate whether surrounding whitespaces were + # stripped or not from the corresponding strings. Such action will be + # reported during the parsing phase. strip_func: bool strip_ctype: bool strip_name: bool @@ -96,35 +144,72 @@ class LineInfo: @dataclass(slots=True, frozen=True) class Return: + """Indicate a return value.""" ctype: str | None + """The return C type, if any. + + A missing type is reported during the checking phase. + """ + effect: RefEffect + """The reference count effect. + + A nonsensical reference count effect is reported during the checking phase. + """ + comment: str + """An optional comment.""" + lineno: int = field(kw_only=True) + """The position in the file where the parameter is documented.""" @dataclass(slots=True, frozen=True) class Param: name: str + """The parameter name.""" ctype: str | None + """The parameter C type, if any. + + A missing type is reported during the checking phase. + """ + effect: RefEffect + """The reference count effect. + + A nonsensical reference count effect is reported during the checking phase. + """ + comment: str + """An optional comment.""" lineno: int = field(kw_only=True) + """The position in the file where the parameter is documented.""" @dataclass(slots=True, frozen=True) class Signature: name: str + """The function name.""" + rparam: Return + """The return value specifications.""" + params: dict[str, Param] = field(default_factory=dict) + """The (ordered) formal parameters specifications""" + lineno: int = field(kw_only=True) + """The position in the file where the function is documented.""" @dataclass(slots=True, frozen=True) class FileView: signatures: Mapping[str, Signature] + """A mapping from function names to the corresponding signature.""" + incomplete: frozenset[str] + """A set of function names that are yet to be documented.""" def parse_line(line: str) -> LineInfo | None: @@ -178,22 +263,29 @@ def parse_line(line: str) -> LineInfo | None: @dataclass(slots=True) class ParserReporter: + """Utility for printing messages during the parsing phase.""" warnings_count: int = 0 + """The number of warnings emitted so far.""" errors_count: int = 0 + """The number of errors emitted so far.""" @property def issues_count(self) -> int: + """The number of issues encountered so far.""" return self.warnings_count + self.errors_count def info(self, lineno: int, message: str) -> None: + """Print a simple message.""" print(f"{flno_(lineno)} {message}") def warn(self, lineno: int, message: str) -> None: + """Print a warning message.""" self.warnings_count += 1 self.info(lineno, message) def error(self, lineno: int, message: str) -> None: + """Print an error message.""" self.errors_count += 1 self.info(lineno, message) @@ -257,13 +349,18 @@ def parse(lines: Iterable[str]) -> FileView: @dataclass(slots=True) class CheckerWarnings: + """Utility for emitting warnings during the checking phrase.""" + count: int = 0 + """The number of warnings emitted so far.""" def block(self, sig: Signature, message: str) -> None: + """Print a warning message for the given signature.""" self.count += 1 print(f"{flno_(sig.lineno)} {sig.name:50} {message}") def param(self, sig: Signature, param: Param, message: str) -> None: + """Print a warning message for the given formal parameter.""" self.count += 1 fullname = f"{sig.name}[{param.name}]" print(f"{flno_(param.lineno)} {fullname:50} {message}") From e7a7a10977322e0a5e622c4e1017d4b2948d1c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:25:34 +0100 Subject: [PATCH 17/31] fix workflow --- .github/workflows/mypy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index f9e458a6d4b4e6..b420f8b34bd3e3 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -44,7 +44,7 @@ jobs: "Tools/cases_generator", "Tools/clinic", "Tools/jit", - - "Tools/refcounts", + "Tools/refcounts", "Tools/peg_generator", "Tools/wasm", ] From f64a23d9954cd84ea685fe4c4dbfdab796b61168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:28:19 +0100 Subject: [PATCH 18/31] update pre-commit hook --- .pre-commit-config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccaf2390d99fae..029eede6399dd9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,10 @@ repos: name: Run Ruff (lint) on Argument Clinic args: [--exit-non-zero-on-fix, --config=Tools/clinic/.ruff.toml] files: ^Tools/clinic/|Lib/test/test_clinic.py + - id: ruff + name: Run Ruff (lint) on Tools/refcounts/lint.py + args: [--exit-non-zero-on-fix, --config=Tools/refcounts/.ruff.toml] + files: ^Tools/refcounts/lint.py - id: ruff-format name: Run Ruff (format) on Doc/ args: [--check] From 2eb541f55bd5b184dd3587838fd281c4ae43f50c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:49:07 +0100 Subject: [PATCH 19/31] fix some typos --- Tools/refcounts/lint.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 983ddf8fe9ae1a..53abdd6691af0d 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -49,7 +49,7 @@ #: Set of functions part of the stable ABI that are not #: in the refcounts.dat file if: #: -#: - They are ABI-only (either fully deprecated or even removed) +#: - They are ABI-only (either fully deprecated or even removed). #: - They are so internal that they should not be used at all because #: they raise a fatal error. IGNORE_LIST: frozenset[str] = frozenset(( @@ -105,7 +105,7 @@ class RefEffect(enum.Enum): NULL = enum.auto() # for return values only """Only used for a NULL return value. - This is used by functions that return always return NULL as a "PyObject *". + This is used by functions that always return NULL as a "PyObject *". """ @@ -145,6 +145,7 @@ class LineInfo: @dataclass(slots=True, frozen=True) class Return: """Indicate a return value.""" + ctype: str | None """The return C type, if any. @@ -166,6 +167,8 @@ class Return: @dataclass(slots=True, frozen=True) class Param: + """Indicate a formal parameter.""" + name: str """The parameter name.""" @@ -190,6 +193,8 @@ class Param: @dataclass(slots=True, frozen=True) class Signature: + """An entry in refcounts.dat.""" + name: str """The function name.""" @@ -205,6 +210,8 @@ class Signature: @dataclass(slots=True, frozen=True) class FileView: + """View of the refcounts.dat file.""" + signatures: Mapping[str, Signature] """A mapping from function names to the corresponding signature.""" @@ -414,7 +421,7 @@ def check_structure(view: FileView, stable_abi_file: str) -> None: _STABLE_ABI_PATH_SENTINEL: Final = object() -def _create_parser() -> ArgumentParser: +def create_parser() -> ArgumentParser: parser = ArgumentParser( prog="lint.py", formatter_class=RawDescriptionHelpFormatter, description=( @@ -432,7 +439,7 @@ def _create_parser() -> ArgumentParser: def main() -> None: - parser = _create_parser() + parser = create_parser() args = parser.parse_args() lines = Path(args.file).read_text(encoding="utf-8").splitlines() print(" PARSING ".center(80, "-")) From cf42e0308e55c086b1be0b0f9dc1cf7d9c7e7061 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:56:26 +0100 Subject: [PATCH 20/31] restrict the ruff rules --- Tools/refcounts/.ruff.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Tools/refcounts/.ruff.toml b/Tools/refcounts/.ruff.toml index 0da2e0d03ed3e8..a1a536120ad050 100644 --- a/Tools/refcounts/.ruff.toml +++ b/Tools/refcounts/.ruff.toml @@ -15,7 +15,11 @@ select = [ "ISC", # flake8-implicit-str-concat "TCH", # flake8-type-checking "PTH", # flake8-use-pathlib - "PERF", # perflint + "PERF101", # unnecessary-list-cast + "PERF102", # incorrect-dict-iterator + "PERF401", # manual-list-comprehension + "PERF402", # manual-list-copy + "PERF403", # manual-dict-comprehension "FURB101", # read-whole-file "FURB103", # write-whole-file "FURB110", # if-exp-instead-of-operator From eb893d0eb3a6cf797e6c2fa591af2bc06957a865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:04:30 +0100 Subject: [PATCH 21/31] add ruff docstrings rules --- Tools/refcounts/.ruff.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Tools/refcounts/.ruff.toml b/Tools/refcounts/.ruff.toml index a1a536120ad050..7322e781cd7c9e 100644 --- a/Tools/refcounts/.ruff.toml +++ b/Tools/refcounts/.ruff.toml @@ -6,11 +6,17 @@ preview = true [lint] select = [ "I", # isort + "D204", # 1 blank line required after class docstring + "D205", # 1 blank line required between summary line and description + "D400", # First line should end with a period + "D401", # First line of docstring should be in imperative mood "F", # Enable all pyflakes rules "E", # Enable all pycodestyle error rules "W", # Enable all pycodestyle warning rules "UP", # Enable all pyupgrade rules "RUF100", # Ban unused `# noqa` comments + "PGH003", # Use specific rule codes when ignoring type issues + "PGH004", # Use specific rule codes when using noqa "C4", # flake8-comprehensions "ISC", # flake8-implicit-str-concat "TCH", # flake8-type-checking From dbe29a6617f40d48e67e5a9f9b81f476392a424c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:36:38 +0100 Subject: [PATCH 22/31] address Peter's review --- Tools/refcounts/lint.py | 100 ++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 53abdd6691af0d..a6814a29525adc 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -5,6 +5,7 @@ import enum import itertools import re +import sys import tomllib from argparse import ArgumentParser, RawDescriptionHelpFormatter from dataclasses import dataclass, field @@ -24,27 +25,27 @@ MATCH_TODO: Callable[[str], re.Match[str] | None] MATCH_TODO = re.compile(r"^#\s*TODO:\s*(\w+)$").match + +def generate_object_types(universe: Iterable[str]) -> Iterable[str]: + for qualifier, object_type, suffix in itertools.product( + ("const ", ""), + universe, + ( + "*", + "**", "* *", + "*const*", "*const *", "* const*", "* const *", + ), + ): + yield f"{qualifier}{object_type}{suffix}" + yield f"{qualifier}{object_type} {suffix}" + + #: Set of declarations whose variable has a reference count. -OBJECT_TYPES: frozenset[str] = frozenset() - -for qualifier, object_type, suffix in itertools.product( - ("const ", ""), - ( - "PyObject", "PyLongObject", "PyTypeObject", - "PyCodeObject", "PyFrameObject", "PyModuleObject", - "PyVarObject", - ), - ( - "*", - "**", "* *", - "*const*", "*const *", "* const*", "* const *", - ), -): - OBJECT_TYPES |= { - f"{qualifier}{object_type}{suffix}", - f"{qualifier}{object_type} {suffix}", - } -del suffix, object_type, qualifier +OBJECT_TYPES: frozenset[str] = frozenset(generate_object_types(( + "PyObject", "PyVarObject", "PyTypeObject", + "PyLongObject", "PyCodeObject", "PyFrameObject", + "PyModuleDef", "PyModuleObject", +))) #: Set of functions part of the stable ABI that are not #: in the refcounts.dat file if: @@ -81,28 +82,28 @@ class RefEffect(enum.Enum): Such annotated entities are reported during the checking phase. """ - UNUSED = enum.auto() + UNUSED = "" """Indicate that the entity has no reference count.""" - DECREF = enum.auto() + DECREF = "-1" """Indicate that the reference count is decremented.""" - INCREF = enum.auto() + INCREF = "+1" """Indicate that the reference count is incremented.""" - BORROW = enum.auto() + BORROW = "0" """Indicate that the reference count is left unchanged. Parameters annotated with this effect do not steal a reference. """ - STEAL = enum.auto() + STEAL = "$" """Indicate that the reference count is left unchanged. Parameters annotated with this effect steal a reference. """ - NULL = enum.auto() # for return values only + NULL = "null" # for return values only """Only used for a NULL return value. This is used by functions that always return NULL as a "PyObject *". @@ -241,22 +242,10 @@ def parse_line(line: str) -> LineInfo | None: clean_effect = raw_effect.strip() strip_effect = clean_effect != raw_effect - - match clean_effect: - case "-1": - effect = RefEffect.DECREF - case "+1": - effect = RefEffect.INCREF - case "0": - effect = RefEffect.BORROW - case "$": - effect = RefEffect.STEAL - case "null": - effect = RefEffect.NULL - case "": - effect = RefEffect.UNUSED - case _: - effect = RefEffect.UNKNOWN + try: + effect = RefEffect(clean_effect) + except ValueError: + effect = RefEffect.UNKNOWN comment = comment.strip() return LineInfo( @@ -282,19 +271,25 @@ def issues_count(self) -> int: """The number of issues encountered so far.""" return self.warnings_count + self.errors_count + def _print( # type: ignore[no-untyped-def] + self, lineno: int, message: str, *, file=None, + ) -> None: + """Forward to print().""" + print(f"{flno_(lineno)} {message}", file=file) + def info(self, lineno: int, message: str) -> None: """Print a simple message.""" - print(f"{flno_(lineno)} {message}") + self._print(lineno, message) def warn(self, lineno: int, message: str) -> None: """Print a warning message.""" self.warnings_count += 1 - self.info(lineno, message) + self._print(lineno, message) def error(self, lineno: int, message: str) -> None: """Print an error message.""" self.errors_count += 1 - self.info(lineno, message) + self._print(lineno, message, file=sys.stderr) def parse(lines: Iterable[str]) -> FileView: @@ -429,12 +424,15 @@ def create_parser() -> ArgumentParser: "Use --abi or --abi=FILE to check against the stable ABI." ), ) - _ = parser.add_argument - _("file", nargs="?", default=DEFAULT_REFCOUNT_DAT_PATH, - help="the refcounts.dat file to check (default: %(default)s)") - _("--abi", nargs="?", default=_STABLE_ABI_PATH_SENTINEL, - help=(f"check against the given stable_abi.toml file " - f"(default: {DEFAULT_STABLE_ABI_TOML_PATH})")) + parser.add_argument( + "file", nargs="?", default=DEFAULT_REFCOUNT_DAT_PATH, + help="the refcounts.dat file to check (default: %(default)s)", + ) + parser.add_argument( + "--abi", nargs="?", default=_STABLE_ABI_PATH_SENTINEL, + help=(f"check against the given stable_abi.toml file " + f"(default: {DEFAULT_STABLE_ABI_TOML_PATH})"), + ) return parser From 658e3322ef707f06d650857edbe55bbffa63df2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:57:09 +0100 Subject: [PATCH 23/31] update some variable names --- Tools/refcounts/lint.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index a6814a29525adc..f27d437c7c7a78 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -26,10 +26,11 @@ MATCH_TODO = re.compile(r"^#\s*TODO:\s*(\w+)$").match -def generate_object_types(universe: Iterable[str]) -> Iterable[str]: +def generate_object_types(object_types: Iterable[str]) -> Iterable[str]: + """Generate the type declarations that expect a reference count.""" for qualifier, object_type, suffix in itertools.product( ("const ", ""), - universe, + object_types, ( "*", "**", "* *", @@ -53,7 +54,7 @@ def generate_object_types(universe: Iterable[str]) -> Iterable[str]: #: - They are ABI-only (either fully deprecated or even removed). #: - They are so internal that they should not be used at all because #: they raise a fatal error. -IGNORE_LIST: frozenset[str] = frozenset(( +STABLE_ABI_IGNORE_LIST: frozenset[str] = frozenset(( # part of the stable ABI but should not be used at all "PyUnicode_GetSize", # part of the stable ABI but completely removed @@ -64,7 +65,7 @@ def generate_object_types(universe: Iterable[str]) -> Iterable[str]: def flno_(lineno: int) -> str: # Format the line so that users can C/C from the terminal # the line number and jump with their editor using Ctrl+G. - return f"{lineno:>5} " + return f"{lineno:>6} " def is_c_parameter_name(name: str) -> bool: @@ -406,7 +407,7 @@ def check_structure(view: FileView, stable_abi_file: str) -> None: stable_abi = tomllib.loads(stable_abi_str) expect = stable_abi["function"].keys() # check if there are missing entries (those marked as "TODO" are ignored) - actual = IGNORE_LIST | view.incomplete | view.signatures.keys() + actual = STABLE_ABI_IGNORE_LIST | view.incomplete | view.signatures.keys() if missing := (expect - actual): print(f"Missing {len(missing)} stable ABI entries:") for name in sorted(missing): From 5660ffea5f5bb784320c3ab2299d57598a729037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:04:00 +0100 Subject: [PATCH 24/31] add TODO messages --- Doc/data/refcounts.dat | 18 +++++------ Tools/refcounts/lint.py | 68 +++++++++++++++++++++++++++++------------ 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 64e8c0cd7a6532..d49fd7540a5162 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1433,22 +1433,22 @@ PyModule_CheckExact:int::: PyModule_CheckExact:PyObject*:p:0: PyModule_Create:PyObject*::+1: -PyModule_Create:PyModuleDef*:def:: +PyModule_Create:PyModuleDef*:def:?: PyModule_Create2:PyObject*::+1: -PyModule_Create2:PyModuleDef*:def:: +PyModule_Create2:PyModuleDef*:def:?: PyModule_Create2:int:module_api_version:: PyModule_ExecDef:int::: PyModule_ExecDef:PyObject*:module:0: -PyModule_ExecDef:PyModuleDef*:def:: +PyModule_ExecDef:PyModuleDef*:def:?: PyModule_FromDefAndSpec:PyObject*::+1: -PyModule_FromDefAndSpec:PyModuleDef*:def:: +PyModule_FromDefAndSpec:PyModuleDef*:def:?: PyModule_FromDefAndSpec:PyObject*:spec:0: PyModule_FromDefAndSpec2:PyObject*::+1: -PyModule_FromDefAndSpec2:PyModuleDef*:def:: +PyModule_FromDefAndSpec2:PyModuleDef*:def:?: PyModule_FromDefAndSpec2:PyObject*:spec:0: PyModule_FromDefAndSpec2:int:module_api_version:: @@ -1484,7 +1484,7 @@ PyModule_SetDocString:PyObject*:module:0: PyModule_SetDocString:const char*:docstring:: PyModuleDef_Init:PyObject*::0: -PyModuleDef_Init:PyModuleDef*:def:0: +PyModuleDef_Init:PyModuleDef*:def:?: PyNumber_Absolute:PyObject*::+1: PyNumber_Absolute:PyObject*:o:0: @@ -2150,13 +2150,13 @@ PySlice_Unpack:Py_ssize_t*:step:: PyState_AddModule:int::: PyState_AddModule:PyObject*:module:+1: -PyState_AddModule:PyModuleDef*:def:: +PyState_AddModule:PyModuleDef*:def:?: PyState_FindModule:PyObject*::0: -PyState_FindModule:PyModuleDef*:def:: +PyState_FindModule:PyModuleDef*:def:?: PyState_RemoveModule:int::: -PyState_RemoveModule:PyModuleDef*:def:: +PyState_RemoveModule:PyModuleDef*:def:?: PyStructSequence_GET_ITEM:PyObject*::0: PyStructSequence_GET_ITEM:PyObject*:p:0: diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index f27d437c7c7a78..6528faf705820e 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -83,6 +83,12 @@ class RefEffect(enum.Enum): Such annotated entities are reported during the checking phase. """ + TODO = "?" + """Indicate a reference count effect to be completed (not yet done). + + Such annotated entities are reported during the checking phase as TODOs. + """ + UNUSED = "" """Indicate that the entity has no reference count.""" @@ -351,50 +357,74 @@ def parse(lines: Iterable[str]) -> FileView: @dataclass(slots=True) -class CheckerWarnings: - """Utility for emitting warnings during the checking phrase.""" +class CheckerReporter: + """Utility for emitting messages during the checking phrase.""" - count: int = 0 + warnings_count: int = 0 """The number of warnings emitted so far.""" + todo_count: int = 0 + """The number of TODOs emitted so far.""" - def block(self, sig: Signature, message: str) -> None: - """Print a warning message for the given signature.""" - self.count += 1 + def _print_block(self, sig: Signature, message: str) -> None: + """Print a message for the given signature.""" print(f"{flno_(sig.lineno)} {sig.name:50} {message}") - def param(self, sig: Signature, param: Param, message: str) -> None: - """Print a warning message for the given formal parameter.""" - self.count += 1 + def _print_param(self, sig: Signature, param: Param, message: str) -> None: + """Print a message for the given formal parameter.""" fullname = f"{sig.name}[{param.name}]" print(f"{flno_(param.lineno)} {fullname:50} {message}") + def warn_block(self, sig: Signature, message: str) -> None: + """Print a warning message for the given signature.""" + self.warnings_count += 1 + self._print_block(sig, message) + + def todo_block(self, sig: Signature, message: str) -> None: + """Print a TODO message for the given signature.""" + self.todo_count += 1 + self._print_block(sig, message) + + def warn_param(self, sig: Signature, param: Param, message: str) -> None: + """Print a warning message for the given formal parameter.""" + self.warnings_count += 1 + self._print_param(sig, param, message) + + def todo_param(self, sig: Signature, param: Param, message: str) -> None: + """Print a TODO message for the given formal parameter.""" + self.todo_count += 1 + self._print_param(sig, param, message) + def check(view: FileView) -> None: - w = CheckerWarnings() + r = CheckerReporter() for sig in view.signatures.values(): # type: Signature # check the return value rparam = sig.rparam if not rparam.ctype: - w.block(sig, "missing return value type") + r.warn_block(sig, "missing return value type") match rparam.effect: - case RefEffect.UNKNOWN: - w.block(sig, "unknown return value type") + case RefEffect.TODO: + r.todo_block(sig, "incomplete reference count effect") case RefEffect.STEAL: - w.block(sig, "stolen reference on return value") + r.warn_block(sig, "stolen reference on return value") + case RefEffect.UNKNOWN: + r.warn_block(sig, "unknown return value type") # check the parameters for param in sig.params.values(): # type: Param ctype, effect = param.ctype, param.effect + if effect is RefEffect.TODO: + r.todo_param(sig, param, "incomplete reference count effect") if ctype in OBJECT_TYPES and effect is RefEffect.UNUSED: - w.param(sig, param, "missing reference count management") + r.warn_param(sig, param, "missing reference count effect") if ctype not in OBJECT_TYPES and effect is not RefEffect.UNUSED: - w.param(sig, param, "unused reference count management") + r.warn_param(sig, param, "unused reference count effect") if not is_c_parameter_name(param.name): - w.param(sig, param, "invalid parameter name") + r.warn_param(sig, param, "invalid parameter name") - if w.count: + if r.count: print() - print(f"Found {w.count} issue(s)") + print(f"Found {r.count} issue(s)") names = view.signatures.keys() if sorted(names) != list(names): print("Entries are not sorted") From afceff062ab933243501bca96ac82722ab3208ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:23:59 +0100 Subject: [PATCH 25/31] RefEffect -> Effect (to shorten lines) --- Tools/refcounts/lint.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 6528faf705820e..95f283783dd641 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -74,7 +74,7 @@ def is_c_parameter_name(name: str) -> bool: return name == C_ELLIPSIS or name.isidentifier() -class RefEffect(enum.Enum): +class Effect(enum.Enum): """The reference count effect of a parameter or a return value.""" UNKNOWN = enum.auto() @@ -128,7 +128,7 @@ class LineInfo: name: str | None """The cleaned parameter name or None for the return parameter.""" - effect: RefEffect + effect: Effect """The reference count effect.""" comment: str @@ -160,7 +160,7 @@ class Return: A missing type is reported during the checking phase. """ - effect: RefEffect + effect: Effect """The reference count effect. A nonsensical reference count effect is reported during the checking phase. @@ -186,7 +186,7 @@ class Param: A missing type is reported during the checking phase. """ - effect: RefEffect + effect: Effect """The reference count effect. A nonsensical reference count effect is reported during the checking phase. @@ -250,9 +250,9 @@ def parse_line(line: str) -> LineInfo | None: clean_effect = raw_effect.strip() strip_effect = clean_effect != raw_effect try: - effect = RefEffect(clean_effect) + effect = Effect(clean_effect) except ValueError: - effect = RefEffect.UNKNOWN + effect = Effect.UNKNOWN comment = comment.strip() return LineInfo( @@ -404,20 +404,20 @@ def check(view: FileView) -> None: if not rparam.ctype: r.warn_block(sig, "missing return value type") match rparam.effect: - case RefEffect.TODO: + case Effect.TODO: r.todo_block(sig, "incomplete reference count effect") - case RefEffect.STEAL: + case Effect.STEAL: r.warn_block(sig, "stolen reference on return value") - case RefEffect.UNKNOWN: + case Effect.UNKNOWN: r.warn_block(sig, "unknown return value type") # check the parameters for param in sig.params.values(): # type: Param ctype, effect = param.ctype, param.effect - if effect is RefEffect.TODO: + if effect is Effect.TODO: r.todo_param(sig, param, "incomplete reference count effect") - if ctype in OBJECT_TYPES and effect is RefEffect.UNUSED: + if ctype in OBJECT_TYPES and effect is Effect.UNUSED: r.warn_param(sig, param, "missing reference count effect") - if ctype not in OBJECT_TYPES and effect is not RefEffect.UNUSED: + if ctype not in OBJECT_TYPES and effect is not Effect.UNUSED: r.warn_param(sig, param, "unused reference count effect") if not is_c_parameter_name(param.name): r.warn_param(sig, param, "invalid parameter name") From d173d7a0545166c951b04a42b07a3520db364d11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:24:49 +0100 Subject: [PATCH 26/31] extract checking logic into smaller functions --- Tools/refcounts/lint.py | 50 +++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 95f283783dd641..5294e710e418ca 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -395,32 +395,38 @@ def todo_param(self, sig: Signature, param: Param, message: str) -> None: self._print_param(sig, param, message) -def check(view: FileView) -> None: +def check_signature(r: CheckerReporter, sig: Signature) -> None: + rparam = sig.rparam + if not rparam.ctype: + r.warn_block(sig, "missing return value type") + match rparam.effect: + case Effect.TODO: + r.todo_block(sig, "incomplete reference count effect") + case Effect.STEAL: + r.warn_block(sig, "stolen reference on return value") + case Effect.UNKNOWN: + r.warn_block(sig, "unknown return value type") + + +def check_parameter(r: CheckerReporter, sig: Signature, param: Param) -> None: + ctype, effect = param.ctype, param.effect + if effect is Effect.TODO: + r.todo_param(sig, param, "incomplete reference count effect") + if ctype in OBJECT_TYPES and effect is Effect.UNUSED: + r.warn_param(sig, param, "missing reference count effect") + if ctype not in OBJECT_TYPES and effect is not Effect.UNUSED: + r.warn_param(sig, param, "unused reference count effect") + if not is_c_parameter_name(param.name): + r.warn_param(sig, param, "invalid parameter name") + + +def check(view: FileView) -> int: r = CheckerReporter() for sig in view.signatures.values(): # type: Signature - # check the return value - rparam = sig.rparam - if not rparam.ctype: - r.warn_block(sig, "missing return value type") - match rparam.effect: - case Effect.TODO: - r.todo_block(sig, "incomplete reference count effect") - case Effect.STEAL: - r.warn_block(sig, "stolen reference on return value") - case Effect.UNKNOWN: - r.warn_block(sig, "unknown return value type") - # check the parameters + check_signature(r, sig) for param in sig.params.values(): # type: Param - ctype, effect = param.ctype, param.effect - if effect is Effect.TODO: - r.todo_param(sig, param, "incomplete reference count effect") - if ctype in OBJECT_TYPES and effect is Effect.UNUSED: - r.warn_param(sig, param, "missing reference count effect") - if ctype not in OBJECT_TYPES and effect is not Effect.UNUSED: - r.warn_param(sig, param, "unused reference count effect") - if not is_c_parameter_name(param.name): - r.warn_param(sig, param, "invalid parameter name") + check_parameter(r, sig, param) if r.count: print() From 0edd4892d6815d12e0d304d07b889c7d2dba5e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:28:20 +0100 Subject: [PATCH 27/31] add --strict errors mode --- Tools/refcounts/lint.py | 42 +++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 5294e710e418ca..0f180f1600f8a2 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Iterable, Mapping - from typing import Final, LiteralString + from typing import Final, LiteralString, NoReturn ROOT = Path(__file__).parent.parent.parent.resolve() DEFAULT_REFCOUNT_DAT_PATH: Final[str] = str(ROOT / "Doc/data/refcounts.dat") @@ -226,6 +226,9 @@ class FileView: incomplete: frozenset[str] """A set of function names that are yet to be documented.""" + has_errors: bool + """Indicate whether errors were found during the parsing.""" + def parse_line(line: str) -> LineInfo | None: parts = line.split(":", maxsplit=4) @@ -349,11 +352,11 @@ def parse(lines: Iterable[str]) -> FileView: continue params[name] = Param(name, ctype, effect, comment, lineno=lineno) - if r.issues_count: + if has_errors := (r.issues_count > 0): print() print(f"Found {r.issues_count} issue(s)") - return FileView(signatures, frozenset(incomplete)) + return FileView(signatures, frozenset(incomplete), has_errors) @dataclass(slots=True) @@ -420,7 +423,7 @@ def check_parameter(r: CheckerReporter, sig: Signature, param: Param) -> None: r.warn_param(sig, param, "invalid parameter name") -def check(view: FileView) -> int: +def check(view: FileView, *, strict: bool = False) -> int: r = CheckerReporter() for sig in view.signatures.values(): # type: Signature @@ -428,15 +431,23 @@ def check(view: FileView) -> int: for param in sig.params.values(): # type: Param check_parameter(r, sig, param) - if r.count: + exit_code = 0 + if r.warnings_count: + print() + print(f"Found {r.warnings_count} issue(s)") + exit_code = 1 + if r.todo_count: print() - print(f"Found {r.count} issue(s)") + print(f"Found {r.todo_count} todo(s)") + exit_code = 1 if exit_code or strict else 0 names = view.signatures.keys() if sorted(names) != list(names): print("Entries are not sorted") + exit_code = 1 if exit_code or strict else 0 + return exit_code -def check_structure(view: FileView, stable_abi_file: str) -> None: +def check_structure(view: FileView, stable_abi_file: str) -> int: print(f"Stable ABI file: {stable_abi_file}") print() stable_abi_str = Path(stable_abi_file).read_text(encoding="utf-8") @@ -448,6 +459,8 @@ def check_structure(view: FileView, stable_abi_file: str) -> None: print(f"Missing {len(missing)} stable ABI entries:") for name in sorted(missing): print(name) + return 1 + return 0 _STABLE_ABI_PATH_SENTINEL: Final = object() @@ -470,21 +483,30 @@ def create_parser() -> ArgumentParser: help=(f"check against the given stable_abi.toml file " f"(default: {DEFAULT_STABLE_ABI_TOML_PATH})"), ) + parser.add_argument( + "--strict", action="store_true", + help="treat warnings and TODOs as errors", + ) + return parser -def main() -> None: +def main() -> NoReturn: parser = create_parser() args = parser.parse_args() lines = Path(args.file).read_text(encoding="utf-8").splitlines() print(" PARSING ".center(80, "-")) view = parse(lines) + exit_code = 1 if view.has_errors else 0 print(" CHECKING ".center(80, "-")) - check(view) + exit_code |= check(view, strict=args.strict) if args.abi is not _STABLE_ABI_PATH_SENTINEL: abi = args.abi or DEFAULT_STABLE_ABI_TOML_PATH print(" CHECKING STABLE ABI ".center(80, "-")) - check_structure(view, abi) + check_abi_exit_code = check_structure(view, abi) + if args.strict: + exit_code = exit_code | check_abi_exit_code + raise SystemExit(exit_code) if __name__ == "__main__": From a3becf093e7bdbd5bb05ca9c9d821d4991da0cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:36:40 +0100 Subject: [PATCH 28/31] additional stealing effects --- Tools/refcounts/lint.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 0f180f1600f8a2..01cdc9937d9cb9 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -110,6 +110,18 @@ class Effect(enum.Enum): Parameters annotated with this effect steal a reference. """ + STEAL_AND_DECREF = "$-1" + """Indicate that the reference count is decremented. + + Parameters annotated with this effect steal a reference. + """ + + STEAL_AND_INCREF = "$+1" + """Indicate that the reference count is incremented. + + Parameters annotated with this effect steal a reference. + """ + NULL = "null" # for return values only """Only used for a NULL return value. @@ -405,7 +417,7 @@ def check_signature(r: CheckerReporter, sig: Signature) -> None: match rparam.effect: case Effect.TODO: r.todo_block(sig, "incomplete reference count effect") - case Effect.STEAL: + case Effect.STEAL | Effect.STEAL_AND_DECREF | Effect.STEAL_AND_INCREF: r.warn_block(sig, "stolen reference on return value") case Effect.UNKNOWN: r.warn_block(sig, "unknown return value type") From baf24747778fb31b421cd6a42e86cfaa1a4631ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:42:16 +0100 Subject: [PATCH 29/31] address Hugo's review --- .github/workflows/mypy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index b420f8b34bd3e3..ddfc5bfeb45ce9 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -15,8 +15,8 @@ on: - "Tools/clinic/**" - "Tools/jit/**" - "Tools/peg_generator/**" - - "Tools/requirements-dev.txt" - "Tools/refcounts/lint.py" + - "Tools/requirements-dev.txt" - "Tools/wasm/**" workflow_dispatch: From 549ba49f09b8e54ad9ffd2dec53c2b5adf8597ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:43:52 +0100 Subject: [PATCH 30/31] remove TODO symbols for now (we don't want yet to change the Sphinx extension) --- Doc/data/refcounts.dat | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 7d8cc0adbc2df0..e16985fe2d2d76 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1435,22 +1435,22 @@ PyModule_CheckExact:int::: PyModule_CheckExact:PyObject*:p:0: PyModule_Create:PyObject*::+1: -PyModule_Create:PyModuleDef*:def:?: +PyModule_Create:PyModuleDef*:def:: PyModule_Create2:PyObject*::+1: -PyModule_Create2:PyModuleDef*:def:?: +PyModule_Create2:PyModuleDef*:def:: PyModule_Create2:int:module_api_version:: PyModule_ExecDef:int::: PyModule_ExecDef:PyObject*:module:0: -PyModule_ExecDef:PyModuleDef*:def:?: +PyModule_ExecDef:PyModuleDef*:def:: PyModule_FromDefAndSpec:PyObject*::+1: -PyModule_FromDefAndSpec:PyModuleDef*:def:?: +PyModule_FromDefAndSpec:PyModuleDef*:def:: PyModule_FromDefAndSpec:PyObject*:spec:0: PyModule_FromDefAndSpec2:PyObject*::+1: -PyModule_FromDefAndSpec2:PyModuleDef*:def:?: +PyModule_FromDefAndSpec2:PyModuleDef*:def:: PyModule_FromDefAndSpec2:PyObject*:spec:0: PyModule_FromDefAndSpec2:int:module_api_version:: @@ -1486,7 +1486,7 @@ PyModule_SetDocString:PyObject*:module:0: PyModule_SetDocString:const char*:docstring:: PyModuleDef_Init:PyObject*::0: -PyModuleDef_Init:PyModuleDef*:def:?: +PyModuleDef_Init:PyModuleDef*:def:: PyNumber_Absolute:PyObject*::+1: PyNumber_Absolute:PyObject*:o:0: @@ -2152,13 +2152,13 @@ PySlice_Unpack:Py_ssize_t*:step:: PyState_AddModule:int::: PyState_AddModule:PyObject*:module:+1: -PyState_AddModule:PyModuleDef*:def:?: +PyState_AddModule:PyModuleDef*:def:: PyState_FindModule:PyObject*::0: -PyState_FindModule:PyModuleDef*:def:?: +PyState_FindModule:PyModuleDef*:def:: PyState_RemoveModule:int::: -PyState_RemoveModule:PyModuleDef*:def:?: +PyState_RemoveModule:PyModuleDef*:def:: PyStructSequence_GET_ITEM:PyObject*::0: PyStructSequence_GET_ITEM:PyObject*:p:0: From c708a4c4fbc68937bbc5262ac6ce03297bd8ba4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:46:41 +0100 Subject: [PATCH 31/31] address Alex's review --- Tools/refcounts/lint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/refcounts/lint.py b/Tools/refcounts/lint.py index 01cdc9937d9cb9..9376e1bfd14993 100644 --- a/Tools/refcounts/lint.py +++ b/Tools/refcounts/lint.py @@ -2,13 +2,13 @@ from __future__ import annotations -import enum import itertools import re import sys import tomllib from argparse import ArgumentParser, RawDescriptionHelpFormatter from dataclasses import dataclass, field +from enum import Enum from pathlib import Path from typing import TYPE_CHECKING @@ -74,10 +74,10 @@ def is_c_parameter_name(name: str) -> bool: return name == C_ELLIPSIS or name.isidentifier() -class Effect(enum.Enum): +class Effect(Enum): """The reference count effect of a parameter or a return value.""" - UNKNOWN = enum.auto() + UNKNOWN = None """Indicate an unparsable reference count effect. Such annotated entities are reported during the checking phase.