Skip to content

Commit

Permalink
Add tool for linting Doc/data/refcounts.dat
Browse files Browse the repository at this point in the history
  • Loading branch information
picnixz committed Dec 1, 2024
1 parent 3afb639 commit bc45547
Show file tree
Hide file tree
Showing 3 changed files with 259 additions and 0 deletions.
15 changes: 15 additions & 0 deletions Tools/refcounts/.ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
target-version = "py312"
line-length = 80
fix = true

[lint]
select = [
"ALL"
]
ignore = [
"D", # docstrings
"I001", # split imports
"Q00", # prefer double quotes over single quotes
"T201", # print() found
"PLR2004", # magic values
]
234 changes: 234 additions & 0 deletions Tools/refcounts/lint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
"""Lint Doc/data/refcounts.dat."""

from __future__ import annotations

import itertools
import re
import tomllib
from argparse import ArgumentParser
from dataclasses import dataclass, field
from enum import auto as _auto, Enum
from pathlib import Path
from typing import TYPE_CHECKING, LiteralString, NamedTuple

if TYPE_CHECKING:
from collections.abc import Callable, Iterable, Mapping

C_ELLIPSIS: LiteralString = '...'

MATCH_TODO: Callable[[str], re.Match | None]
MATCH_TODO = re.compile(r'^#\s*TODO:\s*(\w+)$').match

# Every accepted spelling of an object pointer type: an optional 'const'
# qualifier, an object struct name and a pointer suffix, written with or
# without a space between the struct name and the suffix.
OBJECT_TYPES: frozenset[str] = frozenset(
    f'{qualifier}{object_type}{separator}{suffix}'
    for qualifier, object_type, suffix in itertools.product(
        ('const ', ''),
        (
            'PyObject',
            'PyLongObject', 'PyTypeObject',
            'PyCodeObject', 'PyFrameObject',
            'PyModuleObject', 'PyVarObject',
        ),
        ('*', '**', '* const *', '* const*'),
    )
    for separator in ('', ' ')
)

# Function names that the stable ABI check never reports as missing.
IGNORE_LIST: frozenset[str] = frozenset({
    # part of the stable ABI but should not be used at all
    'PyUnicode_GetSize',
    # part of the stable ABI but completely removed
    '_PyState_AddModule',
})

def flno_(lineno: int) -> str:
    """Return *lineno* right-aligned on 5 columns, followed by a space.

    The trailing space lets users copy the number straight from the
    terminal and jump to that line in their editor (e.g. with Ctrl+G).
    """
    return format(lineno, '>5') + ' '

class RefType(Enum):
    """Reference count effect read from the fourth field of a data line.

    The mapping from the textual field to a member is done by parse_line().
    """

    # non-empty field that matches none of the known spellings
    UNKNOWN = _auto()
    # empty field
    UNUSED = _auto()
    # field is '-1'
    DECREF = _auto()
    # field is '0'
    BORROW = _auto()
    # field is '+1' or '1'
    INCREF = _auto()
    # field is '$'
    STEALS = _auto()
    # field is 'null' (case-insensitive)
    NULL = _auto() # for return values only

class LineInfo(NamedTuple):
    """Fields of one 'func:ctype:name:reftype:comment' data line.

    Instances are produced by parse_line().
    """

    # function name (first field)
    func: str
    # C type of the return value or parameter, None when the field is blank
    ctype: str | None
    # parameter name, None when the field is blank
    name: str | None
    # reference count effect decoded from the fourth field
    reftype: RefType | None
    # free-form trailing comment, stripped of surrounding whitespace
    comment: str

@dataclass(slots=True)
class Return:
    """Return value of a function, built from the function's first line."""

    # C type of the return value, None when not specified
    ctype: str | None
    # reference count effect attached to the return value
    reftype: RefType | None
    # comment found on the line
    comment: str

@dataclass(slots=True)
class Param:
    """Parameter of a function, built from a follow-up line of that function."""

    # parameter name as written in the data file
    name: str
    # 1-based number of the line declaring the parameter
    lineno: int

    # C type of the parameter, None when not specified
    ctype: str | None
    # reference count effect attached to the parameter
    reftype: RefType | None
    # comment found on the line
    comment: str

@dataclass(slots=True)
class Signature:
    """Everything parsed for a single function."""

    # function name
    name: str
    # 1-based number of the first line mentioning the function
    lineno: int
    # return value description
    rparam: Return
    # parameters keyed by name, in order of appearance
    params: dict[str, Param] = field(default_factory=dict)

class FileView(NamedTuple):
    """Result of parse(): the parsed content of the data file."""

    # parsed functions keyed by name, in order of appearance
    signatures: Mapping[str, Signature]
    # names captured from '# TODO: <name>' comment lines
    incomplete: frozenset[str]

# Lower-cased spellings of the reference count field and their meaning.
# Any other spelling is mapped to RefType.UNKNOWN by parse_line().
_REFTYPE_BY_SPELLING: dict[str, RefType] = {
    '': RefType.UNUSED,
    '-1': RefType.DECREF,
    '0': RefType.BORROW,
    '+1': RefType.INCREF,
    '1': RefType.INCREF,
    '$': RefType.STEALS,
    'null': RefType.NULL,
}


def parse_line(line: str) -> LineInfo | None:
    """Split a 'func:ctype:name:reftype:comment' line into a LineInfo.

    Every field is stripped of surrounding whitespace. Blank *ctype* and
    *name* fields are reported as None. The comment is the remainder of
    the line after the fourth colon, so it may itself contain colons.

    Return None when the line has fewer than five fields or when the
    function name is blank.
    """
    parts = line.split(':', maxsplit=4)
    if len(parts) != 5:
        return None

    raw_func, raw_ctype, raw_name, raw_reftype, raw_comment = parts
    # Strip the function name like the other fields so that 'foo :' and
    # 'foo:' refer to the same function and a blank name is rejected.
    func = raw_func.strip()
    if not func:
        return None

    ctype = raw_ctype.strip() or None
    name = raw_name.strip() or None

    # Lower-casing only matters for 'null'; the other spellings contain
    # no cased characters.
    spelling = raw_reftype.strip().lower()
    reftype = _REFTYPE_BY_SPELLING.get(spelling, RefType.UNKNOWN)

    return LineInfo(func, ctype, name, reftype, raw_comment.strip())

def parse(lines: Iterable[str]) -> FileView:
    """Parse the lines of the data file into a FileView.

    Blank lines are skipped. Comment lines are skipped too, except that
    names found in '# TODO: <name>' comments are collected. For each
    function, the first data line describes the return value and the
    following ones describe its parameters.

    Problems found while parsing (unparsable line, named return value,
    missing or duplicated parameter name) are printed with their line
    number and the offending line is otherwise ignored, except for a
    named return value which is still recorded.
    """
    by_func: dict[str, Signature] = {}
    todo_names: set[str] = set()

    for lineno, raw_line in enumerate(lines, 1):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('#'):
            todo = MATCH_TODO(line)
            if todo is not None:
                todo_names.add(todo.group(1))
            continue

        info = parse_line(line)
        if info is None:
            print(f"{flno_(lineno)} cannot parse: {line}")
            continue

        sig = by_func.get(info.func)
        if sig is None:
            # first line of a function: this is its return value
            if info.name is not None:
                print(f'{flno_(lineno)} named return value in {line!r}')
            result = Return(info.ctype, info.reftype, info.comment)
            by_func[info.func] = Signature(info.func, lineno, result)
            continue

        # any further line of a known function is a parameter
        if info.name is None:
            print(f'{flno_(lineno)} missing parameter name in {line!r}')
            continue
        if info.name in sig.params:
            print(f'{flno_(lineno)} duplicated parameter name in {line!r}')
            continue
        sig.params[info.name] = Param(
            info.name, lineno, info.ctype, info.reftype, info.comment,
        )

    return FileView(by_func, frozenset(todo_names))

class Warnings:
    """Print lint warnings and count how many were emitted."""

    def __init__(self) -> None:
        self.count = 0

    def block(self, sig: Signature, message: str) -> None:
        """Report *message* about the function *sig* as a whole."""
        self._report(sig.lineno, sig.name, message)

    def param(self, sig: Signature, param: Param, message: str) -> None:
        """Report *message* about the parameter *param* of *sig*."""
        self._report(param.lineno, f'{sig.name}[{param.name}]', message)

    def _report(self, lineno: int, label: str, message: str) -> None:
        # Single place where a warning is counted and printed.
        self.count += 1
        print(f'{flno_(lineno)} {label:50} {message}')

def check(view: FileView) -> None:
    """Print a warning for every suspicious entry of *view*.

    For each function, the return value must have a type and a known
    reference count field. For each parameter, a reference count must be
    given if and only if its type is one of OBJECT_TYPES, and its name
    must be an identifier or the C ellipsis.

    A summary line is printed when at least one warning was emitted, and
    another line is printed when the functions are not sorted by name.
    """
    report = Warnings()

    for sig in view.signatures.values():
        # check the return value
        result = sig.rparam
        if not result.ctype:
            report.block(sig, "missing return value type")
        if result.reftype is RefType.UNKNOWN:
            report.block(sig, "unknown return value type")

        # check the parameters
        for name, param in sig.params.items():
            is_object = param.ctype in OBJECT_TYPES
            has_refcount = param.reftype is not RefType.UNUSED
            if is_object and not has_refcount:
                report.param(sig, param, "missing reference count management")
            if has_refcount and not is_object:
                report.param(sig, param, "unused reference count management")
            if not (name == C_ELLIPSIS or name.isidentifier()):
                # str.isidentifier() stands in for the C identifier rules
                report.param(sig, param, "invalid parameter name")

    print()
    if report.count:
        print(f"Found {report.count} issues")

    ordered = list(view.signatures.keys())
    if ordered != sorted(ordered):
        print("Entries are not sorted")

def check_structure(view: FileView, stable_abi_file: str) -> None:
    """Print the stable ABI functions that are absent from *view*.

    *stable_abi_file* is the path of a TOML file whose 'function' table
    is keyed by function name. A function is considered present when it
    has an entry in *view*, is marked by a '# TODO: <name>' comment, or
    is listed in IGNORE_LIST.

    Raises KeyError if the TOML file has no 'function' table and
    tomllib.TOMLDecodeError if it is not valid TOML.
    """
    # TOML documents are UTF-8 by specification: hand tomllib a binary
    # stream instead of decoding the file with the locale encoding.
    with Path(stable_abi_file).open('rb') as stream:
        stable_abi = tomllib.load(stream)
    expect = stable_abi['function'].keys()
    # check if there are missing entries (those marked as "TODO" are ignored)
    actual = IGNORE_LIST | view.incomplete | view.signatures.keys()
    if missing := (expect - actual):
        print('[!] missing stable ABI entries:')
        for name in sorted(missing):
            print(name)

def _create_parser() -> ArgumentParser:
parser = ArgumentParser(prog='lint.py')
parser.add_argument('file', help="the file to check")
parser.add_argument('--stable-abi', help="the stable ABI TOML file to use")
return parser

def main() -> None:
    """Run the linter on the file given on the command line.

    The file is parsed, then checked; when --stable-abi is given, the
    parsed entries are also compared against that stable ABI file.
    Each phase is introduced by an 80-column banner.
    """
    parser = _create_parser()
    args = parser.parse_args()
    # Decode explicitly as UTF-8 so that the result does not depend on
    # the locale of the machine running the linter.
    lines = Path(args.file).read_text(encoding='utf-8').splitlines()
    print(" PARSING ".center(80, '-'))
    view = parse(lines)
    print(" CHECKING ".center(80, '-'))
    check(view)
    if args.stable_abi:
        print(" CHECKING STABLE ABI ".center(80, '-'))
        check_structure(view, args.stable_abi)


if __name__ == "__main__":
    main()
10 changes: 10 additions & 0 deletions Tools/refcounts/mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[mypy]
files = Tools/refcounts/lint.py
pretty = True
show_traceback = True
python_version = 3.12

strict = True
warn_unreachable = True
enable_error_code = all
warn_return_any = False

0 comments on commit bc45547

Please sign in to comment.