From 2586ee6003ea0b5822838bb2072bad9289b130d3 Mon Sep 17 00:00:00 2001 From: Jendrik Seipp Date: Thu, 31 Aug 2023 09:58:17 +0200 Subject: [PATCH] Use end_lineno AST attribute to obtain more accurate line counts. --- CHANGELOG.md | 3 ++- TODO.md | 2 -- tests/test_size.py | 21 ++------------- vulture/core.py | 3 +-- vulture/lines.py | 65 +--------------------------------------------- 5 files changed, 6 insertions(+), 88 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf343b32..69b5b14c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 2.10 (unreleased) -* Drop support for Python 3.7 (Jendrik Seipp). +* Drop support for Python 3.7 (Jendrik Seipp, #323). +* Use `end_lineno` AST attribute to obtain more accurate line counts (Jendrik Seipp). # 2.9.1 (2023-08-21) diff --git a/TODO.md b/TODO.md index 8a09d33d..735bf926 100644 --- a/TODO.md +++ b/TODO.md @@ -7,8 +7,6 @@ * https://github.com/jendrikseipp/vulture/issues/249 * https://github.com/jendrikseipp/vulture/issues/315 Use these as test cases. -* Use end_lineno and end_col_offset attributes once we require Python - 3.8+. * Honor (speaking) pylint error codes (e.g., # pylint: disable=unused-import): unused-import, unused-variable, unused-argument, possibly-unused-variable and unreachable-code. See diff --git a/tests/test_size.py b/tests/test_size.py index 478fb3d2..f09a7c85 100644 --- a/tests/test_size.py +++ b/tests/test_size.py @@ -3,26 +3,9 @@ from vulture import lines -def get_last_line_number_slow(node): - """ - A slower, but also simpler and slightly more accurate version of - get_last_line_number(). - - It traverses all descendants of node and records the highest line - number seen. - - This code is 1.6 times slower than count_lines() on the Python - subset of tensorflow. It reports the same sizes for all test cases - and the functions and classes in tensorflow. - - """ - return max(getattr(node, "lineno", -1) for node in ast.walk(node)) - - def count_lines(node): """Estimate the number of lines of the given AST node.""" last_lineno = lines.get_last_line_number(node) - assert get_last_line_number_slow(node) == last_lineno return last_lineno - lines.get_first_line_number(node) + 1 @@ -209,7 +192,7 @@ def foo(): 'long' 'string') """ - check_size(example, 4) + check_size(example, 6) # We currently cannot handle code ending with comment lines. @@ -302,7 +285,7 @@ class Foo: [a, b ] """ - check_size(example, 2) + check_size(example, 3) def test_size_ellipsis(): diff --git a/vulture/core.py b/vulture/core.py index 06559112..9278f0ee 100644 --- a/vulture/core.py +++ b/vulture/core.py @@ -477,14 +477,13 @@ def ignored(lineno): if ignored(first_lineno): self._log(f'Ignoring {typ} "{name}"') else: - last_lineno = lines.get_last_line_number(last_node) collection.append( Item( name, typ, self.filename, first_lineno, - last_lineno, + lines.get_last_line_number(last_node), message=message, confidence=confidence, ) diff --git a/vulture/lines.py b/vulture/lines.py index 82303daf..72e7e214 100644 --- a/vulture/lines.py +++ b/vulture/lines.py @@ -1,68 +1,5 @@ -import ast - - -def _get_last_child_with_lineno(node): - """ - Return the last direct child of `node` that has a lineno attribute, - or None if `node` has no such children. - - Almost all node._field lists are sorted by the order in which they - appear in source code. For some nodes however, we have to skip some - fields that either don't have line numbers (e.g., "ctx" and "names") - or that are in the wrong position (e.g., "decorator_list" and - "returns"). Then we choose the first field (i.e., the field with the - highest line number) that actually contains a node. If it contains a - list of nodes, we return the last one. - - """ - ignored_fields = {"ctx", "decorator_list", "names", "returns"} - fields = node._fields - # The fields of ast.Call are in the wrong order. - if isinstance(node, ast.Call): - fields = ("func", "args", "starargs", "keywords", "kwargs") - for name in reversed(fields): - if name in ignored_fields: - continue - - try: - last_field = getattr(node, name) - except AttributeError: - continue - - # Ignore non-AST objects like "is_async", "level" and "nl". - if isinstance(last_field, ast.AST): - return last_field - elif isinstance(last_field, list) and last_field: - return last_field[-1] - return None - - def get_last_line_number(node): - """Estimate last line number of the given AST node. - - The estimate is based on the line number of the last descendant of - `node` that has a lineno attribute. Therefore, it underestimates the - size of code ending with, e.g., multiline strings and comments. - - When traversing the tree, we may see a mix of nodes with line - numbers and nodes without line numbers. We therefore, store the - maximum line number seen so far and report it at the end. A more - accurate (but also slower to compute) estimate would traverse all - children, instead of just the last one, since choosing the last one - may lead to a path that ends with a node without line number. - - """ - max_lineno = node.lineno - while True: - last_child = _get_last_child_with_lineno(node) - if last_child is None: - return max_lineno - else: - try: - max_lineno = max(max_lineno, last_child.lineno) - except AttributeError: - pass - node = last_child + return node.end_lineno def get_first_line_number(node):