From d958d9f4a1b71c6d30960bf6c53c41046ea94590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Kul=C3=ADk?= Date: Thu, 5 Dec 2024 19:43:19 +0100 Subject: [PATCH 01/14] GH-126727: Fix test_era_nl_langinfo with Japanese ERAs on Solaris (GH-127327) Fix test_era_nl_langinfo with Japanese ERAs on Solaris --- Lib/test/test__locale.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 2c751033ebb3e2..cef84fd9580c37 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -102,6 +102,11 @@ def accept(loc): # ps_AF doesn't work on Windows: see bpo-38324 (msg361830) del known_numerics['ps_AF'] +if sys.platform == 'sunos5': + # On Solaris, Japanese ERAs start with the year 1927, + # and thus there's less of them. + known_era['ja_JP'] = (5, '+:1:2019/05/01:2019/12/31:令和:%EC元年') + class _LocaleTests(unittest.TestCase): def setUp(self): From 23f2e8f13c4e4a34106cf96fad9329cbfbf8844d Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 5 Dec 2024 21:10:46 +0200 Subject: [PATCH 02/14] gh-127221: Add colour to unittest output (#127223) Co-authored-by: Kirill Podoprigora --- Doc/conf.py | 7 ++ Doc/library/doctest.rst | 4 + Doc/library/traceback.rst | 4 + Doc/library/unittest.rst | 4 +- Doc/using/cmdline.rst | 8 -- Doc/whatsnew/3.13.rst | 9 -- Doc/whatsnew/3.14.rst | 7 ++ Lib/test/test_unittest/test_async_case.py | 2 + Lib/test/test_unittest/test_program.py | 6 + Lib/test/test_unittest/test_result.py | 16 ++- Lib/test/test_unittest/test_runner.py | 13 +++ Lib/test/test_unittest/test_skipping.py | 3 + Lib/unittest/result.py | 4 +- Lib/unittest/runner.py | 108 +++++++++++------- ...-11-23-00-17-29.gh-issue-127221.OSXdFE.rst | 1 + 15 files changed, 136 insertions(+), 60 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst diff --git a/Doc/conf.py b/Doc/conf.py index 738c9901eef06f..9cde394cbaed69 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -78,6 +78,13 @@ .. |python_version_literal| replace:: ``Python {version}`` .. |python_x_dot_y_literal| replace:: ``python{version}`` .. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` + +.. Apparently this how you hack together a formatted link: + (https://www.docutils.org/docs/ref/rst/directives.html#replacement-text) +.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` +.. _FORCE_COLOR: https://force-color.org/ +.. |NO_COLOR| replace:: ``NO_COLOR`` +.. _NO_COLOR: https://no-color.org/ """ # There are two options for replacing |today|. Either, you set today to some diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index 6b0282eed49566..106b0a6c95b7be 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -136,6 +136,10 @@ examples of doctests in the standard Python test suite and libraries. Especially useful examples can be found in the standard test file :file:`Lib/test/test_doctest/test_doctest.py`. +.. versionadded:: 3.13 + Output is colorized by default and can be + :ref:`controlled using environment variables `. + .. _doctest-simple-testmod: diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 8f94fc448f2482..4899ed64ebad8d 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -44,6 +44,10 @@ The module's API can be divided into two parts: necessary for later formatting without holding references to actual exception and traceback objects. +.. 
versionadded:: 3.13 + Output is colorized by default and can be + :ref:`controlled using environment variables `. + Module-Level Functions ---------------------- diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 38bad9405597dd..7f8b710f611002 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -46,7 +46,6 @@ test runner a textual interface, or return a special value to indicate the results of executing the tests. - .. seealso:: Module :mod:`doctest` @@ -198,6 +197,9 @@ For a list of all the command-line options:: In earlier versions it was only possible to run individual test methods and not modules or classes. +.. versionadded:: 3.14 + Output is colorized by default and can be + :ref:`controlled using environment variables `. Command-line options ~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 6cf42b27718022..7db2f4820f346a 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -663,14 +663,6 @@ output. To control the color output only in the Python interpreter, the precedence over ``NO_COLOR``, which in turn takes precedence over ``FORCE_COLOR``. -.. Apparently this how you hack together a formatted link: - -.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` -.. _FORCE_COLOR: https://force-color.org/ - -.. |NO_COLOR| replace:: ``NO_COLOR`` -.. _NO_COLOR: https://no-color.org/ - Options you shouldn't use ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 664b1866172378..9f6d98b9950d19 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -252,15 +252,6 @@ Improved error messages the canonical |NO_COLOR|_ and |FORCE_COLOR|_ environment variables. (Contributed by Pablo Galindo Salgado in :gh:`112730`.) -.. Apparently this how you hack together a formatted link: - (https://www.docutils.org/docs/ref/rst/directives.html#replacement-text) - -.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` -.. _FORCE_COLOR: https://force-color.org/ - -.. |NO_COLOR| replace:: ``NO_COLOR`` -.. _NO_COLOR: https://no-color.org/ - * A common mistake is to write a script with the same name as a standard library module. When this results in errors, we now display a more helpful error message: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index e83c509a025ab5..db25c037e509b6 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -616,6 +616,13 @@ unicodedata unittest -------- +* :mod:`unittest` output is now colored by default. + This can be controlled via the :envvar:`PYTHON_COLORS` environment + variable as well as the canonical |NO_COLOR|_ + and |FORCE_COLOR|_ environment variables. + See also :ref:`using-on-controlling-color`. + (Contributed by Hugo van Kemenade in :gh:`127221`.) + * unittest discovery supports :term:`namespace package` as start directory again. It was removed in Python 3.11. (Contributed by Jacob Walls in :gh:`80958`.) 
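As an aside to the whatsnew entry above: the precedence mirrors the interpreter's own colour handling, with PYTHON_COLORS overriding NO_COLOR, which in turn overrides FORCE_COLOR. A minimal sketch of forcing plain output when a wrapper script captures the test summary, assuming a 3.14 build with this change applied; the module name test_example is hypothetical:

    import os
    import subprocess
    import sys

    # NO_COLOR wins over FORCE_COLOR; PYTHON_COLORS (if set) would override both.
    env = dict(os.environ, NO_COLOR="1")
    env.pop("FORCE_COLOR", None)

    proc = subprocess.run(
        [sys.executable, "-m", "unittest", "-v", "test_example"],
        capture_output=True,
        text=True,
        env=env,
    )
    # The summary goes to stderr and now contains no ANSI escape sequences,
    # e.g. "OK (skipped=1)" or "FAILED (failures=1)".
    print(proc.stderr)
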
diff --git a/Lib/test/test_unittest/test_async_case.py b/Lib/test/test_unittest/test_async_case.py index 00ef55bdf9bc83..8ea244bff05c5f 100644 --- a/Lib/test/test_unittest/test_async_case.py +++ b/Lib/test/test_unittest/test_async_case.py @@ -2,6 +2,7 @@ import contextvars import unittest from test import support +from test.support import force_not_colorized support.requires_working_socket(module=True) @@ -252,6 +253,7 @@ async def on_cleanup(self): test.doCleanups() self.assertEqual(events, ['asyncSetUp', 'test', 'asyncTearDown', 'cleanup']) + @force_not_colorized def test_exception_in_tear_clean_up(self): class Test(unittest.IsolatedAsyncioTestCase): async def asyncSetUp(self): diff --git a/Lib/test/test_unittest/test_program.py b/Lib/test/test_unittest/test_program.py index 7241cf59f73d4f..0b46f338ac77e1 100644 --- a/Lib/test/test_unittest/test_program.py +++ b/Lib/test/test_unittest/test_program.py @@ -4,6 +4,7 @@ from test import support import unittest import test.test_unittest +from test.support import force_not_colorized from test.test_unittest.test_result import BufferedWriter @@ -120,6 +121,7 @@ def run(self, test): self.assertEqual(['test.test_unittest', 'test.test_unittest2'], program.testNames) + @force_not_colorized def test_NonExit(self): stream = BufferedWriter() program = unittest.main(exit=False, @@ -135,6 +137,7 @@ def test_NonExit(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_Exit(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: @@ -152,6 +155,7 @@ def test_Exit(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_ExitAsDefault(self): stream = BufferedWriter() with self.assertRaises(SystemExit): @@ -167,6 +171,7 @@ def test_ExitAsDefault(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_ExitSkippedSuite(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: @@ -179,6 +184,7 @@ def test_ExitSkippedSuite(self): expected = '\n\nOK (skipped=1)\n' self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_ExitEmptySuite(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: diff --git a/Lib/test/test_unittest/test_result.py b/Lib/test/test_unittest/test_result.py index 4e5ec54e9c892a..746b9fa2677717 100644 --- a/Lib/test/test_unittest/test_result.py +++ b/Lib/test/test_unittest/test_result.py @@ -7,6 +7,7 @@ import traceback import unittest from unittest.util import strclass +from test.support import force_not_colorized from test.test_unittest.support import BufferedWriter @@ -14,7 +15,7 @@ class MockTraceback(object): class TracebackException: def __init__(self, *args, **kwargs): self.capture_locals = kwargs.get('capture_locals', False) - def format(self): + def format(self, **kwargs): result = ['A traceback'] if self.capture_locals: result.append('locals') @@ -205,6 +206,7 @@ def test_1(self): self.assertIs(test_case, test) self.assertIsInstance(formatted_exc, str) + @force_not_colorized def test_addFailure_filter_traceback_frames(self): class Foo(unittest.TestCase): def test_1(self): @@ -231,6 +233,7 @@ def get_exc_info(): self.assertEqual(len(dropped), 1) self.assertIn("raise self.failureException(msg)", dropped[0]) + @force_not_colorized def test_addFailure_filter_traceback_frames_context(self): class Foo(unittest.TestCase): def test_1(self): @@ -260,6 +263,7 
@@ def get_exc_info(): self.assertEqual(len(dropped), 1) self.assertIn("raise self.failureException(msg)", dropped[0]) + @force_not_colorized def test_addFailure_filter_traceback_frames_chained_exception_self_loop(self): class Foo(unittest.TestCase): def test_1(self): @@ -285,6 +289,7 @@ def get_exc_info(): formatted_exc = result.failures[0][1] self.assertEqual(formatted_exc.count("Exception: Loop\n"), 1) + @force_not_colorized def test_addFailure_filter_traceback_frames_chained_exception_cycle(self): class Foo(unittest.TestCase): def test_1(self): @@ -446,6 +451,7 @@ def testFailFast(self): result.addUnexpectedSuccess(None) self.assertTrue(result.shouldStop) + @force_not_colorized def testFailFastSetByRunner(self): stream = BufferedWriter() runner = unittest.TextTestRunner(stream=stream, failfast=True) @@ -619,6 +625,7 @@ def _run_test(self, test_name, verbosity, tearDownError=None): test.run(result) return stream.getvalue() + @force_not_colorized def testDotsOutput(self): self.assertEqual(self._run_test('testSuccess', 1), '.') self.assertEqual(self._run_test('testSkip', 1), 's') @@ -627,6 +634,7 @@ def testDotsOutput(self): self.assertEqual(self._run_test('testExpectedFailure', 1), 'x') self.assertEqual(self._run_test('testUnexpectedSuccess', 1), 'u') + @force_not_colorized def testLongOutput(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSuccess', 2), @@ -642,17 +650,21 @@ def testLongOutput(self): self.assertEqual(self._run_test('testUnexpectedSuccess', 2), f'testUnexpectedSuccess ({classname}.testUnexpectedSuccess) ... unexpected success\n') + @force_not_colorized def testDotsOutputSubTestSuccess(self): self.assertEqual(self._run_test('testSubTestSuccess', 1), '.') + @force_not_colorized def testLongOutputSubTestSuccess(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSubTestSuccess', 2), f'testSubTestSuccess ({classname}.testSubTestSuccess) ... ok\n') + @force_not_colorized def testDotsOutputSubTestMixed(self): self.assertEqual(self._run_test('testSubTestMixed', 1), 'sFE') + @force_not_colorized def testLongOutputSubTestMixed(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSubTestMixed', 2), @@ -661,6 +673,7 @@ def testLongOutputSubTestMixed(self): f' testSubTestMixed ({classname}.testSubTestMixed) [fail] (c=3) ... FAIL\n' f' testSubTestMixed ({classname}.testSubTestMixed) [error] (d=4) ... 
ERROR\n') + @force_not_colorized def testDotsOutputTearDownFail(self): out = self._run_test('testSuccess', 1, AssertionError('fail')) self.assertEqual(out, 'F') @@ -671,6 +684,7 @@ def testDotsOutputTearDownFail(self): out = self._run_test('testSkip', 1, AssertionError('fail')) self.assertEqual(out, 'sF') + @force_not_colorized def testLongOutputTearDownFail(self): classname = f'{__name__}.{self.Test.__qualname__}' out = self._run_test('testSuccess', 2, AssertionError('fail')) diff --git a/Lib/test/test_unittest/test_runner.py b/Lib/test/test_unittest/test_runner.py index 1b9cef43e3f9c5..1131cd73128866 100644 --- a/Lib/test/test_unittest/test_runner.py +++ b/Lib/test/test_unittest/test_runner.py @@ -4,6 +4,7 @@ import pickle import subprocess from test import support +from test.support import force_not_colorized import unittest from unittest.case import _Outcome @@ -106,6 +107,7 @@ def cleanup2(*args, **kwargs): self.assertTrue(test.doCleanups()) self.assertEqual(cleanups, [(2, (), {}), (1, (1, 2, 3), dict(four='hello', five='goodbye'))]) + @force_not_colorized def testCleanUpWithErrors(self): class TestableTest(unittest.TestCase): def testNothing(self): @@ -416,6 +418,7 @@ def cleanup2(): self.assertIsInstance(e2[1], CustomError) self.assertEqual(str(e2[1]), 'cleanup1') + @force_not_colorized def test_with_errors_addCleanUp(self): ordering = [] class TestableTest(unittest.TestCase): @@ -439,6 +442,7 @@ def tearDownClass(cls): ['setUpClass', 'setUp', 'cleanup_exc', 'tearDownClass', 'cleanup_good']) + @force_not_colorized def test_run_with_errors_addClassCleanUp(self): ordering = [] class TestableTest(unittest.TestCase): @@ -462,6 +466,7 @@ def tearDownClass(cls): ['setUpClass', 'setUp', 'test', 'cleanup_good', 'tearDownClass', 'cleanup_exc']) + @force_not_colorized def test_with_errors_in_addClassCleanup_and_setUps(self): ordering = [] class_blow_up = False @@ -514,6 +519,7 @@ def tearDownClass(cls): ['setUpClass', 'setUp', 'tearDownClass', 'cleanup_exc']) + @force_not_colorized def test_with_errors_in_tearDownClass(self): ordering = [] class TestableTest(unittest.TestCase): @@ -590,6 +596,7 @@ def test(self): 'inner setup', 'inner test', 'inner cleanup', 'end outer test', 'outer cleanup']) + @force_not_colorized def test_run_empty_suite_error_message(self): class EmptyTest(unittest.TestCase): pass @@ -663,6 +670,7 @@ class Module(object): self.assertEqual(cleanups, [((1, 2), {'function': 'hello'})]) + @force_not_colorized def test_run_module_cleanUp(self): blowUp = True ordering = [] @@ -802,6 +810,7 @@ def tearDownClass(cls): 'tearDownClass', 'cleanup_good']) self.assertEqual(unittest.case._module_cleanups, []) + @force_not_colorized def test_run_module_cleanUp_when_teardown_exception(self): ordering = [] class Module(object): @@ -963,6 +972,7 @@ def testNothing(self): self.assertEqual(cleanups, [((1, 2), {'function': 3, 'self': 4})]) + @force_not_colorized def test_with_errors_in_addClassCleanup(self): ordering = [] @@ -996,6 +1006,7 @@ def tearDownClass(cls): ['setUpModule', 'setUpClass', 'test', 'tearDownClass', 'cleanup_exc', 'tearDownModule', 'cleanup_good']) + @force_not_colorized def test_with_errors_in_addCleanup(self): ordering = [] class Module(object): @@ -1026,6 +1037,7 @@ def tearDown(self): ['setUpModule', 'setUp', 'test', 'tearDown', 'cleanup_exc', 'tearDownModule', 'cleanup_good']) + @force_not_colorized def test_with_errors_in_addModuleCleanup_and_setUps(self): ordering = [] module_blow_up = False @@ -1318,6 +1330,7 @@ def MockResultClass(*args): expectedresult = 
(runner.stream, DESCRIPTIONS, VERBOSITY) self.assertEqual(runner._makeResult(), expectedresult) + @force_not_colorized @support.requires_subprocess() def test_warnings(self): """ diff --git a/Lib/test/test_unittest/test_skipping.py b/Lib/test/test_unittest/test_skipping.py index f146dcac18ecc0..f5cb860c60b156 100644 --- a/Lib/test/test_unittest/test_skipping.py +++ b/Lib/test/test_unittest/test_skipping.py @@ -1,5 +1,6 @@ import unittest +from test.support import force_not_colorized from test.test_unittest.support import LoggingResult @@ -293,6 +294,7 @@ def test_die(self): self.assertFalse(result.unexpectedSuccesses) self.assertTrue(result.wasSuccessful()) + @force_not_colorized def test_expected_failure_and_fail_in_cleanup(self): class Foo(unittest.TestCase): @unittest.expectedFailure @@ -372,6 +374,7 @@ def test_die(self): self.assertEqual(result.unexpectedSuccesses, [test]) self.assertFalse(result.wasSuccessful()) + @force_not_colorized def test_unexpected_success_and_fail_in_cleanup(self): class Foo(unittest.TestCase): @unittest.expectedFailure diff --git a/Lib/unittest/result.py b/Lib/unittest/result.py index 3ace0a5b7bf2ef..97262735aa8311 100644 --- a/Lib/unittest/result.py +++ b/Lib/unittest/result.py @@ -189,7 +189,9 @@ def _exc_info_to_string(self, err, test): tb_e = traceback.TracebackException( exctype, value, tb, capture_locals=self.tb_locals, compact=True) - msgLines = list(tb_e.format()) + from _colorize import can_colorize + + msgLines = list(tb_e.format(colorize=can_colorize())) if self.buffer: output = sys.stdout.getvalue() diff --git a/Lib/unittest/runner.py b/Lib/unittest/runner.py index 2bcadf0c998bd9..d60c295a1eddf7 100644 --- a/Lib/unittest/runner.py +++ b/Lib/unittest/runner.py @@ -4,6 +4,8 @@ import time import warnings +from _colorize import get_colors + from . 
import result from .case import _SubTest from .signals import registerResult @@ -13,18 +15,18 @@ class _WritelnDecorator(object): """Used to decorate file-like objects with a handy 'writeln' method""" - def __init__(self,stream): + def __init__(self, stream): self.stream = stream def __getattr__(self, attr): if attr in ('stream', '__getstate__'): raise AttributeError(attr) - return getattr(self.stream,attr) + return getattr(self.stream, attr) def writeln(self, arg=None): if arg: self.write(arg) - self.write('\n') # text-mode streams translate to \r\n if needed + self.write('\n') # text-mode streams translate to \r\n if needed class TextTestResult(result.TestResult): @@ -43,6 +45,7 @@ def __init__(self, stream, descriptions, verbosity, *, durations=None): self.showAll = verbosity > 1 self.dots = verbosity == 1 self.descriptions = descriptions + self._ansi = get_colors() self._newline = True self.durations = durations @@ -76,86 +79,102 @@ def _write_status(self, test, status): def addSubTest(self, test, subtest, err): if err is not None: + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: if issubclass(err[0], subtest.failureException): - self._write_status(subtest, "FAIL") + self._write_status(subtest, f"{red}FAIL{reset}") else: - self._write_status(subtest, "ERROR") + self._write_status(subtest, f"{red}ERROR{reset}") elif self.dots: if issubclass(err[0], subtest.failureException): - self.stream.write('F') + self.stream.write(f"{red}F{reset}") else: - self.stream.write('E') + self.stream.write(f"{red}E{reset}") self.stream.flush() super(TextTestResult, self).addSubTest(test, subtest, err) def addSuccess(self, test): super(TextTestResult, self).addSuccess(test) + green, reset = self._ansi.GREEN, self._ansi.RESET if self.showAll: - self._write_status(test, "ok") + self._write_status(test, f"{green}ok{reset}") elif self.dots: - self.stream.write('.') + self.stream.write(f"{green}.{reset}") self.stream.flush() def addError(self, test, err): super(TextTestResult, self).addError(test, err) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - self._write_status(test, "ERROR") + self._write_status(test, f"{red}ERROR{reset}") elif self.dots: - self.stream.write('E') + self.stream.write(f"{red}E{reset}") self.stream.flush() def addFailure(self, test, err): super(TextTestResult, self).addFailure(test, err) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - self._write_status(test, "FAIL") + self._write_status(test, f"{red}FAIL{reset}") elif self.dots: - self.stream.write('F') + self.stream.write(f"{red}F{reset}") self.stream.flush() def addSkip(self, test, reason): super(TextTestResult, self).addSkip(test, reason) + yellow, reset = self._ansi.YELLOW, self._ansi.RESET if self.showAll: - self._write_status(test, "skipped {0!r}".format(reason)) + self._write_status(test, f"{yellow}skipped{reset} {reason!r}") elif self.dots: - self.stream.write("s") + self.stream.write(f"{yellow}s{reset}") self.stream.flush() def addExpectedFailure(self, test, err): super(TextTestResult, self).addExpectedFailure(test, err) + yellow, reset = self._ansi.YELLOW, self._ansi.RESET if self.showAll: - self.stream.writeln("expected failure") + self.stream.writeln(f"{yellow}expected failure{reset}") self.stream.flush() elif self.dots: - self.stream.write("x") + self.stream.write(f"{yellow}x{reset}") self.stream.flush() def addUnexpectedSuccess(self, test): super(TextTestResult, self).addUnexpectedSuccess(test) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - 
self.stream.writeln("unexpected success") + self.stream.writeln(f"{red}unexpected success{reset}") self.stream.flush() elif self.dots: - self.stream.write("u") + self.stream.write(f"{red}u{reset}") self.stream.flush() def printErrors(self): + bold_red = self._ansi.BOLD_RED + red = self._ansi.RED + reset = self._ansi.RESET if self.dots or self.showAll: self.stream.writeln() self.stream.flush() - self.printErrorList('ERROR', self.errors) - self.printErrorList('FAIL', self.failures) - unexpectedSuccesses = getattr(self, 'unexpectedSuccesses', ()) + self.printErrorList(f"{red}ERROR{reset}", self.errors) + self.printErrorList(f"{red}FAIL{reset}", self.failures) + unexpectedSuccesses = getattr(self, "unexpectedSuccesses", ()) if unexpectedSuccesses: self.stream.writeln(self.separator1) for test in unexpectedSuccesses: - self.stream.writeln(f"UNEXPECTED SUCCESS: {self.getDescription(test)}") + self.stream.writeln( + f"{red}UNEXPECTED SUCCESS{bold_red}: " + f"{self.getDescription(test)}{reset}" + ) self.stream.flush() def printErrorList(self, flavour, errors): + bold_red, reset = self._ansi.BOLD_RED, self._ansi.RESET for test, err in errors: self.stream.writeln(self.separator1) - self.stream.writeln("%s: %s" % (flavour,self.getDescription(test))) + self.stream.writeln( + f"{flavour}{bold_red}: {self.getDescription(test)}{reset}" + ) self.stream.writeln(self.separator2) self.stream.writeln("%s" % err) self.stream.flush() @@ -232,7 +251,7 @@ def run(self, test): if self.warnings: # if self.warnings is set, use it to filter all the warnings warnings.simplefilter(self.warnings) - startTime = time.perf_counter() + start_time = time.perf_counter() startTestRun = getattr(result, 'startTestRun', None) if startTestRun is not None: startTestRun() @@ -242,8 +261,8 @@ def run(self, test): stopTestRun = getattr(result, 'stopTestRun', None) if stopTestRun is not None: stopTestRun() - stopTime = time.perf_counter() - timeTaken = stopTime - startTime + stop_time = time.perf_counter() + time_taken = stop_time - start_time result.printErrors() if self.durations is not None: self._printDurations(result) @@ -253,10 +272,10 @@ def run(self, test): run = result.testsRun self.stream.writeln("Ran %d test%s in %.3fs" % - (run, run != 1 and "s" or "", timeTaken)) + (run, run != 1 and "s" or "", time_taken)) self.stream.writeln() - expectedFails = unexpectedSuccesses = skipped = 0 + expected_fails = unexpected_successes = skipped = 0 try: results = map(len, (result.expectedFailures, result.unexpectedSuccesses, @@ -264,26 +283,35 @@ def run(self, test): except AttributeError: pass else: - expectedFails, unexpectedSuccesses, skipped = results + expected_fails, unexpected_successes, skipped = results infos = [] + ansi = get_colors() + bold_red = ansi.BOLD_RED + green = ansi.GREEN + red = ansi.RED + reset = ansi.RESET + yellow = ansi.YELLOW + if not result.wasSuccessful(): - self.stream.write("FAILED") + self.stream.write(f"{bold_red}FAILED{reset}") failed, errored = len(result.failures), len(result.errors) if failed: - infos.append("failures=%d" % failed) + infos.append(f"{bold_red}failures={failed}{reset}") if errored: - infos.append("errors=%d" % errored) + infos.append(f"{bold_red}errors={errored}{reset}") elif run == 0 and not skipped: - self.stream.write("NO TESTS RAN") + self.stream.write(f"{yellow}NO TESTS RAN{reset}") else: - self.stream.write("OK") + self.stream.write(f"{green}OK{reset}") if skipped: - infos.append("skipped=%d" % skipped) - if expectedFails: - infos.append("expected failures=%d" % expectedFails) - if 
unexpectedSuccesses: - infos.append("unexpected successes=%d" % unexpectedSuccesses) + infos.append(f"{yellow}skipped={skipped}{reset}") + if expected_fails: + infos.append(f"{yellow}expected failures={expected_fails}{reset}") + if unexpected_successes: + infos.append( + f"{red}unexpected successes={unexpected_successes}{reset}" + ) if infos: self.stream.writeln(" (%s)" % (", ".join(infos),)) else: diff --git a/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst b/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst new file mode 100644 index 00000000000000..0e4a03caf9f49d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst @@ -0,0 +1 @@ +Add colour to :mod:`unittest` output. Patch by Hugo van Kemenade. From 657d0e99aa8754372786120d6ec00c9d9970e775 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Thu, 5 Dec 2024 21:52:58 +0100 Subject: [PATCH 03/14] [Docs] GDB howto: Fix block type of a cast example (#127621) --- Doc/howto/gdb_helpers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/gdb_helpers.rst b/Doc/howto/gdb_helpers.rst index 53bbf7ddaa2ab9..98ce813ca4ab02 100644 --- a/Doc/howto/gdb_helpers.rst +++ b/Doc/howto/gdb_helpers.rst @@ -180,7 +180,7 @@ regular machine-level integer:: (gdb) p some_python_integer $4 = 42 -The internal structure can be revealed with a cast to :c:expr:`PyLongObject *`: +The internal structure can be revealed with a cast to :c:expr:`PyLongObject *`:: (gdb) p *(PyLongObject*)some_python_integer $5 = {ob_base = {ob_base = {ob_refcnt = 8, ob_type = 0x3dad39f5e0}, ob_size = 1}, From f4f530804b9d8f089eba0f157ec2144c03b13651 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 5 Dec 2024 21:07:31 +0000 Subject: [PATCH 04/14] gh-127582: Make object resurrection thread-safe for free threading. (GH-127612) Objects may be temporarily "resurrected" in destructors when calling finalizers or watcher callbacks. We previously undid the resurrection by decrementing the reference count using `Py_SET_REFCNT`. This was not thread-safe because other threads might be accessing the object (modifying its reference count) if it was exposed by the finalizer, watcher callback, or temporarily accessed by a racy dictionary or list access. This adds internal-only thread-safe functions for temporary object resurrection during destructors. --- Include/internal/pycore_object.h | 44 +++++++++++++++++++ ...-12-05-19-25-00.gh-issue-127582.ogUY2a.rst | 2 + Objects/codeobject.c | 7 +-- Objects/dictobject.c | 7 +-- Objects/funcobject.c | 7 +-- Objects/object.c | 40 ++++++++++++++--- 6 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index ce876b093b2522..6b0b464a6fdb96 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -697,8 +697,52 @@ _PyObject_SetMaybeWeakref(PyObject *op) } } +extern int _PyObject_ResurrectEndSlow(PyObject *op); #endif +// Temporarily resurrects an object during deallocation. The refcount is set +// to one. 
+static inline void +_PyObject_ResurrectStart(PyObject *op) +{ + assert(Py_REFCNT(op) == 0); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyThreadState_GET()); +#endif +#ifdef Py_GIL_DISABLED + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_ThreadId()); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 1); + _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0); +#else + Py_SET_REFCNT(op, 1); +#endif +} + +// Undoes an object resurrection by decrementing the refcount without calling +// _Py_Dealloc(). Returns 0 if the object is dead (the normal case), and +// deallocation should continue. Returns 1 if the object is still alive. +static inline int +_PyObject_ResurrectEnd(PyObject *op) +{ +#ifdef Py_REF_DEBUG + _Py_DecRefTotal(_PyThreadState_GET()); +#endif +#ifndef Py_GIL_DISABLED + Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + return Py_REFCNT(op) != 0; +#else + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + Py_ssize_t shared = _Py_atomic_load_ssize_acquire(&op->ob_ref_shared); + if (_Py_IsOwnedByCurrentThread(op) && local == 1 && shared == 0) { + // Fast-path: object has a single refcount and is owned by this thread + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); + return 0; + } + // Slow-path: object has a shared refcount or is not owned by this thread + return _PyObject_ResurrectEndSlow(op); +#endif +} + /* Tries to incref op and returns 1 if successful or 0 otherwise. */ static inline int _Py_TryIncref(PyObject *op) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst new file mode 100644 index 00000000000000..59491feeb9bcfa --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst @@ -0,0 +1,2 @@ +Fix non-thread-safe object resurrection when calling finalizers and watcher +callbacks in the free threading build. 
diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 148350cc4b9195..eb8de136ee6432 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1867,14 +1867,11 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyCodeObject *co) { - assert(Py_REFCNT(co) == 0); - Py_SET_REFCNT(co, 1); + _PyObject_ResurrectStart((PyObject *)co); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); - if (Py_REFCNT(co) > 1) { - Py_SET_REFCNT(co, Py_REFCNT(co) - 1); + if (_PyObject_ResurrectEnd((PyObject *)co)) { return; } - Py_SET_REFCNT(co, 0); #ifdef Py_GIL_DISABLED PyObject_GC_UnTrack(co); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a13d8084d14d66..1c9f86438dadc3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3162,14 +3162,11 @@ dict_dealloc(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; PyInterpreterState *interp = _PyInterpreterState_GET(); - assert(Py_REFCNT(mp) == 0); - Py_SET_REFCNT(mp, 1); + _PyObject_ResurrectStart(self); _PyDict_NotifyEvent(interp, PyDict_EVENT_DEALLOCATED, mp, NULL, NULL); - if (Py_REFCNT(mp) > 1) { - Py_SET_REFCNT(mp, Py_REFCNT(mp) - 1); + if (_PyObject_ResurrectEnd(self)) { return; } - Py_SET_REFCNT(mp, 0); PyDictValues *values = mp->ma_values; PyDictKeysObject *keys = mp->ma_keys; Py_ssize_t i, n; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 4ba47285f7152f..cca7f01498013e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1092,14 +1092,11 @@ static void func_dealloc(PyObject *self) { PyFunctionObject *op = _PyFunction_CAST(self); - assert(Py_REFCNT(op) == 0); - Py_SET_REFCNT(op, 1); + _PyObject_ResurrectStart(self); handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); - if (Py_REFCNT(op) > 1) { - Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + if (_PyObject_ResurrectEnd(self)) { return; } - Py_SET_REFCNT(op, 0); _PyObject_GC_UNTRACK(op); if (op->func_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *) op); diff --git a/Objects/object.c b/Objects/object.c index 8868fa29066404..74f47fa4239032 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -362,8 +362,10 @@ is_dead(PyObject *o) } # endif -void -_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +// Decrement the shared reference count of an object. Return 1 if the object +// is dead and should be deallocated, 0 otherwise. +static int +_Py_DecRefSharedIsDead(PyObject *o, const char *filename, int lineno) { // Should we queue the object for the owning thread to merge? int should_queue; @@ -404,6 +406,15 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) } else if (new_shared == _Py_REF_MERGED) { // refcount is zero AND merged + return 1; + } + return 0; +} + +void +_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +{ + if (_Py_DecRefSharedIsDead(o, filename, lineno)) { _Py_Dealloc(o); } } @@ -472,6 +483,26 @@ _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) &shared, new_shared)); return refcnt; } + +// The more complicated "slow" path for undoing the resurrection of an object. +int +_PyObject_ResurrectEndSlow(PyObject *op) +{ + if (_Py_IsImmortal(op)) { + return 1; + } + if (_Py_IsOwnedByCurrentThread(op)) { + // If the object is owned by the current thread, give up ownership and + // merge the refcount. This isn't necessary in all cases, but it + // simplifies the implementation. 
+ Py_ssize_t refcount = _Py_ExplicitMergeRefcount(op, -1); + return refcount != 0; + } + int is_dead = _Py_DecRefSharedIsDead(op, NULL, 0); + return !is_dead; +} + + #endif /* Py_GIL_DISABLED */ @@ -550,7 +581,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) } /* Temporarily resurrect the object. */ - Py_SET_REFCNT(self, 1); + _PyObject_ResurrectStart(self); PyObject_CallFinalizer(self); @@ -560,8 +591,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) /* Undo the temporary resurrection; can't use DECREF here, it would * cause a recursive call. */ - Py_SET_REFCNT(self, Py_REFCNT(self) - 1); - if (Py_REFCNT(self) == 0) { + if (!_PyObject_ResurrectEnd(self)) { return 0; /* this is the normal path out */ } From 8b3cccf3f9508572d85b0044519f2bd5715dacad Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 5 Dec 2024 21:39:43 +0000 Subject: [PATCH 05/14] GH-125413: Revert addition of `pathlib.Path.scandir()` method (#127377) Remove documentation for `pathlib.Path.scandir()`, and rename the method to `_scandir()`. In the private pathlib ABCs, make `iterdir()` abstract and call it from `_scandir()`. It's not worthwhile to add this method at the moment - see discussion: https://discuss.python.org/t/ergonomics-of-new-pathlib-path-scandir/71721 Co-authored-by: Steve Dower --- Doc/library/pathlib.rst | 29 ----------- Doc/whatsnew/3.14.rst | 6 --- Lib/pathlib/_abc.py | 15 +++--- Lib/pathlib/_local.py | 4 +- Lib/test/test_pathlib/test_pathlib_abc.py | 48 ++++--------------- Misc/NEWS.d/3.14.0a2.rst | 2 +- ...-11-29-00-15-59.gh-issue-125413.WCN0vv.rst | 3 ++ 7 files changed, 22 insertions(+), 85 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index a42ac1f8bcdf71..4b48880d6d9a18 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1289,35 +1289,6 @@ Reading directories raised. -.. method:: Path.scandir() - - When the path points to a directory, return an iterator of - :class:`os.DirEntry` objects corresponding to entries in the directory. The - returned iterator supports the :term:`context manager` protocol. It is - implemented using :func:`os.scandir` and gives the same guarantees. - - Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can - significantly increase the performance of code that also needs file type or - file attribute information, because :class:`os.DirEntry` objects expose - this information if the operating system provides it when scanning a - directory. - - The following example displays the names of subdirectories. The - ``entry.is_dir()`` check will generally not make an additional system call:: - - >>> p = Path('docs') - >>> with p.scandir() as entries: - ... for entry in entries: - ... if entry.is_dir(): - ... entry.name - ... - '_templates' - '_build' - '_static' - - .. versionadded:: 3.14 - - .. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False) Glob the given relative *pattern* in the directory represented by this path, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index db25c037e509b6..b300e348679438 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -532,12 +532,6 @@ pathlib (Contributed by Barney Gale in :gh:`73991`.) -* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator - of :class:`os.DirEntry` objects. This is exactly equivalent to calling - :func:`os.scandir` on a path object. - - (Contributed by Barney Gale in :gh:`125413`.) 
- pdb --- diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 2b314b6c9a16bf..86617ff2616f33 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -94,7 +94,7 @@ class PathGlobber(_GlobberBase): lexists = operator.methodcaller('exists', follow_symlinks=False) add_slash = operator.methodcaller('joinpath', '') - scandir = operator.methodcaller('scandir') + scandir = operator.methodcaller('_scandir') @staticmethod def concat_path(path, text): @@ -632,13 +632,14 @@ def write_text(self, data, encoding=None, errors=None, newline=None): with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) - def scandir(self): - """Yield os.DirEntry objects of the directory contents. + def _scandir(self): + """Yield os.DirEntry-like objects of the directory contents. The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - raise UnsupportedOperation(self._unsupported_msg('scandir()')) + import contextlib + return contextlib.nullcontext(self.iterdir()) def iterdir(self): """Yield path objects of the directory contents. @@ -646,9 +647,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - with self.scandir() as entries: - names = [entry.name for entry in entries] - return map(self.joinpath, names) + raise UnsupportedOperation(self._unsupported_msg('iterdir()')) def _glob_selector(self, parts, case_sensitive, recurse_symlinks): if case_sensitive is None: @@ -698,7 +697,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): if not top_down: paths.append((path, dirnames, filenames)) try: - with path.scandir() as entries: + with path._scandir() as entries: for entry in entries: name = entry.name try: diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index b5d9dc49f58463..bb8a252c0e94e2 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -634,8 +634,8 @@ def _filter_trailing_slash(self, paths): path_str = path_str[:-1] yield path_str - def scandir(self): - """Yield os.DirEntry objects of the directory contents. + def _scandir(self): + """Yield os.DirEntry-like objects of the directory contents. The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 5fa2f550cefcf4..7ba3fa823a30b9 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1,5 +1,4 @@ import collections -import contextlib import io import os import errno @@ -1418,24 +1417,6 @@ def close(self): 'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime') -class DummyDirEntry: - """ - Minimal os.DirEntry-like object. Returned from DummyPath.scandir(). 
- """ - __slots__ = ('name', '_is_symlink', '_is_dir') - - def __init__(self, name, is_symlink, is_dir): - self.name = name - self._is_symlink = is_symlink - self._is_dir = is_dir - - def is_symlink(self): - return self._is_symlink - - def is_dir(self, *, follow_symlinks=True): - return self._is_dir and (follow_symlinks or not self._is_symlink) - - class DummyPath(PathBase): """ Simple implementation of PathBase that keeps files and directories in @@ -1503,25 +1484,14 @@ def open(self, mode='r', buffering=-1, encoding=None, stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) return stream - @contextlib.contextmanager - def scandir(self): - path = self.resolve() - path_str = str(path) - if path_str in self._files: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str) - elif path_str in self._directories: - yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]]) + def iterdir(self): + path = str(self.resolve()) + if path in self._files: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) + elif path in self._directories: + return iter([self / name for name in self._directories[path]]) else: - raise FileNotFoundError(errno.ENOENT, "File not found", path_str) - - @property - def _dir_entry(self): - path_str = str(self) - is_symlink = path_str in self._symlinks - is_directory = (path_str in self._directories - if not is_symlink - else self._symlinks[path_str][1]) - return DummyDirEntry(self.name, is_symlink, is_directory) + raise FileNotFoundError(errno.ENOENT, "File not found", path) def mkdir(self, mode=0o777, parents=False, exist_ok=False): path = str(self.parent.resolve() / self.name) @@ -2214,9 +2184,9 @@ def test_iterdir_nodir(self): def test_scandir(self): p = self.cls(self.base) - with p.scandir() as entries: + with p._scandir() as entries: self.assertTrue(list(entries)) - with p.scandir() as entries: + with p._scandir() as entries: for entry in entries: child = p / entry.name self.assertIsNotNone(entry) diff --git a/Misc/NEWS.d/3.14.0a2.rst b/Misc/NEWS.d/3.14.0a2.rst index 7384ce54cb8914..d82ec98b7a3c87 100644 --- a/Misc/NEWS.d/3.14.0a2.rst +++ b/Misc/NEWS.d/3.14.0a2.rst @@ -597,7 +597,7 @@ TypeError is now raised instead of ValueError for some logical errors. .. nonce: Jat5kq .. section: Library -Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory +Add :meth:`!pathlib.Path.scandir` method to efficiently fetch directory children and their file attributes. This is a trivial wrapper of :func:`os.scandir`. diff --git a/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst b/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst new file mode 100644 index 00000000000000..b56a77b4294ace --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst @@ -0,0 +1,3 @@ +Revert addition of :meth:`!pathlib.Path.scandir`. This method was added in +3.14.0a2. The optimizations remain for file system paths, but other +subclasses should only have to implement :meth:`pathlib.Path.iterdir`. From 25eee578c8e369b027da6d9d2725f29df6ef1cbd Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Fri, 6 Dec 2024 03:47:51 +0100 Subject: [PATCH 06/14] gh-127627: Add `posix._emscripten_debugger` function (#127628) Add a posix._emscripten_debugger function to add an emscripten breakpoint. 
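A minimal usage sketch, assuming an Emscripten build of CPython with this change applied; the platform guard around the call is only illustrative:

    import sys

    if sys.platform == "emscripten":
        import posix
        # Emits a JavaScript ``debugger`` statement: execution pauses if the
        # browser's (or Node's) dev tools are attached, otherwise it continues.
        posix._emscripten_debugger()
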
--- ...-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst | 2 ++ Modules/clinic/posixmodule.c.h | 28 ++++++++++++++++++- Modules/posixmodule.c | 22 ++++++++++++++- 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst b/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst new file mode 100644 index 00000000000000..48a6c7d30b4a26 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst @@ -0,0 +1,2 @@ +Added ``posix._emscripten_debugger()`` to help with debugging the test suite on +the Emscripten target. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index cd0c4faeac83d1..554299b8598299 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -12447,6 +12447,28 @@ os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) return os__create_environ_impl(module); } +#if defined(__EMSCRIPTEN__) + +PyDoc_STRVAR(os__emscripten_debugger__doc__, +"_emscripten_debugger($module, /)\n" +"--\n" +"\n" +"Create a breakpoint for the JavaScript debugger. Emscripten only."); + +#define OS__EMSCRIPTEN_DEBUGGER_METHODDEF \ + {"_emscripten_debugger", (PyCFunction)os__emscripten_debugger, METH_NOARGS, os__emscripten_debugger__doc__}, + +static PyObject * +os__emscripten_debugger_impl(PyObject *module); + +static PyObject * +os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return os__emscripten_debugger_impl(module); +} + +#endif /* defined(__EMSCRIPTEN__) */ + #ifndef OS_TTYNAME_METHODDEF #define OS_TTYNAME_METHODDEF #endif /* !defined(OS_TTYNAME_METHODDEF) */ @@ -13114,4 +13136,8 @@ os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=7ee14f5e880092f5 input=a9049054013a1b77]*/ + +#ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF + #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF +#endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ +/*[clinic end generated code: output=9c2ca1dbf986c62c input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 2c26fbeac9a1be..2045c6065b8e7a 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -84,6 +84,9 @@ extern char * _getpty(int *, int, mode_t, int); #endif +#ifdef __EMSCRIPTEN__ +#include "emscripten.h" // emscripten_debugger() +#endif /* * A number of APIs are available on macOS from a certain macOS version. @@ -16845,8 +16848,24 @@ os__create_environ_impl(PyObject *module) } -static PyMethodDef posix_methods[] = { +#ifdef __EMSCRIPTEN__ +/*[clinic input] +os._emscripten_debugger + +Create a breakpoint for the JavaScript debugger. Emscripten only. 
+[clinic start generated code]*/ + +static PyObject * +os__emscripten_debugger_impl(PyObject *module) +/*[clinic end generated code: output=ad47dc3bf0661343 input=d814b1877fb6083a]*/ +{ + emscripten_debugger(); + Py_RETURN_NONE; +} +#endif /* __EMSCRIPTEN__ */ + +static PyMethodDef posix_methods[] = { OS_STAT_METHODDEF OS_ACCESS_METHODDEF OS_TTYNAME_METHODDEF @@ -17060,6 +17079,7 @@ static PyMethodDef posix_methods[] = { OS__INPUTHOOK_METHODDEF OS__IS_INPUTHOOK_INSTALLED_METHODDEF OS__CREATE_ENVIRON_METHODDEF + OS__EMSCRIPTEN_DEBUGGER_METHODDEF {NULL, NULL} /* Sentinel */ }; From e991ac8f2037d78140e417cc9a9486223eb3e786 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 22:33:03 -0600 Subject: [PATCH 07/14] gh-127655: Ensure `_SelectorSocketTransport.writelines` pauses the protocol if needed (#127656) Ensure `_SelectorSocketTransport.writelines` pauses the protocol if it reaches the high water mark as needed. Co-authored-by: Kumar Aditya --- Lib/asyncio/selector_events.py | 1 + Lib/test/test_asyncio/test_selector_events.py | 12 ++++++++++++ .../2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst | 1 + 3 files changed, 14 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index f94bf10b4225e7..f1ab9b12d69a5d 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -1175,6 +1175,7 @@ def writelines(self, list_of_data): # If the entire buffer couldn't be written, register a write handler if self._buffer: self._loop._add_writer(self._sock_fd, self._write_ready) + self._maybe_pause_protocol() def can_write_eof(self): return True diff --git a/Lib/test/test_asyncio/test_selector_events.py b/Lib/test/test_asyncio/test_selector_events.py index aaeda33dd0c677..efca30f37414f9 100644 --- a/Lib/test/test_asyncio/test_selector_events.py +++ b/Lib/test/test_asyncio/test_selector_events.py @@ -805,6 +805,18 @@ def test_writelines_send_partial(self): self.assertTrue(self.sock.send.called) self.assertTrue(self.loop.writers) + def test_writelines_pauses_protocol(self): + data = memoryview(b'data') + self.sock.send.return_value = 2 + self.sock.send.fileno.return_value = 7 + + transport = self.socket_transport() + transport._high_water = 1 + transport.writelines([data]) + self.assertTrue(self.protocol.pause_writing.called) + self.assertTrue(self.sock.send.called) + self.assertTrue(self.loop.writers) + @unittest.skipUnless(selector_events._HAS_SENDMSG, 'no sendmsg') def test_write_sendmsg_full(self): data = memoryview(b'data') diff --git a/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst b/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst new file mode 100644 index 00000000000000..76cfc58121d3bd --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst @@ -0,0 +1 @@ +Fixed the :class:`!asyncio.selector_events._SelectorSocketTransport` transport not pausing writes for the protocol when the buffer reaches the high water mark when using :meth:`asyncio.WriteTransport.writelines`. From 8b7c194c7bf7e547e4f6317528f0dcb9344c18c7 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 6 Dec 2024 13:28:32 +0300 Subject: [PATCH 08/14] gh-120010: Fix invalid (nan+nanj) results in _Py_c_prod() (GH-120287) In some cases, previously computed as (nan+nanj), we could recover meaningful component values in the result, see e.g. 
the C11, Annex G.5.1, routine _Cmultd(): >>> z = 1e300+1j >>> z*(nan+infj) # was (nan+nanj) (-inf+infj) That also fix some complex powers for small integer exponents, computed with optimized algorithm (by squaring): >>> z**5 # was (nan+nanj) Traceback (most recent call last): File "", line 1, in z**5 ~^^~ OverflowError: complex exponentiation --- Lib/test/test_complex.py | 17 ++++++ ...-06-04-08-26-25.gh-issue-120010._z-AWz.rst | 2 + Objects/complexobject.c | 60 +++++++++++++++++-- 3 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst diff --git a/Lib/test/test_complex.py b/Lib/test/test_complex.py index 179556f57e884f..fd002fb00ac338 100644 --- a/Lib/test/test_complex.py +++ b/Lib/test/test_complex.py @@ -299,6 +299,22 @@ def test_mul(self): self.assertRaises(TypeError, operator.mul, 1j, None) self.assertRaises(TypeError, operator.mul, None, 1j) + for z, w, r in [(1e300+1j, complex(INF, INF), complex(NAN, INF)), + (1e300+1j, complex(NAN, INF), complex(-INF, INF)), + (1e300+1j, complex(INF, NAN), complex(INF, INF)), + (complex(INF, 1), complex(NAN, INF), complex(NAN, INF)), + (complex(INF, 1), complex(INF, NAN), complex(INF, NAN)), + (complex(NAN, 1), complex(1, INF), complex(-INF, NAN)), + (complex(1, NAN), complex(1, INF), complex(NAN, INF)), + (complex(1e200, NAN), complex(1e200, NAN), complex(INF, NAN)), + (complex(1e200, NAN), complex(NAN, 1e200), complex(NAN, INF)), + (complex(NAN, 1e200), complex(1e200, NAN), complex(NAN, INF)), + (complex(NAN, 1e200), complex(NAN, 1e200), complex(-INF, NAN)), + (complex(NAN, NAN), complex(NAN, NAN), complex(NAN, NAN))]: + with self.subTest(z=z, w=w, r=r): + self.assertComplexesAreIdentical(z * w, r) + self.assertComplexesAreIdentical(w * z, r) + def test_mod(self): # % is no longer supported on complex numbers with self.assertRaises(TypeError): @@ -340,6 +356,7 @@ def test_pow(self): self.assertAlmostEqual(pow(1j, 200), 1) self.assertRaises(ValueError, pow, 1+1j, 1+1j, 1+1j) self.assertRaises(OverflowError, pow, 1e200+1j, 1e200+1j) + self.assertRaises(OverflowError, pow, 1e200+1j, 5) self.assertRaises(TypeError, pow, 1j, None) self.assertRaises(TypeError, pow, None, 1j) self.assertAlmostEqual(pow(1j, 0.5), 0.7071067811865476+0.7071067811865475j) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst new file mode 100644 index 00000000000000..7954c7f5927397 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst @@ -0,0 +1,2 @@ +Correct invalid corner cases which resulted in ``(nan+nanj)`` output in complex +multiplication, e.g., ``(1e300+1j)*(nan+infj)``. Patch by Sergey B Kirpichev. diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 8fbca3cb02d80a..bf6187efac941f 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -85,11 +85,63 @@ _Py_c_neg(Py_complex a) } Py_complex -_Py_c_prod(Py_complex a, Py_complex b) +_Py_c_prod(Py_complex z, Py_complex w) { - Py_complex r; - r.real = a.real*b.real - a.imag*b.imag; - r.imag = a.real*b.imag + a.imag*b.real; + double a = z.real, b = z.imag, c = w.real, d = w.imag; + double ac = a*c, bd = b*d, ad = a*d, bc = b*c; + Py_complex r = {ac - bd, ad + bc}; + + /* Recover infinities that computed as nan+nanj. See e.g. the C11, + Annex G.5.1, routine _Cmultd(). 
*/ + if (isnan(r.real) && isnan(r.imag)) { + int recalc = 0; + + if (isinf(a) || isinf(b)) { /* z is infinite */ + /* "Box" the infinity and change nans in the other factor to 0 */ + a = copysign(isinf(a) ? 1.0 : 0.0, a); + b = copysign(isinf(b) ? 1.0 : 0.0, b); + if (isnan(c)) { + c = copysign(0.0, c); + } + if (isnan(d)) { + d = copysign(0.0, d); + } + recalc = 1; + } + if (isinf(c) || isinf(d)) { /* w is infinite */ + /* "Box" the infinity and change nans in the other factor to 0 */ + c = copysign(isinf(c) ? 1.0 : 0.0, c); + d = copysign(isinf(d) ? 1.0 : 0.0, d); + if (isnan(a)) { + a = copysign(0.0, a); + } + if (isnan(b)) { + b = copysign(0.0, b); + } + recalc = 1; + } + if (!recalc && (isinf(ac) || isinf(bd) || isinf(ad) || isinf(bc))) { + /* Recover infinities from overflow by changing nans to 0 */ + if (isnan(a)) { + a = copysign(0.0, a); + } + if (isnan(b)) { + b = copysign(0.0, b); + } + if (isnan(c)) { + c = copysign(0.0, c); + } + if (isnan(d)) { + d = copysign(0.0, d); + } + recalc = 1; + } + if (recalc) { + r.real = Py_INFINITY*(a*c - b*d); + r.imag = Py_INFINITY*(a*d + b*c); + } + } + return r; } From 023b7d2141467017abc27de864f3f44677768cb3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 10:46:59 +0000 Subject: [PATCH 09/14] GH-126491: Lower heap size limit with faster marking (GH-127519) * Faster marking of reachable objects * Changes calculation of work to do and work done. * Merges transitive closure calculations --- InternalDocs/garbage_collector.md | 50 ++++- Lib/test/test_gc.py | 14 +- Objects/dictobject.c | 4 +- Objects/genobject.c | 69 +------ Objects/typeobject.c | 13 ++ Python/gc.c | 301 ++++++++++++++---------------- 6 files changed, 208 insertions(+), 243 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 08db080a200ea4..4761f78f3593e3 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -199,22 +199,22 @@ unreachable: ```pycon >>> import gc ->>> +>>> >>> class Link: ... def __init__(self, next_link=None): ... self.next_link = next_link -... +... >>> link_3 = Link() >>> link_2 = Link(link_3) >>> link_1 = Link(link_2) >>> link_3.next_link = link_1 >>> A = link_1 >>> del link_1, link_2, link_3 ->>> +>>> >>> link_4 = Link() >>> link_4.next_link = link_4 >>> del link_4 ->>> +>>> >>> # Collect the unreachable Link object (and its .__dict__ dict). >>> gc.collect() 2 @@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. >>> # Create a reference cycle. >>> x = MyObj() >>> x.self = x ->>> +>>> >>> # Initially the object is in the young generation. >>> gc.get_objects(generation=0) [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ->>> +>>> >>> # After a collection of the youngest generation the object >>> # moves to the old generation. >>> gc.collect(generation=0) @@ -515,6 +515,44 @@ increment. All objects directly referred to from those stack frames are added to the working set. Then the above algorithm is repeated, starting from step 2. +Determining how much work to do +------------------------------- + +We need to do a certain amount of work to enusre that garbage is collected, +but doing too much work slows down execution. + +To work out how much work we need to do, consider a heap with `L` live objects +and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects +at the end of the scavenge. 
We don't want the amount of garbage to grow, `G1 ≤ G0`, and +we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. +For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which +`G1` garbage objects are created. + +The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`, +assuming that the number of live objects remains roughly constant. +If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` +For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio. + +In other words, to keep the garbage fraction to 1/3 or less we need to visit +4 times as many objects as are newly created. + +We can do better than this though. Not all new objects will be garbage. +Consider the heap at the end of the scavenge with `L1` live objects and `G1` +garbage. Also, note that `T == M + I` where `M` is the number of objects marked +as reachable and `I` is the number of objects visited in increments. +Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. + +If we choose the amount of work done such that `2*M + I == 6N` then we can do +less work in most cases, but are still guaranteed to keep up. +Since `I ≳ G0 + G1` (not strictly true, but close enough) +`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up. + +The reason that this improves performance is that `M` is usually much larger +than `I`. If `M == 10I`, then `T ≅ 3N`. + +Finally, instead of using a fixed multiple of 8, we gradually increase it as the +heap grows. This avoids wasting work for small heaps and during startup. + Optimization: reusing fields to save memory =========================================== diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b5140057a69d36..baf8e95dffdfce 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1161,27 +1161,19 @@ def make_ll(depth): return head head = make_ll(1000) - count = 1000 - - # There will be some objects we aren't counting, - # e.g. the gc stats dicts. This test checks - # that the counts don't grow, so we try to - # correct for the uncounted objects - # This is just an estimate. - CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] initial_heap_size = _testinternalcapi.get_tracked_heap_size() - for i in range(20_000): + iterations = max(20_000, initial_heap_size) + for i in range(iterations): newhead = make_ll(20) - count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") + self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. 
Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 1c9f86438dadc3..de518b8dc5024b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7064,9 +7064,7 @@ int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) { PyTypeObject *tp = Py_TYPE(obj); - if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { - return 0; - } + assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); if (values->valid) { diff --git a/Objects/genobject.c b/Objects/genobject.c index e87f199c2504ba..33679afecb420f 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -882,25 +882,7 @@ PyTypeObject PyGen_Type = { gen_methods, /* tp_methods */ gen_memberlist, /* tp_members */ gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; static PyObject * @@ -1242,24 +1224,7 @@ PyTypeObject PyCoro_Type = { coro_methods, /* tp_methods */ coro_memberlist, /* tp_members */ coro_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; static void @@ -1464,7 +1429,6 @@ typedef struct _PyAsyncGenWrappedValue { (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \ _Py_CAST(_PyAsyncGenWrappedValue*, (op))) - static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1673,24 +1637,7 @@ PyTypeObject PyAsyncGen_Type = { async_gen_methods, /* tp_methods */ async_gen_memberlist, /* tp_members */ async_gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; @@ -1935,16 +1882,6 @@ PyTypeObject _PyAsyncGenASend_Type = { PyObject_SelfIter, /* tp_iter */ async_gen_asend_iternext, /* tp_iternext */ async_gen_asend_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ .tp_finalize = async_gen_asend_finalize, }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2068d6aa9be52b..cc95b9857e3f2d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2355,6 +2355,16 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg) return 0; } + +static int +plain_object_traverse(PyObject *self, visitproc visit, void *arg) +{ + 
PyTypeObject *type = Py_TYPE(self); + assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT); + Py_VISIT(type); + return PyObject_VisitManagedDict(self, visit, arg); +} + static void clear_slots(PyTypeObject *type, PyObject *self) { @@ -4147,6 +4157,9 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; + if (type->tp_basicsize == sizeof(PyObject)) { + type->tp_traverse = plain_object_traverse; + } } type->tp_basicsize = slotoffset; diff --git a/Python/gc.c b/Python/gc.c index 5b9588c8741b97..fd29a48518e71b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1277,18 +1277,13 @@ gc_list_set_space(PyGC_Head *list, int space) * space faster than objects are added to the old space. * * Each young or incremental collection adds a number of - * objects, S (for survivors) to the old space, and - * incremental collectors scan I objects from the old space. - * I > S must be true. We also want I > S * N to be where - * N > 1. Higher values of N mean that the old space is + * new objects (N) to the heap, and incremental collectors + * scan I objects from the old space. + * I > N must be true. We also want I > N * K to be where + * K > 2. Higher values of K mean that the old space is * scanned more rapidly. - * The default incremental threshold of 10 translates to - * N == 1.4 (1 + 4/threshold) */ - -/* Divide by 10, so that the default incremental threshold of 10 - * scans objects at 1% of the heap size */ -#define SCAN_RATE_DIVISOR 10 +#define SCAN_RATE_DIVISOR 5 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1330,69 +1325,76 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -#ifndef NDEBUG -static inline int -IS_IN_VISITED(PyGC_Head *gc, int visited_space) +typedef struct work_stack { + PyGC_Head *top; + int visited_space; +} WorkStack; + +/* Remove gc from the list it is currently in and push it to the stack */ +static inline void +push_to_stack(PyGC_Head *gc, WorkStack *stack) { - assert(visited_space == 0 || other_space(visited_space) == 0); - return gc_old_space(gc) == visited_space; + PyGC_Head *prev = GC_PREV(gc); + PyGC_Head *next = GC_NEXT(gc); + _PyGCHead_SET_NEXT(prev, next); + _PyGCHead_SET_PREV(next, prev); + _PyGCHead_SET_PREV(gc, stack->top); + stack->top = gc; } -#endif -struct container_and_flag { - PyGC_Head *container; - int visited_space; - intptr_t size; -}; +static inline PyGC_Head * +pop_from_stack(WorkStack *stack) +{ + PyGC_Head *gc = stack->top; + stack->top = _PyGCHead_PREV(gc); + return gc; +} -/* A traversal callback for adding to container) */ -static int -visit_add_to_container(PyObject *op, void *arg) +/* append list `from` to `stack`; `from` becomes an empty list */ +static void +move_list_to_stack(PyGC_Head *from, WorkStack *stack) { - OBJECT_STAT_INC(object_visits); - struct container_and_flag *cf = (struct container_and_flag *)arg; - int visited = cf->visited_space; - assert(visited == get_gc_state()->visited_space); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + if (!gc_list_is_empty(from)) { + PyGC_Head *from_head = GC_NEXT(from); + PyGC_Head *from_tail = GC_PREV(from); + _PyGCHead_SET_PREV(from_head, stack->top); + stack->top = from_tail; + gc_list_init(from); + } +} + +static inline void +move_to_stack(PyObject *op, WorkStack *stack, int visited_space) +{ + assert(op != NULL); + if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) 
&& - gc_old_space(gc) != visited) { + gc_old_space(gc) != visited_space) { + assert(!_Py_IsImmortal(op)); gc_flip_old_space(gc); - gc_list_move(gc, cf->container); - cf->size++; + push_to_stack(gc, stack); } } - return 0; } -static intptr_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) -{ - struct container_and_flag arg = { - .container = container, - .visited_space = gcstate->visited_space, - .size = 0 - }; - assert(GC_NEXT(gc) == container); - while (gc != container) { - /* Survivors will be moved to visited space, so they should - * have been marked as visited */ - assert(IS_IN_VISITED(gc, gcstate->visited_space)); - PyObject *op = FROM_GC(gc); - assert(_PyObject_GC_IS_TRACKED(op)); - if (_Py_IsImmortal(op)) { - PyGC_Head *next = GC_NEXT(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; - } - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - gc = GC_NEXT(gc); - } - return arg.size; +static void +move_unvisited(PyObject *op, WorkStack *stack, int visited_space) +{ + move_to_stack(op, stack, visited_space); +} + +#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V)) + +/* A traversal callback for adding to container */ +static int +visit_add_to_container(PyObject *op, void *arg) +{ + OBJECT_STAT_INC(object_visits); + WorkStack *stack = (WorkStack *)arg; + assert(stack->visited_space == get_gc_state()->visited_space); + move_to_stack(op, stack, stack->visited_space); + return 0; } /* Do bookkeeping for a completed GC cycle */ @@ -1420,54 +1422,62 @@ completed_scavenge(GCState *gcstate) gc_list_set_space(&gcstate->old[not_visited].head, not_visited); } assert(gc_list_is_empty(&gcstate->old[visited].head)); - gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } -static intptr_t -move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) -{ - if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, reachable); - return 1; +static void +frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space) +{ + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + if (frame->f_locals != NULL) { + move_unvisited(frame->f_locals, stack, visited_space); + } + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + move_unvisited(func, stack, visited_space); + while (sp > locals) { + sp--; + _PyStackRef ref = *sp; + if (!PyStackRef_IsNull(ref)) { + PyObject *op = PyStackRef_AsPyObjectBorrow(ref); + if (!_Py_IsImmortal(op)) { + move_unvisited(op, stack, visited_space); + } } } - return 0; } -static intptr_t -mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +static Py_ssize_t +move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty - struct container_and_flag arg = { - .container = reachable, - .visited_space = visited_space, - .size = 0 - }; - while (!gc_list_is_empty(reachable)) { - PyGC_Head *gc = _PyGCHead_NEXT(reachable); + Py_ssize_t objects_marked = 0; + while (stack->top != NULL) { + PyGC_Head *gc = pop_from_stack(stack); assert(gc_old_space(gc) == visited_space); - gc_list_move(gc, visited); + gc_list_append(gc, visited); + objects_marked++; PyObject *op = 
FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); + assert(PyObject_IS_GC(op)); + assert(_PyObject_GC_IS_TRACKED(op)); + if (_Py_IsImmortal(op)) { + _PyObject_GC_UNTRACK(op); + } + else { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, visit_add_to_container, stack); + } } gc_list_validate_space(visited, visited_space); - return arg.size; + return objects_marked; } static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1480,27 +1490,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - _PyStackRef *locals = frame->localsplus; - _PyStackRef *sp = frame->stackpointer; - objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - objects_marked += move_to_reachable(func, &reachable, visited_space); - while (sp > locals) { - sp--; - if (PyStackRef_IsNull(*sp)) { - continue; - } - PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - objects_marked++; - gc_list_move(gc, &reachable); - } - } - } + frame_move_unvisited(frame, &stack, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1513,31 +1503,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; - objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); - objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); - objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; + MOVE_UNVISITED(interp->sysdict, &stack, visited_space); + MOVE_UNVISITED(interp->builtins, &stack, visited_space); + MOVE_UNVISITED(interp->dict, &stack, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - 
objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } @@ -1549,39 +1539,35 @@ mark_at_start(PyThreadState *tstate) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); - gcstate->work_to_do -= objects_marked; gcstate->phase = GC_PHASE_COLLECT; validate_spaces(gcstate); return objects_marked; } + +/* See InternalDocs/garbage_collector.md for more details. */ +#define MAX_HEAP_PORTION_MULTIPLIER 5 +#define MARKING_PROGRESS_MULTIPLIER 2 + static intptr_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on three things. + /* The amount of work we want to do depends on two things. * 1. The number of new objects created - * 2. The growth in heap size since the last collection - * 3. The heap size (up to the number of new objects, to avoid quadratic effects) - * - * For a steady state heap, the amount of work to do is three times the number - * of new objects added to the heap. This ensures that we stay ahead in the - * worst case of all new objects being garbage. - * - * This could be improved by tracking survival rates, but it is still a - * large improvement on the non-marking approach. + * 2. 
The heap size (up to a multiple of the number of new objects, to avoid quadratic effects) */ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*3/2; - intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_fraction > max_heap_fraction) { - heap_fraction = max_heap_fraction; + intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER; + intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_portion > max_heap_portion) { + heap_portion = max_heap_portion; } gcstate->young.count = 0; - return new_objects + heap_fraction; + return new_objects + heap_portion; } static void @@ -1594,36 +1580,37 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; validate_spaces(gcstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - PyGC_Head increment; - gc_list_init(&increment); - int scale_factor = gcstate->old[0].threshold; - if (scale_factor < 2) { - scale_factor = 2; - } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - gc_list_merge(&gcstate->young.head, &increment); + PyGC_Head increment; + gc_list_init(&increment); + WorkStack working; + working.top = 0; + working.visited_space = gcstate->visited_space; + move_list_to_stack(&gcstate->young.head, &working); + Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = gc_list_size(&increment); + assert(working.top == NULL); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &increment); - increment_size++; - assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); - increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); + push_to_stack(gc, &working); + assert(!_Py_IsImmortal(FROM_GC(gc))); + increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + assert(working.top == NULL); } + assert(increment_size == gc_list_size(&increment)); GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); @@ -1632,7 +1619,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); @@ -1668,6 +1654,7 @@ 
gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_scavenge(gcstate); + gcstate->work_to_do = -gcstate->young.threshold; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From 77a61c0465c27c1c4ba7cddf4638d9ed75259671 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Fri, 6 Dec 2024 23:09:20 +0900 Subject: [PATCH 10/14] gh-101100: amend references starting with `!~` in gh-127054 (#127684) --- Doc/tutorial/datastructures.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 263b0c2e2815a1..cbe780e075baf5 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -142,8 +142,8 @@ Using Lists as Stacks The list methods make it very easy to use a list as a stack, where the last element added is the first element retrieved ("last-in, first-out"). To add an -item to the top of the stack, use :meth:`!~list.append`. To retrieve an item from the -top of the stack, use :meth:`!~list.pop` without an explicit index. For example:: +item to the top of the stack, use :meth:`!append`. To retrieve an item from the +top of the stack, use :meth:`!pop` without an explicit index. For example:: >>> stack = [3, 4, 5] >>> stack.append(6) @@ -340,7 +340,7 @@ The :keyword:`!del` statement ============================= There is a way to remove an item from a list given its index instead of its -value: the :keyword:`del` statement. This differs from the :meth:`!~list.pop` method +value: the :keyword:`del` statement. This differs from the :meth:`!pop` method which returns a value. The :keyword:`!del` statement can also be used to remove slices from a list or clear the entire list (which we did earlier by assignment of an empty list to the slice). For example:: @@ -500,8 +500,8 @@ any immutable type; strings and numbers can always be keys. Tuples can be used as keys if they contain only strings, numbers, or tuples; if a tuple contains any mutable object either directly or indirectly, it cannot be used as a key. You can't use lists as keys, since lists can be modified in place using index -assignments, slice assignments, or methods like :meth:`!~list.append` and -:meth:`!~list.extend`. +assignments, slice assignments, or methods like :meth:`!append` and +:meth:`!extend`. It is best to think of a dictionary as a set of *key: value* pairs, with the requirement that the keys are unique (within one dictionary). A pair of From 36c6178d372b075e9c74b786cfb5e47702976b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:31:30 +0100 Subject: [PATCH 11/14] gh-126024: fix UBSan failure in `unicodeobject.c:find_first_nonascii` (GH-127566) --- Objects/unicodeobject.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 463da06445984b..33c4747bbef488 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5083,12 +5083,9 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); #if PY_LITTLE_ENDIAN && HAVE_CTZ if (p < p2) { -#if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) - // x86 and amd64 are little endian and can load unaligned memory. 
- size_t u = *(const size_t*)p & ASCII_CHAR_MASK; -#else - size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK; -#endif + size_t u; + memcpy(&u, p, sizeof(size_t)); + u &= ASCII_CHAR_MASK; if (u) { return (ctz(u) - 7) / 8; } From a353455fca1b8f468ff3ffbb4b5e316510b4fd43 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 6 Dec 2024 15:48:24 +0000 Subject: [PATCH 12/14] gh-125610: Fix `STORE_ATTR_INSTANCE_VALUE` specialization check (GH-125612) The `STORE_ATTR_INSTANCE_VALUE` opcode doesn't support objects with non-NULL managed dictionaries, so don't specialize to that op in that case. --- Python/specialize.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index ec2cd7025e5054..d3fea717243847 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -947,7 +947,10 @@ specialize_dict_access( return 0; } _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); - if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES && _PyObject_InlineValues(owner)->valid) { + if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES && + _PyObject_InlineValues(owner)->valid && + !(base_op == STORE_ATTR && _PyObject_GetManagedDict(owner) != NULL)) + { PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; assert(PyUnicode_CheckExact(name)); Py_ssize_t index = _PyDictKeys_StringLookup(keys, name); From 12680ec5bd45c85b6daebe0739d30ef45f089efa Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Fri, 6 Dec 2024 10:58:19 -0500 Subject: [PATCH 13/14] gh-127314: Don't mention the GIL when calling without a thread state on the free-threaded build (#127315) Co-authored-by: Victor Stinner --- Include/internal/pycore_pystate.h | 8 ++++++++ Lib/test/test_capi/test_mem.py | 9 +++++++-- Lib/test/test_capi/test_misc.py | 17 ++++++++++++----- ...24-11-26-22-06-10.gh-issue-127314.SsRrIu.rst | 2 ++ Objects/obmalloc.c | 7 +++++++ 5 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 54d8803bc0bdb6..1e73e541ef8de0 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -190,10 +190,18 @@ static inline void _Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate) { if (tstate == NULL) { +#ifndef Py_GIL_DISABLED _Py_FatalErrorFunc(func, "the function must be called with the GIL held, " "after Python initialization and before Python finalization, " "but the GIL is released (the current Python thread state is NULL)"); +#else + _Py_FatalErrorFunc(func, + "the function must be called with an active thread state, " + "after Python initialization and before Python finalization, " + "but it was called without an active thread state. 
" + "Are you trying to call the C API inside of a Py_BEGIN_ALLOW_THREADS block?"); +#endif } } diff --git a/Lib/test/test_capi/test_mem.py b/Lib/test/test_capi/test_mem.py index 6ab7b685c2e18b..5035b2b4829bf6 100644 --- a/Lib/test/test_capi/test_mem.py +++ b/Lib/test/test_capi/test_mem.py @@ -68,8 +68,13 @@ def test_api_misuse(self): def check_malloc_without_gil(self, code): out = self.check(code) - expected = ('Fatal Python error: _PyMem_DebugMalloc: ' - 'Python memory allocator called without holding the GIL') + if not support.Py_GIL_DISABLED: + expected = ('Fatal Python error: _PyMem_DebugMalloc: ' + 'Python memory allocator called without holding the GIL') + else: + expected = ('Fatal Python error: _PyMem_DebugMalloc: ' + 'Python memory allocator called without an active thread state. ' + 'Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?') self.assertIn(expected, out) def test_pymem_malloc_without_gil(self): diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 8e0271919cc8a5..61512e610f46f2 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -100,11 +100,18 @@ def test_no_FatalError_infinite_loop(self): _rc, out, err = run_result self.assertEqual(out, b'') # This used to cause an infinite loop. - msg = ("Fatal Python error: PyThreadState_Get: " - "the function must be called with the GIL held, " - "after Python initialization and before Python finalization, " - "but the GIL is released " - "(the current Python thread state is NULL)").encode() + if not support.Py_GIL_DISABLED: + msg = ("Fatal Python error: PyThreadState_Get: " + "the function must be called with the GIL held, " + "after Python initialization and before Python finalization, " + "but the GIL is released " + "(the current Python thread state is NULL)").encode() + else: + msg = ("Fatal Python error: PyThreadState_Get: " + "the function must be called with an active thread state, " + "after Python initialization and before Python finalization, " + "but it was called without an active thread state. " + "Are you trying to call the C API inside of a Py_BEGIN_ALLOW_THREADS block?").encode() self.assertTrue(err.rstrip().startswith(msg), err) diff --git a/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst b/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst new file mode 100644 index 00000000000000..8ea3c4ee2a2c53 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst @@ -0,0 +1,2 @@ +Improve error message when calling the C API without an active thread state +on the :term:`free-threaded ` build. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 2cc0377f68f990..b103deb01ca712 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2910,9 +2910,16 @@ static inline void _PyMem_DebugCheckGIL(const char *func) { if (!PyGILState_Check()) { +#ifndef Py_GIL_DISABLED _Py_FatalErrorFunc(func, "Python memory allocator called " "without holding the GIL"); +#else + _Py_FatalErrorFunc(func, + "Python memory allocator called " + "without an active thread state. " + "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?"); +#endif } } From 67b18a18b66b89e253f38895057ef9f6bae92e7b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 6 Dec 2024 17:27:12 +0100 Subject: [PATCH 14/14] gh-59705: Add _thread.set_name() function (#127338) On Linux, threading.Thread now sets the thread name to the operating system. 
* configure now checks if pthread_getname_np() and pthread_setname_np() functions are available. * Add PYTHREAD_NAME_MAXLEN macro. * Add _thread._NAME_MAXLEN constant for test_threading. Co-authored-by: Serhiy Storchaka --- Lib/test/test_threading.py | 60 ++++++++++ Lib/threading.py | 9 ++ ...4-11-27-17-04-38.gh-issue-59705.sAGyvs.rst | 2 + Modules/_threadmodule.c | 108 ++++++++++++++++++ Modules/clinic/_threadmodule.c.h | 104 +++++++++++++++++ configure | 30 +++++ configure.ac | 22 +++- pyconfig.h.in | 9 ++ 8 files changed, 342 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst create mode 100644 Modules/clinic/_threadmodule.c.h diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index fe225558fc4f0b..d05161f46f1034 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2104,6 +2104,66 @@ def test__all__(self): support.check__all__(self, threading, ('threading', '_thread'), extra=extra, not_exported=not_exported) + @unittest.skipUnless(hasattr(_thread, 'set_name'), "missing _thread.set_name") + @unittest.skipUnless(hasattr(_thread, '_get_name'), "missing _thread._get_name") + def test_set_name(self): + # set_name() limit in bytes + truncate = getattr(_thread, "_NAME_MAXLEN", None) + limit = truncate or 100 + + tests = [ + # test short ASCII name + "CustomName", + + # test short non-ASCII name + "namé€", + + # embedded null character: name is truncated + # at the first null character + "embed\0null", + + # Test long ASCII names (not truncated) + "x" * limit, + + # Test long ASCII names (truncated) + "x" * (limit + 10), + + # Test long non-ASCII name (truncated) + "x" * (limit - 1) + "é€", + ] + if os_helper.FS_NONASCII: + tests.append(f"nonascii:{os_helper.FS_NONASCII}") + if os_helper.TESTFN_UNENCODABLE: + tests.append(os_helper.TESTFN_UNENCODABLE) + + if sys.platform.startswith("solaris"): + encoding = "utf-8" + else: + encoding = sys.getfilesystemencoding() + + def work(): + nonlocal work_name + work_name = _thread._get_name() + + for name in tests: + encoded = name.encode(encoding, "replace") + if b'\0' in encoded: + encoded = encoded.split(b'\0', 1)[0] + if truncate is not None: + encoded = encoded[:truncate] + if sys.platform.startswith("solaris"): + expected = encoded.decode("utf-8", "surrogateescape") + else: + expected = os.fsdecode(encoded) + + with self.subTest(name=name, expected=expected): + work_name = None + thread = threading.Thread(target=work, name=name) + thread.start() + thread.join() + self.assertEqual(work_name, expected, + f"{len(work_name)=} and {len(expected)=}") + class InterruptMainTests(unittest.TestCase): def check_interrupt_main_with_signal_handler(self, signum): diff --git a/Lib/threading.py b/Lib/threading.py index 94ea2f08178369..3abd22a2aa1b72 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -48,6 +48,10 @@ __all__.append('get_native_id') except AttributeError: _HAVE_THREAD_NATIVE_ID = False +try: + _set_name = _thread.set_name +except AttributeError: + _set_name = None ThreadError = _thread.error try: _CRLock = _thread.RLock @@ -1027,6 +1031,11 @@ def _bootstrap_inner(self): self._set_ident() if _HAVE_THREAD_NATIVE_ID: self._set_native_id() + if _set_name is not None and self._name: + try: + _set_name(self._name) + except OSError: + pass self._started.set() with _active_limbo_lock: _active[self._ident] = self diff --git a/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst 
b/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst new file mode 100644 index 00000000000000..a8c7b3d00755e6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst @@ -0,0 +1,2 @@ +On Linux, :class:`threading.Thread` now sets the thread name to the +operating system. Patch by Victor Stinner. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 4a45445e2f62db..35c032fbeaa94f 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -17,6 +17,8 @@ # include // SIGINT #endif +#include "clinic/_threadmodule.c.h" + // ThreadError is just an alias to PyExc_RuntimeError #define ThreadError PyExc_RuntimeError @@ -44,6 +46,13 @@ get_thread_state(PyObject *module) return (thread_module_state *)state; } + +/*[clinic input] +module _thread +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=be8dbe5cc4b16df7]*/ + + // _ThreadHandle type // Handles state transitions according to the following diagram: @@ -2354,6 +2363,96 @@ PyDoc_STRVAR(thread__get_main_thread_ident_doc, Internal only. Return a non-zero integer that uniquely identifies the main thread\n\ of the main interpreter."); + +#ifdef HAVE_PTHREAD_GETNAME_NP +/*[clinic input] +_thread._get_name + +Get the name of the current thread. +[clinic start generated code]*/ + +static PyObject * +_thread__get_name_impl(PyObject *module) +/*[clinic end generated code: output=20026e7ee3da3dd7 input=35cec676833d04c8]*/ +{ + // Linux and macOS are limited to respectively 16 and 64 bytes + char name[100]; + pthread_t thread = pthread_self(); + int rc = pthread_getname_np(thread, name, Py_ARRAY_LENGTH(name)); + if (rc) { + errno = rc; + return PyErr_SetFromErrno(PyExc_OSError); + } + +#ifdef __sun + return PyUnicode_DecodeUTF8(name, strlen(name), "surrogateescape"); +#else + return PyUnicode_DecodeFSDefault(name); +#endif +} +#endif // HAVE_PTHREAD_GETNAME_NP + + +#ifdef HAVE_PTHREAD_SETNAME_NP +/*[clinic input] +_thread.set_name + + name as name_obj: unicode + +Set the name of the current thread. 
+[clinic start generated code]*/ + +static PyObject * +_thread_set_name_impl(PyObject *module, PyObject *name_obj) +/*[clinic end generated code: output=402b0c68e0c0daed input=7e7acd98261be82f]*/ +{ +#ifdef __sun + // Solaris always uses UTF-8 + const char *encoding = "utf-8"; +#else + // Encode the thread name to the filesystem encoding using the "replace" + // error handler + PyInterpreterState *interp = _PyInterpreterState_GET(); + const char *encoding = interp->unicode.fs_codec.encoding; +#endif + PyObject *name_encoded; + name_encoded = PyUnicode_AsEncodedString(name_obj, encoding, "replace"); + if (name_encoded == NULL) { + return NULL; + } + +#ifdef PYTHREAD_NAME_MAXLEN + // Truncate to PYTHREAD_NAME_MAXLEN bytes + the NUL byte if needed + size_t len = PyBytes_GET_SIZE(name_encoded); + if (len > PYTHREAD_NAME_MAXLEN) { + PyObject *truncated; + truncated = PyBytes_FromStringAndSize(PyBytes_AS_STRING(name_encoded), + PYTHREAD_NAME_MAXLEN); + if (truncated == NULL) { + Py_DECREF(name_encoded); + return NULL; + } + Py_SETREF(name_encoded, truncated); + } +#endif + + const char *name = PyBytes_AS_STRING(name_encoded); +#ifdef __APPLE__ + int rc = pthread_setname_np(name); +#else + pthread_t thread = pthread_self(); + int rc = pthread_setname_np(thread, name); +#endif + Py_DECREF(name_encoded); + if (rc) { + errno = rc; + return PyErr_SetFromErrno(PyExc_OSError); + } + Py_RETURN_NONE; +} +#endif // HAVE_PTHREAD_SETNAME_NP + + static PyMethodDef thread_methods[] = { {"start_new_thread", (PyCFunction)thread_PyThread_start_new_thread, METH_VARARGS, start_new_thread_doc}, @@ -2393,6 +2492,8 @@ static PyMethodDef thread_methods[] = { METH_O, thread__make_thread_handle_doc}, {"_get_main_thread_ident", thread__get_main_thread_ident, METH_NOARGS, thread__get_main_thread_ident_doc}, + _THREAD_SET_NAME_METHODDEF + _THREAD__GET_NAME_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -2484,6 +2585,13 @@ thread_module_exec(PyObject *module) llist_init(&state->shutdown_handles); +#ifdef PYTHREAD_NAME_MAXLEN + if (PyModule_AddIntConstant(module, "_NAME_MAXLEN", + PYTHREAD_NAME_MAXLEN) < 0) { + return -1; + } +#endif + return 0; } diff --git a/Modules/clinic/_threadmodule.c.h b/Modules/clinic/_threadmodule.c.h new file mode 100644 index 00000000000000..8f0507d40285b3 --- /dev/null +++ b/Modules/clinic/_threadmodule.c.h @@ -0,0 +1,104 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +#if defined(HAVE_PTHREAD_GETNAME_NP) + +PyDoc_STRVAR(_thread__get_name__doc__, +"_get_name($module, /)\n" +"--\n" +"\n" +"Get the name of the current thread."); + +#define _THREAD__GET_NAME_METHODDEF \ + {"_get_name", (PyCFunction)_thread__get_name, METH_NOARGS, _thread__get_name__doc__}, + +static PyObject * +_thread__get_name_impl(PyObject *module); + +static PyObject * +_thread__get_name(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return _thread__get_name_impl(module); +} + +#endif /* defined(HAVE_PTHREAD_GETNAME_NP) */ + +#if defined(HAVE_PTHREAD_SETNAME_NP) + +PyDoc_STRVAR(_thread_set_name__doc__, +"set_name($module, /, name)\n" +"--\n" +"\n" +"Set the name of the current thread."); + +#define _THREAD_SET_NAME_METHODDEF \ + {"set_name", _PyCFunction_CAST(_thread_set_name), METH_FASTCALL|METH_KEYWORDS, _thread_set_name__doc__}, + +static PyObject * +_thread_set_name_impl(PyObject *module, 
PyObject *name_obj); + +static PyObject * +_thread_set_name(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "set_name", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *name_obj; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("set_name", "argument 'name'", "str", args[0]); + goto exit; + } + name_obj = args[0]; + return_value = _thread_set_name_impl(module, name_obj); + +exit: + return return_value; +} + +#endif /* defined(HAVE_PTHREAD_SETNAME_NP) */ + +#ifndef _THREAD__GET_NAME_METHODDEF + #define _THREAD__GET_NAME_METHODDEF +#endif /* !defined(_THREAD__GET_NAME_METHODDEF) */ + +#ifndef _THREAD_SET_NAME_METHODDEF + #define _THREAD_SET_NAME_METHODDEF +#endif /* !defined(_THREAD_SET_NAME_METHODDEF) */ +/*[clinic end generated code: output=b5cb85aaccc45bf6 input=a9049054013a1b77]*/ diff --git a/configure b/configure index 5e9bcb602d884e..bcbab8dfcff190 100755 --- a/configure +++ b/configure @@ -821,6 +821,7 @@ MODULE_TIME_TRUE MODULE__IO_FALSE MODULE__IO_TRUE MODULE_BUILDTYPE +PYTHREAD_NAME_MAXLEN TEST_MODULES OPENSSL_LDFLAGS OPENSSL_LIBS @@ -18841,6 +18842,18 @@ if test "x$ac_cv_func_pthread_kill" = xyes then : printf "%s\n" "#define HAVE_PTHREAD_KILL 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "pthread_getname_np" "ac_cv_func_pthread_getname_np" +if test "x$ac_cv_func_pthread_getname_np" = xyes +then : + printf "%s\n" "#define HAVE_PTHREAD_GETNAME_NP 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "pthread_setname_np" "ac_cv_func_pthread_setname_np" +if test "x$ac_cv_func_pthread_setname_np" = xyes +then : + printf "%s\n" "#define HAVE_PTHREAD_SETNAME_NP 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "ptsname" "ac_cv_func_ptsname" if test "x$ac_cv_func_ptsname" = xyes @@ -29081,6 +29094,23 @@ fi CPPFLAGS=$save_CPPFLAGS +# gh-59705: Maximum length in bytes of a thread name +case "$ac_sys_system" in + Linux*) PYTHREAD_NAME_MAXLEN=15;; # Linux and Android + SunOS*) PYTHREAD_NAME_MAXLEN=31;; + Darwin) PYTHREAD_NAME_MAXLEN=63;; + iOS) PYTHREAD_NAME_MAXLEN=63;; + FreeBSD*) PYTHREAD_NAME_MAXLEN=98;; + *) PYTHREAD_NAME_MAXLEN=;; +esac +if test -n "$PYTHREAD_NAME_MAXLEN"; then + +printf "%s\n" "#define PYTHREAD_NAME_MAXLEN $PYTHREAD_NAME_MAXLEN" >>confdefs.h + +fi + + + # stdlib diff --git a/configure.ac b/configure.ac index bf3685e1b1b209..922a125ea9608e 100644 --- a/configure.ac +++ b/configure.ac @@ -5110,8 +5110,10 @@ AC_CHECK_FUNCS([ \ mknod mknodat mktime mmap mremap nice openat opendir pathconf pause pipe \ pipe2 plock poll posix_fadvise posix_fallocate posix_openpt posix_spawn posix_spawnp \ posix_spawn_file_actions_addclosefrom_np \ - pread preadv preadv2 process_vm_readv pthread_cond_timedwait_relative_np 
pthread_condattr_setclock pthread_init \ - pthread_kill ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ + pread preadv preadv2 process_vm_readv \ + pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ + pthread_kill pthread_getname_np pthread_setname_np \ + ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ rtpSpawn sched_get_priority_max sched_rr_get_interval sched_setaffinity \ sched_setparam sched_setscheduler sem_clockwait sem_getvalue sem_open \ sem_timedwait sem_unlink sendfile setegid seteuid setgid sethostname \ @@ -7498,6 +7500,22 @@ AS_VAR_IF([ac_cv_libatomic_needed], [yes], _RESTORE_VAR([CPPFLAGS]) +# gh-59705: Maximum length in bytes of a thread name +case "$ac_sys_system" in + Linux*) PYTHREAD_NAME_MAXLEN=15;; # Linux and Android + SunOS*) PYTHREAD_NAME_MAXLEN=31;; + Darwin) PYTHREAD_NAME_MAXLEN=63;; + iOS) PYTHREAD_NAME_MAXLEN=63;; + FreeBSD*) PYTHREAD_NAME_MAXLEN=98;; + *) PYTHREAD_NAME_MAXLEN=;; +esac +if test -n "$PYTHREAD_NAME_MAXLEN"; then + AC_DEFINE_UNQUOTED([PYTHREAD_NAME_MAXLEN], [$PYTHREAD_NAME_MAXLEN], + [Maximum length in bytes of a thread name]) +fi +AC_SUBST([PYTHREAD_NAME_MAXLEN]) + + # stdlib AC_DEFUN([PY_STDLIB_MOD_SET_NA], [ m4_foreach([mod], [$@], [ diff --git a/pyconfig.h.in b/pyconfig.h.in index 6a1f1284650b9f..166c195a8c66fc 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -981,6 +981,9 @@ /* Define to 1 if you have the `pthread_getcpuclockid' function. */ #undef HAVE_PTHREAD_GETCPUCLOCKID +/* Define to 1 if you have the `pthread_getname_np' function. */ +#undef HAVE_PTHREAD_GETNAME_NP + /* Define to 1 if you have the header file. */ #undef HAVE_PTHREAD_H @@ -990,6 +993,9 @@ /* Define to 1 if you have the `pthread_kill' function. */ #undef HAVE_PTHREAD_KILL +/* Define to 1 if you have the `pthread_setname_np' function. */ +#undef HAVE_PTHREAD_SETNAME_NP + /* Define to 1 if you have the `pthread_sigmask' function. */ #undef HAVE_PTHREAD_SIGMASK @@ -1650,6 +1656,9 @@ /* Define as the preferred size in bits of long digits */ #undef PYLONG_BITS_IN_DIGIT +/* Maximum length in bytes of a thread name */ +#undef PYTHREAD_NAME_MAXLEN + /* enabled builtin hash modules */ #undef PY_BUILTIN_HASHLIB_HASHES
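
A minimal usage sketch of the behaviour added by the final patch (gh-59705) follows; it is not part of the diff above. It assumes a build where configure detected pthread_setname_np()/pthread_getname_np(), so that `_thread.set_name()`, `_thread._get_name()` and `_thread._NAME_MAXLEN` are present (the patch's own test guards on the same attributes); the thread and variable names are illustrative only.

```python
# Sketch (not part of the patch): threading.Thread now forwards its name to
# the operating system when the thread starts, truncated to the platform
# limit exposed as _thread._NAME_MAXLEN (e.g. 15 bytes on Linux).
import _thread
import threading


def report_os_name():
    # _thread._get_name() returns the name the OS currently has for this thread.
    print("OS-level thread name:", _thread._get_name())


if hasattr(_thread, "set_name") and hasattr(_thread, "_get_name"):
    # A deliberately over-long name: threading sets it at thread bootstrap,
    # and the C implementation truncates it to PYTHREAD_NAME_MAXLEN bytes.
    long_name = "worker-" + "x" * 100
    t = threading.Thread(target=report_os_name, name=long_name)
    t.start()
    t.join()
```

As in the patch's test_threading changes, the name is encoded to the filesystem encoding (UTF-8 with "surrogateescape" on Solaris) with the "replace" error handler before truncation, so what the OS reports may differ from `Thread.name` for long or non-ASCII names.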