Skip to content

Commit

Permalink
Initialize codec registry eagerly, greatly simplifying things
Browse files Browse the repository at this point in the history
  • Loading branch information
swtaarrs committed Apr 15, 2024
1 parent d8bcdd8 commit 8076032
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 91 deletions.
13 changes: 6 additions & 7 deletions Include/internal/pycore_codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ extern "C" {

#include "pycore_lock.h" // PyMutex

/* Initialize codecs-related state for the given interpreter. Must be called
before any other _PyCodec* functions, and while only one thread is
active. */
extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp);

extern PyObject* _PyCodec_Lookup(const char *encoding);

/* Text codec specific encoding and decoding API.
Expand Down Expand Up @@ -65,15 +70,9 @@ struct codecs_state {
#ifdef Py_GIL_DISABLED
// Used to safely delete a specific item from search_path.
PyMutex search_path_mutex;

// Used to synchronize initialization of the PyObject* members above.
PyMutex init_mutex;
#endif

// If an acquire load of initialized yields 1, all of the PyObject* members
// above will be set, and their values will not change until interpreter
// finalization. This allows common operations to freely read them without
// additional synchronization.
// Whether or not the rest of the state is initialized.
int initialized;
};

Expand Down
7 changes: 0 additions & 7 deletions Include/internal/pycore_pyatomic_ft_wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,18 @@ extern "C" {
#define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value)
#define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \
_Py_atomic_load_ssize_relaxed(&value)
#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) _Py_atomic_load_int_acquire(&value)

#define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \
_Py_atomic_store_ptr_relaxed(&value, new_value)
#define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \
_Py_atomic_store_ptr_release(&value, new_value)
#define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \
_Py_atomic_store_ssize_relaxed(&value, new_value)
#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) \
_Py_atomic_store_int_release(&value, new_value)
#else
#define FT_ATOMIC_LOAD_SSIZE(value) value
#define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value
#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) value

#define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value
#define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) value = new_value
#endif

#ifdef __cplusplus
Expand Down
1 change: 0 additions & 1 deletion Objects/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,6 @@ void
PyObject_CallFinalizer(PyObject *self)
{
PyTypeObject *tp = Py_TYPE(self);

if (tp->tp_finalize == NULL)
return;
/* tp_finalize should only be called once. */
Expand Down
6 changes: 5 additions & 1 deletion Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -15452,7 +15452,11 @@ init_fs_encoding(PyThreadState *tstate)
PyStatus
_PyUnicode_InitEncodings(PyThreadState *tstate)
{
PyStatus status = init_fs_encoding(tstate);
PyStatus status = _PyCodec_InitRegistry(tstate->interp);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
status = init_fs_encoding(tstate);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
Expand Down
105 changes: 31 additions & 74 deletions Python/codecs.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ Copyright (c) Corporation for National Research Initiatives.
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
#include "pycore_lock.h" // PyMutex
#include "pycore_pyatomic_ft_wrappers.h"
#include "pycore_pyerrors.h" // _PyErr_FormatNote()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
Expand All @@ -32,15 +31,10 @@ const char *Py_hexdigits = "0123456789abcdef";
*/

static int _PyCodecRegistry_EnsureInit(PyInterpreterState *); /* Forward */

int PyCodec_Register(PyObject *search_function)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_PyCodecRegistry_EnsureInit(interp) < 0) {
goto onError;
}
PyObject *search_path = interp->codecs.search_path;
assert(interp->codecs.initialized);
if (search_function == NULL) {
PyErr_BadArgument();
goto onError;
Expand All @@ -52,7 +46,7 @@ int PyCodec_Register(PyObject *search_function)
#ifdef Py_GIL_DISABLED
PyMutex_Lock(&interp->codecs.search_path_mutex);
#endif
int ret = PyList_Append(search_path, search_function);
int ret = PyList_Append(interp->codecs.search_path, search_function);
#ifdef Py_GIL_DISABLED
PyMutex_Unlock(&interp->codecs.search_path_mutex);
#endif
Expand All @@ -66,8 +60,9 @@ int
PyCodec_Unregister(PyObject *search_function)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
/* Do nothing if codec data structures are not created yet. */
if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 0) {
if (interp->codecs.initialized != 1) {
/* Do nothing if codecs state was cleared (only possible during
interpreter shutdown). */
return 0;
}

Expand Down Expand Up @@ -154,9 +149,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
}

PyInterpreterState *interp = _PyInterpreterState_GET();
if (_PyCodecRegistry_EnsureInit(interp) < 0) {
return NULL;
}
assert(interp->codecs.initialized);

/* Convert the encoding to a normalized Python string: all
characters are converted to lower case, spaces and hyphens are
Expand Down Expand Up @@ -623,9 +616,7 @@ PyObject *_PyCodec_DecodeText(PyObject *object,
int PyCodec_RegisterError(const char *name, PyObject *error)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_PyCodecRegistry_EnsureInit(interp) < 0) {
return -1;
}
assert(interp->codecs.initialized);
if (!PyCallable_Check(error)) {
PyErr_SetString(PyExc_TypeError, "handler must be callable");
return -1;
Expand All @@ -640,9 +631,7 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
PyObject *PyCodec_LookupError(const char *name)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_PyCodecRegistry_EnsureInit(interp) < 0) {
return NULL;
}
assert(interp->codecs.initialized);

if (name==NULL)
name = "strict";
Expand Down Expand Up @@ -1400,7 +1389,7 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
return PyCodec_SurrogateEscapeErrors(exc);
}

static int _PyCodecRegistry_EnsureInit(PyInterpreterState *interp)
PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp)
{
static struct {
const char *name;
Expand Down Expand Up @@ -1488,74 +1477,42 @@ static int _PyCodecRegistry_EnsureInit(PyInterpreterState *interp)
}
};

if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 1) {
return 0;
assert(interp->codecs.initialized == 0);
interp->codecs.search_path = PyList_New(0);
if (interp->codecs.search_path == NULL) {
return PyStatus_NoMemory();
}

PyObject *search_path = NULL, *search_cache = NULL, *error_registry = NULL;
search_path = PyList_New(0);
if (search_path == NULL) {
goto error;
interp->codecs.search_cache = PyDict_New();
if (interp->codecs.search_cache == NULL) {
return PyStatus_NoMemory();
}
search_cache = PyDict_New();
if (search_cache == NULL) {
goto error;
}
error_registry = PyDict_New();
if (error_registry == NULL) {
goto error;
interp->codecs.error_registry = PyDict_New();
if (interp->codecs.error_registry == NULL) {
return PyStatus_NoMemory();
}
for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
if (func == NULL) {
goto error;
return PyStatus_NoMemory();
}

int res = PyDict_SetItemString(error_registry, methods[i].name, func);
int res = PyDict_SetItemString(interp->codecs.error_registry,
methods[i].name, func);
Py_DECREF(func);
if (res < 0) {
goto error;
return PyStatus_Error("Failed to insert into codec error registry");
}
}

#ifdef Py_GIL_DISABLED
PyMutex_Lock(&interp->codecs.init_mutex);
#endif
int do_import = 1;
if (interp->codecs.initialized == 0) {
interp->codecs.search_path = search_path;
interp->codecs.search_cache = search_cache;
interp->codecs.error_registry = error_registry;
FT_ATOMIC_STORE_INT_RELEASE(interp->codecs.initialized, 1);
} else {
// Another thread initialized everything while we were preparing.
Py_DECREF(search_path);
Py_DECREF(search_cache);
Py_DECREF(error_registry);
do_import = 0;
}

// Importing `encodings' can execute arbitrary code and will call back into
// this module to register codec search functions. Do it once everything is
// initialized and we hold no locks. Other Python code may register other
// codecs before `encodings' is finished importing; this is true with or
// without the GIL.
#ifdef Py_GIL_DISABLED
PyMutex_Unlock(&interp->codecs.init_mutex);
#endif
interp->codecs.initialized = 1;

if (do_import) {
PyObject *mod = PyImport_ImportModule("encodings");
if (mod == NULL) {
return -1;
}
Py_DECREF(mod);
// Importing `encodings' will call back into this module to register codec
// search functions, so this is done after everything else is initialized.
PyObject *mod = PyImport_ImportModule("encodings");
if (mod == NULL) {
return PyStatus_Error("Failed to import encodings module");
}
return 0;
Py_DECREF(mod);

error:
Py_XDECREF(search_path);
Py_XDECREF(search_cache);
Py_XDECREF(error_registry);
return -1;
return PyStatus_Ok();
}
1 change: 1 addition & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
Py_CLEAR(interp->codecs.search_path);
Py_CLEAR(interp->codecs.search_cache);
Py_CLEAR(interp->codecs.error_registry);
interp->codecs.initialized = 0;

assert(interp->imports.modules == NULL);
assert(interp->imports.modules_by_index == NULL);
Expand Down
2 changes: 1 addition & 1 deletion Tools/c-analyzer/cpython/ignored.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ Python/ceval.c - _PyEval_BinaryOps -
Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS -
Python/codecs.c - Py_hexdigits -
Python/codecs.c - ucnhash_capi -
Python/codecs.c _PyCodecRegistry_EnsureInit methods -
Python/codecs.c _PyCodec_InitRegistry methods -
Python/compile.c - NO_LABEL -
Python/compile.c - NO_LOCATION -
Python/dynload_shlib.c - _PyImport_DynLoadFiletab -
Expand Down

0 comments on commit 8076032

Please sign in to comment.