Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-115999: Add free-threaded specialization for TO_BOOL #126616

Merged
merged 17 commits into from
Nov 21, 2024
Merged
10 changes: 10 additions & 0 deletions Include/internal/pycore_typeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,16 @@ extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp);
PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version);
PyTypeObject *_PyType_LookupByVersion(unsigned int version);

// Function pointer type for user-defined validation function that will be
// called by _PyType_Validate().
// It should return 0 if the validation is passed, otherwise it will return -1.
typedef int (*_py_validate_type)(PyTypeObject *);

// It will verify the ``ty`` through user-defined validation function ``validate``,
// and if the validation is passed, it will set the ``tp_version`` as valid
// tp_version_tag from the ``ty``.
extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version);

#ifdef __cplusplus
}
#endif
Expand Down
66 changes: 66 additions & 0 deletions Lib/test/test_opcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,72 @@ def g():
self.assert_specialized(g, "CONTAINS_OP_SET")
self.assert_no_opcode(g, "CONTAINS_OP")

@cpython_only
@requires_specialization_ft
def test_to_bool(self):
def to_bool_bool():
true_cnt, false_cnt = 0, 0
elems = [e % 2 == 0 for e in range(100)]
for e in elems:
if e:
true_cnt += 1
else:
false_cnt += 1
self.assertEqual(true_cnt, 50)
self.assertEqual(false_cnt, 50)

to_bool_bool()
self.assert_specialized(to_bool_bool, "TO_BOOL_BOOL")
self.assert_no_opcode(to_bool_bool, "TO_BOOL")

def to_bool_int():
count = 0
for i in range(100):
if i:
count += 1
else:
count -= 1
self.assertEqual(count, 98)

to_bool_int()
self.assert_specialized(to_bool_int, "TO_BOOL_INT")
self.assert_no_opcode(to_bool_int, "TO_BOOL")

def to_bool_list():
count = 0
elems = [1, 2, 3]
while elems:
count += elems.pop()
self.assertEqual(elems, [])
self.assertEqual(count, 6)

to_bool_list()
self.assert_specialized(to_bool_list, "TO_BOOL_LIST")
self.assert_no_opcode(to_bool_list, "TO_BOOL")

def to_bool_none():
count = 0
elems = [None, None, None, None]
for e in elems:
if not e:
count += 1
self.assertEqual(count, len(elems))

to_bool_none()
self.assert_specialized(to_bool_none, "TO_BOOL_NONE")
self.assert_no_opcode(to_bool_none, "TO_BOOL")

def to_bool_str():
count = 0
elems = ["", "foo", ""]
for e in elems:
if e:
count += 1
self.assertEqual(count, 1)

to_bool_str()
self.assert_specialized(to_bool_str, "TO_BOOL_STR")
self.assert_no_opcode(to_bool_str, "TO_BOOL")


if __name__ == "__main__":
Expand Down
18 changes: 18 additions & 0 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5645,6 +5645,24 @@ _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags)
END_TYPE_LOCK();
}

int
_PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version)
{
int err;
BEGIN_TYPE_LOCK();
err = validate(ty);
if (!err) {
if(assign_version_tag(_PyInterpreterState_GET(), ty)) {
*tp_version = ty->tp_version_tag;
}
else {
err = -1;
}
}
END_TYPE_LOCK();
return err;
}

static void
set_flags_recursive(PyTypeObject *self, unsigned long mask, unsigned long flags)
{
Expand Down
6 changes: 3 additions & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,15 +391,15 @@ dummy_func(
};

specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ToBool(value, next_instr);
DISPATCH_SAME_OPARG();
}
OPCODE_DEFERRED_INC(TO_BOOL);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}

op(_TO_BOOL, (value -- res)) {
Expand Down Expand Up @@ -435,7 +435,7 @@ dummy_func(
PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
EXIT_IF(!PyList_CheckExact(value_o));
STAT_INC(TO_BOOL, hit);
res = Py_SIZE(value_o) ? PyStackRef_True : PyStackRef_False;
res = PyList_GET_SIZE(value_o) ? PyStackRef_True : PyStackRef_False;
DECREF_INPUTS();
}

Expand Down
2 changes: 1 addition & 1 deletion Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

129 changes: 67 additions & 62 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -2665,101 +2665,106 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr)
cache->counter = adaptive_counter_cooldown();
}

#ifdef Py_STATS
static int
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be surrounded by #ifdef Py_STATS. We don't want to be calling this unless stats are enabled.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mpage
I have same concerns with this change, this change was first introduced at #126414, do we need to make it as dummy function for the default build?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the performance overhead of calling this will matter, but if it does, we could replace it with a dummy function that returns a special sentinel value indicating that stats are disabled.

Copy link
Contributor

@mpage mpage Nov 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both clang (18.18) and GCC (11.5.0) eliminate the call to to_bool_fail_kind if stats are disabled because the result of the call is unused.

to_bool_fail_kind(PyObject *value)
{
if (PyByteArray_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_BYTEARRAY;
}
if (PyBytes_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_BYTES;
}
if (PyDict_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_DICT;
}
if (PyFloat_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_FLOAT;
}
if (PyMemoryView_Check(value)) {
return SPEC_FAIL_TO_BOOL_MEMORY_VIEW;
}
if (PyAnySet_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_SET;
}
if (PyTuple_CheckExact(value)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe not for this PR to address, but why is the tuple type handled here? I see no specialization for tuple (or did I miss something?). Tuple is immutable, so it should be relatively easy to add

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, we added specializaiton based on metric that is measured at https://github.com/faster-cpython/benchmarking-public.
I am not sure how many tuple cases are actually existed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that tuple case is easy to add but that would be based on how many boolean operations are existed for the tuple at the real world workload.

return SPEC_FAIL_TO_BOOL_TUPLE;
}
return SPEC_FAIL_OTHER;
}
#endif // Py_STATS

static int
check_type_always_true(PyTypeObject *ty)
{
PyNumberMethods *nb = ty->tp_as_number;
if (nb && nb->nb_bool) {
return SPEC_FAIL_TO_BOOL_NUMBER;
}
PyMappingMethods *mp = ty->tp_as_mapping;
if (mp && mp->mp_length) {
return SPEC_FAIL_TO_BOOL_MAPPING;
}
PySequenceMethods *sq = ty->tp_as_sequence;
if (sq && sq->sq_length) {
return SPEC_FAIL_TO_BOOL_SEQUENCE;
}
return 0;
}

void
_Py_Specialize_ToBool(_PyStackRef value_o, _Py_CODEUNIT *instr)
{
assert(ENABLE_SPECIALIZATION);
assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[TO_BOOL] == INLINE_CACHE_ENTRIES_TO_BOOL);
_PyToBoolCache *cache = (_PyToBoolCache *)(instr + 1);
PyObject *value = PyStackRef_AsPyObjectBorrow(value_o);
uint8_t specialized_op;
if (PyBool_Check(value)) {
instr->op.code = TO_BOOL_BOOL;
specialized_op = TO_BOOL_BOOL;
goto success;
}
if (PyLong_CheckExact(value)) {
instr->op.code = TO_BOOL_INT;
specialized_op = TO_BOOL_INT;
goto success;
}
if (PyList_CheckExact(value)) {
instr->op.code = TO_BOOL_LIST;
specialized_op = TO_BOOL_LIST;
goto success;
}
if (Py_IsNone(value)) {
instr->op.code = TO_BOOL_NONE;
specialized_op = TO_BOOL_NONE;
goto success;
}
if (PyUnicode_CheckExact(value)) {
instr->op.code = TO_BOOL_STR;
specialized_op = TO_BOOL_STR;
goto success;
}
if (PyType_HasFeature(Py_TYPE(value), Py_TPFLAGS_HEAPTYPE)) {
PyNumberMethods *nb = Py_TYPE(value)->tp_as_number;
if (nb && nb->nb_bool) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_NUMBER);
goto failure;
}
PyMappingMethods *mp = Py_TYPE(value)->tp_as_mapping;
if (mp && mp->mp_length) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MAPPING);
goto failure;
}
PySequenceMethods *sq = Py_TYPE(value)->tp_as_sequence;
if (sq && sq->sq_length) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SEQUENCE);
goto failure;
}
if (!PyUnstable_Type_AssignVersionTag(Py_TYPE(value))) {
unsigned int version = 0;
int err = _PyType_Validate(Py_TYPE(value), check_type_always_true, &version);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be adding a fair bit of overhead and indirection.
Would it be sufficient to set the version number before performing the other checks, and check that it hasn't changed at the end? The extra check would be benign in the default build.

Copy link
Contributor

@mpage mpage Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be adding a fair bit of overhead and indirection.

This adds one extra function call (the call to check_type_always_true).

Would it be sufficient to set the version number before performing the other checks, and check that it hasn't changed at the end?

This might be possible, but would require using something like a sequence lock to do correctly, which is more complicated and harder to reason about than a solution that acquires the lock around the validation that needs to be performed.

if (err < 0) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OUT_OF_VERSIONS);
goto failure;
}
uint32_t version = type_get_version(Py_TYPE(value), TO_BOOL);
if (version == 0) {
else if (err > 0) {
SPECIALIZATION_FAIL(TO_BOOL, err);
goto failure;
}
instr->op.code = TO_BOOL_ALWAYS_TRUE;
write_u32(cache->version, version);

assert(err == 0);
assert(version);
write_u32(cache->version, version);
specialized_op = TO_BOOL_ALWAYS_TRUE;
goto success;
}
#ifdef Py_STATS
if (PyByteArray_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTEARRAY);
goto failure;
}
if (PyBytes_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTES);
goto failure;
}
if (PyDict_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_DICT);
goto failure;
}
if (PyFloat_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_FLOAT);
goto failure;
}
if (PyMemoryView_Check(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MEMORY_VIEW);
goto failure;
}
if (PyAnySet_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SET);
goto failure;
}
if (PyTuple_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_TUPLE);
goto failure;
}
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OTHER);
#endif // Py_STATS

SPECIALIZATION_FAIL(TO_BOOL, to_bool_fail_kind(value));
failure:
STAT_INC(TO_BOOL, failure);
instr->op.code = TO_BOOL;
cache->counter = adaptive_counter_backoff(cache->counter);
unspecialize(instr);
return;
success:
STAT_INC(TO_BOOL, success);
cache->counter = adaptive_counter_cooldown();
specialize(instr, specialized_op);
}

#ifdef Py_STATS
Expand Down
Loading