Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-115999: Add free-threaded specialization for TO_BOOL #126616

Merged
merged 17 commits into from
Nov 21, 2024
Merged
66 changes: 66 additions & 0 deletions Lib/test/test_opcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,72 @@ def g():
self.assert_specialized(g, "CONTAINS_OP_SET")
self.assert_no_opcode(g, "CONTAINS_OP")

@cpython_only
@requires_specialization_ft
def test_to_bool(self):
    # Each nested helper below performs truth tests on values of a single
    # type. After calling the helper once, the test checks that it contains
    # the matching TO_BOOL_* opcode and no longer contains generic TO_BOOL.
    # The helper bodies are deliberately simple: changing how the truth
    # test is written could change the bytecode being asserted on.

    def to_bool_bool():
        true_cnt, false_cnt = 0, 0
        # elems holds only bool objects: True for even e, False for odd e.
        elems = [e % 2 == 0 for e in range(100)]
        for e in elems:
            if e:
                true_cnt += 1
            else:
                false_cnt += 1
        # range(100) contains 50 even and 50 odd numbers.
        self.assertEqual(true_cnt, 50)
        self.assertEqual(false_cnt, 50)

    to_bool_bool()
    self.assert_specialized(to_bool_bool, "TO_BOOL_BOOL")
    self.assert_no_opcode(to_bool_bool, "TO_BOOL")

    def to_bool_int():
        count = 0
        for i in range(100):
            if i:
                count += 1
            else:
                count -= 1
        # Only i == 0 is falsy: 99 increments and 1 decrement give 98.
        self.assertEqual(count, 98)

    to_bool_int()
    self.assert_specialized(to_bool_int, "TO_BOOL_INT")
    self.assert_no_opcode(to_bool_int, "TO_BOOL")

    def to_bool_list():
        count = 0
        elems = [1, 2, 3]
        # The loop condition truth-tests the list itself; it becomes falsy
        # once every element has been popped.
        while elems:
            count += elems.pop()
        self.assertEqual(elems, [])
        # 3 + 2 + 1
        self.assertEqual(count, 6)

    to_bool_list()
    self.assert_specialized(to_bool_list, "TO_BOOL_LIST")
    self.assert_no_opcode(to_bool_list, "TO_BOOL")

    def to_bool_none():
        count = 0
        elems = [None, None, None, None]
        for e in elems:
            # None is always falsy, so every element is counted.
            if not e:
                count += 1
        self.assertEqual(count, len(elems))

    to_bool_none()
    self.assert_specialized(to_bool_none, "TO_BOOL_NONE")
    self.assert_no_opcode(to_bool_none, "TO_BOOL")

    def to_bool_str():
        count = 0
        elems = ["", "foo", ""]
        for e in elems:
            # Only the non-empty string "foo" is truthy.
            if e:
                count += 1
        self.assertEqual(count, 1)

    to_bool_str()
    self.assert_specialized(to_bool_str, "TO_BOOL_STR")
    self.assert_no_opcode(to_bool_str, "TO_BOOL")


if __name__ == "__main__":
Expand Down
6 changes: 3 additions & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,15 +391,15 @@ dummy_func(
};

specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ToBool(value, next_instr);
DISPATCH_SAME_OPARG();
}
OPCODE_DEFERRED_INC(TO_BOOL);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}

op(_TO_BOOL, (value -- res)) {
Expand Down Expand Up @@ -435,7 +435,7 @@ dummy_func(
PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
EXIT_IF(!PyList_CheckExact(value_o));
STAT_INC(TO_BOOL, hit);
res = Py_SIZE(value_o) ? PyStackRef_True : PyStackRef_False;
res = PyList_GET_SIZE(value_o) ? PyStackRef_True : PyStackRef_False;
DECREF_INPUTS();
}

Expand Down
2 changes: 1 addition & 1 deletion Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

114 changes: 52 additions & 62 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -2663,101 +2663,91 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr)
cache->counter = adaptive_counter_cooldown();
}

static int
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be surrounded by #ifdef Py_STATS. We don't want to be calling this unless stats are enabled.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mpage
I have the same concern about this change. It was first introduced in #126414; do we need to make it a dummy function for the default build?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the performance overhead of calling this will matter, but if it does, we could replace it with a dummy function that returns a special sentinel value indicating that stats are disabled.

Copy link
Contributor

@mpage mpage Nov 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both clang (18.18) and GCC (11.5.0) eliminate the call to to_bool_fail_kind if stats are disabled because the result of the call is unused.

to_bool_fail_kind(PyObject *value)
{
if (PyByteArray_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_BYTEARRAY;
}
if (PyBytes_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_BYTES;
}
if (PyDict_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_DICT;
}
if (PyFloat_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_FLOAT;
}
if (PyMemoryView_Check(value)) {
return SPEC_FAIL_TO_BOOL_MEMORY_VIEW;
}
if (PyAnySet_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_SET;
}
if (PyTuple_CheckExact(value)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe not for this PR to address, but why is the tuple type handled here? I see no specialization for tuple (or did I miss something?). Tuple is immutable, so it should be relatively easy to add

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, we added specializations based on metrics measured at https://github.com/faster-cpython/benchmarking-public.
I am not sure how many tuple cases actually exist.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that the tuple case is easy to add, but whether to add it would depend on how many boolean operations on tuples exist in real-world workloads.

return SPEC_FAIL_TO_BOOL_TUPLE;
}
return SPEC_FAIL_OTHER;
}

void
_Py_Specialize_ToBool(_PyStackRef value_o, _Py_CODEUNIT *instr)
{
assert(ENABLE_SPECIALIZATION);
assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[TO_BOOL] == INLINE_CACHE_ENTRIES_TO_BOOL);
_PyToBoolCache *cache = (_PyToBoolCache *)(instr + 1);
PyObject *value = PyStackRef_AsPyObjectBorrow(value_o);
if (PyBool_Check(value)) {
instr->op.code = TO_BOOL_BOOL;
goto success;
specialize(instr, TO_BOOL_BOOL);
return;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we leave the gotos, and have one call to specialize at the end?
This seems very repetitive and adds the overhead of several calls.

Copy link
Contributor

@mpage mpage Nov 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is any more repetitive than what was previously here, and it is easier to reason about. It's also the same number of function calls (1), which will be inlined.

Copy link
Contributor

@mpage mpage Nov 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To elaborate a bit, it is the same amount of repetition in either case. The only difference is what is being repeated. The previous approach repeats the assignment to the opcode and the goto:

if (PyBool_Check(value)) {
    instr->op.code = TO_BOOL_BOOL;
    goto success;
}

while the new approach repeats the call to specialize and the return:

if (PyBool_Check(value)) {
    specialize(instr, TO_BOOL_BOOL);
    return;
}

The benefit of the latter approach is that the control flow is easier to follow.

Finally, we make one call to specialize and that call is inlined. As noted below, the generated code for the two versions of _Py_Specialize_ToBool is nearly identical.

}
if (PyLong_CheckExact(value)) {
instr->op.code = TO_BOOL_INT;
goto success;
specialize(instr, TO_BOOL_INT);
return;
}
if (PyList_CheckExact(value)) {
instr->op.code = TO_BOOL_LIST;
goto success;
specialize(instr, TO_BOOL_LIST);
return;
}
if (Py_IsNone(value)) {
instr->op.code = TO_BOOL_NONE;
goto success;
specialize(instr, TO_BOOL_NONE);
return;
}
if (PyUnicode_CheckExact(value)) {
instr->op.code = TO_BOOL_STR;
goto success;
specialize(instr, TO_BOOL_STR);
return;
}
if (PyType_HasFeature(Py_TYPE(value), Py_TPFLAGS_HEAPTYPE)) {
PyNumberMethods *nb = Py_TYPE(value)->tp_as_number;
if (nb && nb->nb_bool) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_NUMBER);
goto failure;
unspecialize(instr, SPEC_FAIL_TO_BOOL_NUMBER);
return;
}
PyMappingMethods *mp = Py_TYPE(value)->tp_as_mapping;
if (mp && mp->mp_length) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MAPPING);
goto failure;
unspecialize(instr, SPEC_FAIL_TO_BOOL_MAPPING);
return;
}
PySequenceMethods *sq = Py_TYPE(value)->tp_as_sequence;
if (sq && sq->sq_length) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SEQUENCE);
goto failure;
unspecialize(instr, SPEC_FAIL_TO_BOOL_SEQUENCE);
return;
}
if (!PyUnstable_Type_AssignVersionTag(Py_TYPE(value))) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OUT_OF_VERSIONS);
goto failure;
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS);
return;
}
uint32_t version = type_get_version(Py_TYPE(value), TO_BOOL);
uint32_t version = FT_ATOMIC_LOAD_UINT32_RELAXED(Py_TYPE(value)->tp_version_tag);
mpage marked this conversation as resolved.
Show resolved Hide resolved
if (version == 0) {
goto failure;
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS);
return;
}
instr->op.code = TO_BOOL_ALWAYS_TRUE;
specialize(instr, TO_BOOL_ALWAYS_TRUE);
write_u32(cache->version, version);
mpage marked this conversation as resolved.
Show resolved Hide resolved
assert(version);
goto success;
}
#ifdef Py_STATS
if (PyByteArray_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTEARRAY);
goto failure;
}
if (PyBytes_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTES);
goto failure;
}
if (PyDict_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_DICT);
goto failure;
}
if (PyFloat_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_FLOAT);
goto failure;
}
if (PyMemoryView_Check(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MEMORY_VIEW);
goto failure;
}
if (PyAnySet_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SET);
goto failure;
}
if (PyTuple_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_TUPLE);
goto failure;
return;
}
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OTHER);
#endif // Py_STATS
failure:
STAT_INC(TO_BOOL, failure);
instr->op.code = TO_BOOL;
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(TO_BOOL, success);
cache->counter = adaptive_counter_cooldown();
unspecialize(instr, to_bool_fail_kind(value));
}

static int
Expand Down
Loading