-
-
Notifications
You must be signed in to change notification settings - Fork 30.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-115999: Add free-threaded specialization for TO_BOOL
#126616
Changes from 2 commits
62b0307
e32f2f0
6798366
e45778b
bafabd7
aa8a81c
1f4a350
23e190e
30020b9
b1f1c71
a213390
e68dc65
b27f916
249efd1
c08f8b7
6bc4c90
542e4dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2663,101 +2663,91 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr) | |
cache->counter = adaptive_counter_cooldown(); | ||
} | ||
|
||
static int | ||
to_bool_fail_kind(PyObject *value) | ||
{ | ||
if (PyByteArray_CheckExact(value)) { | ||
return SPEC_FAIL_TO_BOOL_BYTEARRAY; | ||
} | ||
if (PyBytes_CheckExact(value)) { | ||
return SPEC_FAIL_TO_BOOL_BYTES; | ||
} | ||
if (PyDict_CheckExact(value)) { | ||
return SPEC_FAIL_TO_BOOL_DICT; | ||
} | ||
if (PyFloat_CheckExact(value)) { | ||
return SPEC_FAIL_TO_BOOL_FLOAT; | ||
} | ||
if (PyMemoryView_Check(value)) { | ||
return SPEC_FAIL_TO_BOOL_MEMORY_VIEW; | ||
} | ||
if (PyAnySet_CheckExact(value)) { | ||
return SPEC_FAIL_TO_BOOL_SET; | ||
} | ||
if (PyTuple_CheckExact(value)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe not for this PR to address, but why is the tuple type handled here? I see no specialization for tuple (or did I miss something?). Tuple is immutable, so it should be relatively easy to add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIRC, we added specializaiton based on metric that is measured at https://github.com/faster-cpython/benchmarking-public. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that tuple case is easy to add but that would be based on how many boolean operations are existed for the tuple at the real world workload. |
||
return SPEC_FAIL_TO_BOOL_TUPLE; | ||
} | ||
return SPEC_FAIL_OTHER; | ||
} | ||
|
||
void | ||
_Py_Specialize_ToBool(_PyStackRef value_o, _Py_CODEUNIT *instr) | ||
{ | ||
assert(ENABLE_SPECIALIZATION); | ||
assert(ENABLE_SPECIALIZATION_FT); | ||
assert(_PyOpcode_Caches[TO_BOOL] == INLINE_CACHE_ENTRIES_TO_BOOL); | ||
_PyToBoolCache *cache = (_PyToBoolCache *)(instr + 1); | ||
PyObject *value = PyStackRef_AsPyObjectBorrow(value_o); | ||
if (PyBool_Check(value)) { | ||
instr->op.code = TO_BOOL_BOOL; | ||
goto success; | ||
specialize(instr, TO_BOOL_BOOL); | ||
return; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we leave the gotos, and have one call to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is any more repetitive that what was previously here and is easier to reason about. It's also the same number of function calls (1) which will be inlined. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To elaborate a bit, it is the same amount of repetition in either case. The only difference is what is being repeated. The previous approach repeats the assignment to the opcode and the goto: if (PyBool_Check(value)) {
instr->op.code = TO_BOOL_BOOL;
goto success;
} while the new approach repeats the call to specialize and the return: if (PyBool_Check(value)) {
specialize(instr, TO_BOOL_BOOL);
return;
} The benefit of the latter approach is that the control flow is easier to follow. Finally, we make one call to |
||
} | ||
if (PyLong_CheckExact(value)) { | ||
instr->op.code = TO_BOOL_INT; | ||
goto success; | ||
specialize(instr, TO_BOOL_INT); | ||
return; | ||
} | ||
if (PyList_CheckExact(value)) { | ||
instr->op.code = TO_BOOL_LIST; | ||
goto success; | ||
specialize(instr, TO_BOOL_LIST); | ||
return; | ||
} | ||
if (Py_IsNone(value)) { | ||
instr->op.code = TO_BOOL_NONE; | ||
goto success; | ||
specialize(instr, TO_BOOL_NONE); | ||
return; | ||
} | ||
if (PyUnicode_CheckExact(value)) { | ||
instr->op.code = TO_BOOL_STR; | ||
goto success; | ||
specialize(instr, TO_BOOL_STR); | ||
return; | ||
} | ||
if (PyType_HasFeature(Py_TYPE(value), Py_TPFLAGS_HEAPTYPE)) { | ||
PyNumberMethods *nb = Py_TYPE(value)->tp_as_number; | ||
if (nb && nb->nb_bool) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_NUMBER); | ||
goto failure; | ||
unspecialize(instr, SPEC_FAIL_TO_BOOL_NUMBER); | ||
return; | ||
} | ||
PyMappingMethods *mp = Py_TYPE(value)->tp_as_mapping; | ||
if (mp && mp->mp_length) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MAPPING); | ||
goto failure; | ||
unspecialize(instr, SPEC_FAIL_TO_BOOL_MAPPING); | ||
return; | ||
} | ||
PySequenceMethods *sq = Py_TYPE(value)->tp_as_sequence; | ||
if (sq && sq->sq_length) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SEQUENCE); | ||
goto failure; | ||
unspecialize(instr, SPEC_FAIL_TO_BOOL_SEQUENCE); | ||
return; | ||
} | ||
if (!PyUnstable_Type_AssignVersionTag(Py_TYPE(value))) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OUT_OF_VERSIONS); | ||
goto failure; | ||
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); | ||
return; | ||
} | ||
uint32_t version = type_get_version(Py_TYPE(value), TO_BOOL); | ||
uint32_t version = FT_ATOMIC_LOAD_UINT32_RELAXED(Py_TYPE(value)->tp_version_tag); | ||
mpage marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (version == 0) { | ||
goto failure; | ||
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); | ||
return; | ||
} | ||
instr->op.code = TO_BOOL_ALWAYS_TRUE; | ||
specialize(instr, TO_BOOL_ALWAYS_TRUE); | ||
write_u32(cache->version, version); | ||
mpage marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert(version); | ||
goto success; | ||
} | ||
#ifdef Py_STATS | ||
if (PyByteArray_CheckExact(value)) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTEARRAY); | ||
goto failure; | ||
} | ||
if (PyBytes_CheckExact(value)) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTES); | ||
goto failure; | ||
} | ||
if (PyDict_CheckExact(value)) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_DICT); | ||
goto failure; | ||
} | ||
if (PyFloat_CheckExact(value)) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_FLOAT); | ||
goto failure; | ||
} | ||
if (PyMemoryView_Check(value)) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MEMORY_VIEW); | ||
goto failure; | ||
} | ||
if (PyAnySet_CheckExact(value)) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SET); | ||
goto failure; | ||
} | ||
if (PyTuple_CheckExact(value)) { | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_TUPLE); | ||
goto failure; | ||
return; | ||
} | ||
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OTHER); | ||
#endif // Py_STATS | ||
failure: | ||
STAT_INC(TO_BOOL, failure); | ||
instr->op.code = TO_BOOL; | ||
cache->counter = adaptive_counter_backoff(cache->counter); | ||
return; | ||
success: | ||
STAT_INC(TO_BOOL, success); | ||
cache->counter = adaptive_counter_cooldown(); | ||
unspecialize(instr, to_bool_fail_kind(value)); | ||
} | ||
|
||
static int | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be surrounded by
#ifdef Py_STATS
. We don't want to be calling this unless stats are enabled.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mpage
I have same concerns with this change, this change was first introduced at #126414, do we need to make it as dummy function for the default build?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think the performance overhead of calling this will matter, but if it does, we could replace it with a dummy function that returns a special sentinel value indicating that stats are disabled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Both clang (18.18) and GCC (11.5.0) eliminate the call to
to_bool_fail_kind
if stats are disabled because the result of the call is unused.