Add free-threaded specialization for the `UNPACK_SEQUENCE` opcode.
`UNPACK_SEQUENCE_TUPLE/UNPACK_SEQUENCE_TWO_TUPLE` are already thread safe since tuples are immutable.
`UNPACK_SEQUENCE_LIST` is not thread safe because of the nature of lists (there is nothing preventing another thread from adding items to or removing them from the list while the instruction is executing). To achieve thread safety we add a critical section to the implementation of `UNPACK_SEQUENCE_LIST`, especially around the parts where we check the size of the list and push items onto the stack.
---------
Co-authored-by: Matt Page <mpage@meta.com>
Co-authored-by: mpage <mpage@cs.stanford.edu>
[UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG },
[UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+ [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
[_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
- [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
+ [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
[_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
self.assert_specialized(to_bool_str, "TO_BOOL_STR")
self.assert_no_opcode(to_bool_str, "TO_BOOL")
+ @cpython_only
+ @requires_specialization_ft
+ def test_unpack_sequence(self):
+ def f():
+ for _ in range(100):
+ a, b = 1, 2
+ self.assertEqual(a, 1)
+ self.assertEqual(b, 2)
+
+ f()
+ self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE")
+ self.assert_no_opcode(f, "UNPACK_SEQUENCE")
+
+ def g():
+ for _ in range(100):
+ a, = 1,
+ self.assertEqual(a, 1)
+
+ g()
+ self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE")
+ self.assert_no_opcode(g, "UNPACK_SEQUENCE")
+
+ def x():
+ for _ in range(100):
+ a, b = [1, 2]
+ self.assertEqual(a, 1)
+ self.assertEqual(b, 2)
+
+ x()
+ self.assert_specialized(x, "UNPACK_SEQUENCE_LIST")
+ self.assert_no_opcode(x, "UNPACK_SEQUENCE")
if __name__ == "__main__":
unittest.main()
};
specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) {
- #if ENABLE_SPECIALIZATION
+ #if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
}
OPCODE_DEFERRED_INC(UNPACK_SEQUENCE);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
- #endif /* ENABLE_SPECIALIZATION */
+ #endif /* ENABLE_SPECIALIZATION_FT */
(void)seq;
(void)counter;
}
inst(UNPACK_SEQUENCE_LIST, (unused/1, seq -- values[oparg])) {
PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq);
DEOPT_IF(!PyList_CheckExact(seq_o));
- DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg);
+ #ifdef Py_GIL_DISABLED
+ PyCriticalSection cs;
+ PyCriticalSection_Begin(&cs, seq_o);
+ #endif
+ if (PyList_GET_SIZE(seq_o) != oparg) {
+ #ifdef Py_GIL_DISABLED
+ PyCriticalSection_End(&cs);
+ #endif
+ DEOPT_IF(true);
+ }
STAT_INC(UNPACK_SEQUENCE, hit);
PyObject **items = _PyList_ITEMS(seq_o);
for (int i = oparg; --i >= 0; ) {
*values++ = PyStackRef_FromPyObjectNew(items[i]);
}
+ #ifdef Py_GIL_DISABLED
+ PyCriticalSection_End(&cs);
+ #endif
DECREF_INPUTS();
}
}
OPCODE_DEFERRED_INC(CONTAINS_OP);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
- #endif /* ENABLE_SPECIALIZATION */
+ #endif /* ENABLE_SPECIALIZATION_FT */
}
macro(CONTAINS_OP) = _SPECIALIZE_CONTAINS_OP + _CONTAINS_OP;
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
+ #ifdef Py_GIL_DISABLED
+ PyCriticalSection cs;
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ PyCriticalSection_Begin(&cs, seq_o);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ #endif
if (PyList_GET_SIZE(seq_o) != oparg) {
- UOP_STAT_INC(uopcode, miss);
- JUMP_TO_JUMP_TARGET();
+ #ifdef Py_GIL_DISABLED
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ PyCriticalSection_End(&cs);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ #endif
+ if (true) {
+ UOP_STAT_INC(uopcode, miss);
+ JUMP_TO_JUMP_TARGET();
+ }
}
STAT_INC(UNPACK_SEQUENCE, hit);
PyObject **items = _PyList_ITEMS(seq_o);
for (int i = oparg; --i >= 0; ) {
*values++ = PyStackRef_FromPyObjectNew(items[i]);
}
+ #ifdef Py_GIL_DISABLED
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ PyCriticalSection_End(&cs);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ #endif
PyStackRef_CLOSE(seq);
stack_pointer += -1 + oparg;
assert(WITHIN_STACK_BOUNDS());
}
OPCODE_DEFERRED_INC(CONTAINS_OP);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
- #endif /* ENABLE_SPECIALIZATION */
+ #endif /* ENABLE_SPECIALIZATION_FT */
}
// _CONTAINS_OP
{
seq = stack_pointer[-1];
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
- #if ENABLE_SPECIALIZATION
+ #if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
}
OPCODE_DEFERRED_INC(UNPACK_SEQUENCE);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
- #endif /* ENABLE_SPECIALIZATION */
+ #endif /* ENABLE_SPECIALIZATION_FT */
(void)seq;
(void)counter;
}
values = &stack_pointer[-1];
PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq);
DEOPT_IF(!PyList_CheckExact(seq_o), UNPACK_SEQUENCE);
- DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg, UNPACK_SEQUENCE);
+ #ifdef Py_GIL_DISABLED
+ PyCriticalSection cs;
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ PyCriticalSection_Begin(&cs, seq_o);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ #endif
+ if (PyList_GET_SIZE(seq_o) != oparg) {
+ #ifdef Py_GIL_DISABLED
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ PyCriticalSection_End(&cs);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ #endif
+ DEOPT_IF(true, UNPACK_SEQUENCE);
+ }
STAT_INC(UNPACK_SEQUENCE, hit);
PyObject **items = _PyList_ITEMS(seq_o);
for (int i = oparg; --i >= 0; ) {
*values++ = PyStackRef_FromPyObjectNew(items[i]);
}
+ #ifdef Py_GIL_DISABLED
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ PyCriticalSection_End(&cs);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ #endif
PyStackRef_CLOSE(seq);
stack_pointer += -1 + oparg;
assert(WITHIN_STACK_BOUNDS());
{
PyObject *seq = PyStackRef_AsPyObjectBorrow(seq_st);
- assert(ENABLE_SPECIALIZATION);
+ assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[UNPACK_SEQUENCE] ==
INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE);
- _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)(instr + 1);
if (PyTuple_CheckExact(seq)) {
if (PyTuple_GET_SIZE(seq) != oparg) {
SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR);
- goto failure;
+ unspecialize(instr);
+ return;
}
if (PyTuple_GET_SIZE(seq) == 2) {
- instr->op.code = UNPACK_SEQUENCE_TWO_TUPLE;
- goto success;
+ specialize(instr, UNPACK_SEQUENCE_TWO_TUPLE);
+ return;
}
- instr->op.code = UNPACK_SEQUENCE_TUPLE;
- goto success;
+ specialize(instr, UNPACK_SEQUENCE_TUPLE);
+ return;
}
if (PyList_CheckExact(seq)) {
if (PyList_GET_SIZE(seq) != oparg) {
SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR);
- goto failure;
+ unspecialize(instr);
+ return;
}
- instr->op.code = UNPACK_SEQUENCE_LIST;
- goto success;
+ specialize(instr, UNPACK_SEQUENCE_LIST);
+ return;
}
SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq));
-failure:
- STAT_INC(UNPACK_SEQUENCE, failure);
- instr->op.code = UNPACK_SEQUENCE;
- cache->counter = adaptive_counter_backoff(cache->counter);
- return;
-success:
- STAT_INC(UNPACK_SEQUENCE, success);
- cache->counter = adaptive_counter_cooldown();
+ unspecialize(instr);
}
#ifdef Py_STATS