From: Kirill Podoprigora Date: Fri, 22 Nov 2024 17:00:35 +0000 (+0200) Subject: gh-115999: Add free-threaded specialization for `UNPACK_SEQUENCE` (#126600) X-Git-Tag: v3.14.0a3~238 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=27486c3365a1f628e2df9a0e88c7c51706a1282e;p=thirdparty%2FPython%2Fcpython.git gh-115999: Add free-threaded specialization for `UNPACK_SEQUENCE` (#126600) Add free-threaded specialization for `UNPACK_SEQUENCE` opcode. `UNPACK_SEQUENCE_TUPLE/UNPACK_SEQUENCE_TWO_TUPLE` are already thread safe since tuples are immutable. `UNPACK_SEQUENCE_LIST` is not thread safe because of nature of lists (there is nothing preventing another thread from adding items to or removing them the list while the instruction is executing). To achieve thread safety we add a critical section to the implementation of `UNPACK_SEQUENCE_LIST`, especially around the parts where we check the size of the list and push items onto the stack. --------- Co-authored-by: Matt Page Co-authored-by: mpage --- diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 58e583eabbcc..53280875f104 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1223,7 +1223,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1b2880cb6bb6..1c1f478c3833 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -112,7 +112,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index a0292b31af1b..1a6eac236009 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1339,6 +1339,37 @@ class TestSpecializer(TestBase): self.assert_specialized(to_bool_str, "TO_BOOL_STR") self.assert_no_opcode(to_bool_str, "TO_BOOL") + @cpython_only + @requires_specialization_ft + def test_unpack_sequence(self): + def f(): + for _ in range(100): + a, b = 1, 2 + self.assertEqual(a, 1) + self.assertEqual(b, 2) + + f() + self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE") + self.assert_no_opcode(f, "UNPACK_SEQUENCE") + + def g(): + for _ in range(100): + a, = 1, + self.assertEqual(a, 1) + + g() + self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE") + self.assert_no_opcode(g, "UNPACK_SEQUENCE") + + def x(): + for _ in range(100): + a, b = [1, 2] + self.assertEqual(a, 1) + self.assertEqual(b, 2) + + x() + self.assert_specialized(x, "UNPACK_SEQUENCE_LIST") + self.assert_no_opcode(x, "UNPACK_SEQUENCE") if __name__ == "__main__": unittest.main() diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6ee886c2ba0f..83240d5b95be 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1381,7 +1381,7 @@ dummy_func( }; specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_UnpackSequence(seq, next_instr, oparg); @@ -1389,7 +1389,7 @@ dummy_func( } OPCODE_DEFERRED_INC(UNPACK_SEQUENCE); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ (void)seq; (void)counter; } @@ -1429,12 +1429,24 @@ dummy_func( inst(UNPACK_SEQUENCE_LIST, (unused/1, seq -- values[oparg])) { PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq); DEOPT_IF(!PyList_CheckExact(seq_o)); - DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg); + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + PyCriticalSection_Begin(&cs, seq_o); + #endif + if (PyList_GET_SIZE(seq_o) != oparg) { + #ifdef Py_GIL_DISABLED + PyCriticalSection_End(&cs); + #endif + DEOPT_IF(true); + } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + PyCriticalSection_End(&cs); + #endif DECREF_INPUTS(); } @@ -2525,7 +2537,7 @@ dummy_func( } OPCODE_DEFERRED_INC(CONTAINS_OP); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } macro(CONTAINS_OP) = _SPECIALIZE_CONTAINS_OP + _CONTAINS_OP; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5c7138a94214..22de7ca11cd5 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1711,15 +1711,33 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_Begin(&cs, seq_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif if (PyList_GET_SIZE(seq_o) != oparg) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif PyStackRef_CLOSE(seq); stack_pointer += -1 + oparg; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 13947849942c..182c9cf99198 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3405,7 +3405,7 @@ } OPCODE_DEFERRED_INC(CONTAINS_OP); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _CONTAINS_OP { @@ -7994,7 +7994,7 @@ seq = stack_pointer[-1]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -8004,7 +8004,7 @@ } OPCODE_DEFERRED_INC(UNPACK_SEQUENCE); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ (void)seq; (void)counter; } @@ -8035,12 +8035,30 @@ values = &stack_pointer[-1]; PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq); DEOPT_IF(!PyList_CheckExact(seq_o), UNPACK_SEQUENCE); - DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg, UNPACK_SEQUENCE); + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_Begin(&cs, seq_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + if (PyList_GET_SIZE(seq_o) != oparg) { + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + DEOPT_IF(true, UNPACK_SEQUENCE); + } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif PyStackRef_CLOSE(seq); stack_pointer += -1 + oparg; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/specialize.c b/Python/specialize.c index c69f61c8b449..716d53a495c2 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2487,39 +2487,33 @@ _Py_Specialize_UnpackSequence(_PyStackRef seq_st, _Py_CODEUNIT *instr, int oparg { PyObject *seq = PyStackRef_AsPyObjectBorrow(seq_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[UNPACK_SEQUENCE] == INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); - _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)(instr + 1); if (PyTuple_CheckExact(seq)) { if (PyTuple_GET_SIZE(seq) != oparg) { SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); - goto failure; + unspecialize(instr); + return; } if (PyTuple_GET_SIZE(seq) == 2) { - instr->op.code = UNPACK_SEQUENCE_TWO_TUPLE; - goto success; + specialize(instr, UNPACK_SEQUENCE_TWO_TUPLE); + return; } - instr->op.code = UNPACK_SEQUENCE_TUPLE; - goto success; + specialize(instr, UNPACK_SEQUENCE_TUPLE); + return; } if (PyList_CheckExact(seq)) { if (PyList_GET_SIZE(seq) != oparg) { SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); - goto failure; + unspecialize(instr); + return; } - instr->op.code = UNPACK_SEQUENCE_LIST; - goto success; + specialize(instr, UNPACK_SEQUENCE_LIST); + return; } SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq)); -failure: - STAT_INC(UNPACK_SEQUENCE, failure); - instr->op.code = UNPACK_SEQUENCE; - cache->counter = adaptive_counter_backoff(cache->counter); - return; -success: - STAT_INC(UNPACK_SEQUENCE, success); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } #ifdef Py_STATS