]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-133395: add option for extension modules to specialize BINARY_OP/SUBSCR, apply...
authorIrit Katriel <1055913+iritkatriel@users.noreply.github.com>
Mon, 5 May 2025 16:46:56 +0000 (17:46 +0100)
committerGitHub <noreply@github.com>
Mon, 5 May 2025 16:46:56 +0000 (17:46 +0100)
14 files changed:
Include/cpython/object.h
Include/internal/pycore_code.h
Include/internal/pycore_opcode_metadata.h
Include/internal/pycore_uop_metadata.h
Include/typeslots.h
Lib/test/test_sys.py
Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst [new file with mode: 0644]
Modules/arraymodule.c
Objects/typeslots.inc
Python/bytecodes.c
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/optimizer_cases.c.h
Python/specialize.c

index 3a4d65f7712c632fe7ee7838df34ef1a024e2143..818fc7d05605ffc7c13c7bfff6609334ebbe51c4 100644 (file)
@@ -143,6 +143,11 @@ typedef struct {
  * backwards-compatibility */
 typedef Py_ssize_t printfunc;
 
+/* Specialize a binary op by setting the descriptor pointer */
+struct _PyBinopSpecializationDescr;
+typedef int (*binop_specialize_func)(PyObject *v, PyObject *w, int oparg,
+                                    struct _PyBinopSpecializationDescr **descr);
+
 // If this structure is modified, Doc/includes/typestruct.h should be updated
 // as well.
 struct _typeobject {
@@ -233,6 +238,13 @@ struct _typeobject {
     /* bitset of which type-watchers care about this type */
     unsigned char tp_watched;
 
+    /* Callback that may specialize BINARY_OP.
+     * This is an experimental API based on the ideas in the paper
+     * "Cross Module Quickening - The Curious Case of C Extensions"
+     * by Felix Berlakovich and Stefan Brunthaler.
+     */
+    binop_specialize_func tp_binop_specialize;
+
     /* Number of tp_version_tag values used.
      * Set to _Py_ATTR_CACHE_UNUSED if the attribute cache is
      * disabled for this type (e.g. due to custom MRO entries).
index 635d2b24f4bdffc1c9596df64e82105f7e9834ff..1c77730e417ef7dc30dda7df52790fb106fbeda4 100644 (file)
@@ -482,13 +482,18 @@ adaptive_counter_backoff(_Py_BackoffCounter counter) {
 /* Specialization Extensions */
 
 /* callbacks for an external specialization */
+
+struct _PyBinopSpecializationDescr;
+
 typedef int (*binaryopguardfunc)(PyObject *lhs, PyObject *rhs);
-typedef PyObject *(*binaryopactionfunc)(PyObject *lhs, PyObject *rhs);
+typedef PyObject* (*binaryopactionfunc)(PyObject *lhs, PyObject *rhs);
+typedef void (*binaryopfreefunc)(struct _PyBinopSpecializationDescr *descr);
 
-typedef struct {
+typedef struct _PyBinopSpecializationDescr {
     int oparg;
     binaryopguardfunc guard;
     binaryopactionfunc action;
+    binaryopfreefunc free;
 } _PyBinaryOpSpecializationDescr;
 
 /* Comparison bit masks. */
index dc7ecc998c4477474dfee671359dba582d786eb2..fbb696e1755590858949531b3493643e3b6081e0 100644 (file)
@@ -1082,7 +1082,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = {
     [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
     [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
-    [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
+    [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
     [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
@@ -1333,7 +1333,6 @@ _PyOpcode_macro_expansion[256] = {
     [BINARY_OP_ADD_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 } } },
     [BINARY_OP_ADD_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 } } },
     [BINARY_OP_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 } } },
-    [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 } } },
     [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } },
     [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 } } },
     [BINARY_OP_MULTIPLY_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_INT, OPARG_SIMPLE, 5 } } },
index 9b88763da07ba7de60ecddffc9c68ffee1b90552..922d8a6cadf08cc7dac1191d2af2f7bfe4e54cb2 100644 (file)
@@ -94,8 +94,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
-    [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
-    [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
+    [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
     [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
@@ -423,7 +422,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_GET_ITER] = "_GET_ITER",
     [_GET_LEN] = "_GET_LEN",
     [_GET_YIELD_FROM_ITER] = "_GET_YIELD_FROM_ITER",
-    [_GUARD_BINARY_OP_EXTEND] = "_GUARD_BINARY_OP_EXTEND",
     [_GUARD_CALLABLE_STR_1] = "_GUARD_CALLABLE_STR_1",
     [_GUARD_CALLABLE_TUPLE_1] = "_GUARD_CALLABLE_TUPLE_1",
     [_GUARD_CALLABLE_TYPE_1] = "_GUARD_CALLABLE_TYPE_1",
@@ -760,8 +758,6 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 2;
         case _BINARY_OP_INPLACE_ADD_UNICODE:
             return 2;
-        case _GUARD_BINARY_OP_EXTEND:
-            return 0;
         case _BINARY_OP_EXTEND:
             return 2;
         case _BINARY_SLICE:
index a7f3017ec02e92f69f8194b11f51d2ceee172a6c..980e714714e7860791843cc8a6adf9173b19c49d 100644 (file)
@@ -93,4 +93,5 @@
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030E0000
 /* New in 3.14 */
 #define Py_tp_token 83
+#define Py_tp_binop_specialize 84
 #endif
index 5f7171d02c5602bf527ac7cc00a52a0d34e4c393..aeca3720cfaedc20479a0781cefd63e650828de0 100644 (file)
@@ -1776,7 +1776,7 @@ class SizeofTest(unittest.TestCase):
         check((1,2,3), vsize('') + self.P + 3*self.P)
         # type
         # static type: PyTypeObject
-        fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
+        fmt = 'P2nPI13Pl4Pn9Pn12PI3Pc'
         s = vsize(fmt)
         check(int, s)
         typeid = 'n' if support.Py_GIL_DISABLED else ''
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst
new file mode 100644 (file)
index 0000000..a391ce1
--- /dev/null
@@ -0,0 +1,2 @@
+Add an option for extension modules to specialize ``BINARY_OP`` instructions.
+It is applied to subscripting of ``array`` objects.
index 401a3a7072b8460c01865bbb9fd81696d98e6af7..4d2ff32cabe46722a3aa510c09e2eb5eba5c771f 100644 (file)
@@ -14,6 +14,8 @@
 #include "pycore_modsupport.h"    // _PyArg_NoKeywords()
 #include "pycore_moduleobject.h"  // _PyModule_GetState()
 
+#include "opcode.h"               // binary op opargs (NB_*)
+
 #include <stddef.h>               // offsetof()
 #include <stdbool.h>
 
@@ -848,6 +850,10 @@ array_richcompare(PyObject *v, PyObject *w, int op)
     return res;
 }
 
+static int
+array_binop_specialize(PyObject *v, PyObject *w, int oparg,
+                       _PyBinaryOpSpecializationDescr **descr);
+
 static Py_ssize_t
 array_length(PyObject *op)
 {
@@ -2963,6 +2969,8 @@ static PyType_Slot array_slots[] = {
     {Py_tp_alloc, PyType_GenericAlloc},
     {Py_tp_new, array_new},
     {Py_tp_traverse, array_tp_traverse},
+    {Py_tp_token, Py_TP_USE_SPEC},
+    {Py_tp_binop_specialize, array_binop_specialize},
 
     /* as sequence */
     {Py_sq_length, array_length},
@@ -2995,6 +3003,70 @@ static PyType_Spec array_spec = {
     .slots = array_slots,
 };
 
+/* Guard for the specialized array subscript operation.
+ *
+ * Returns the result of PyType_GetBaseByToken() for the type of lhs and
+ * array_spec, except that a failure which raised TypeError is cleared and
+ * reported as 0 (no match).  Any other negative result is returned as-is.
+ * rhs is not inspected.
+ *
+ * An exception that was already raised on entry is taken out of the way
+ * before the lookup and handed to _PyErr_ChainExceptions1() afterwards.
+ */
+static inline int
+array_subscr_guard(PyObject *lhs, PyObject *rhs)
+{
+    PyObject *pending = PyErr_GetRaisedException();
+    PyTypeObject *lhs_type = Py_TYPE(lhs);
+    int found = PyType_GetBaseByToken(lhs_type, &array_spec, NULL);
+    if (found < 0 && PyErr_ExceptionMatches(PyExc_TypeError)) {
+        /* A TypeError from the lookup just means "does not match". */
+        PyErr_Clear();
+        found = 0;
+    }
+    _PyErr_ChainExceptions1(pending);
+    return found;
+}
+
+/* Action for the specialized array subscript operation: forwards both
+ * operands to array_subscr() and returns whatever it returns.
+ */
+static PyObject *
+array_subscr_action(PyObject *lhs, PyObject *rhs)
+{
+    PyObject *item = array_subscr(lhs, rhs);
+    return item;
+}
+
+/* Release a descriptor with PyMem_Free().
+ *
+ * descr may be NULL: PyMem_Free(NULL) performs no operation, so no
+ * explicit check is needed.
+ */
+static void
+array_subscr_free(_PyBinaryOpSpecializationDescr* descr)
+{
+    PyMem_Free(descr);
+}
+
+/* tp_binop_specialize hook for array objects.
+ *
+ * v, w:   left and right operands of the binary operation.
+ * oparg:  the NB_* operation code; only NB_SUBSCR is handled.
+ * descr:  out-parameter.  Set to NULL on every path except success, where
+ *         it receives a descriptor allocated with PyMem_Malloc() whose
+ *         'free' member (array_subscr_free) releases it.
+ *
+ * Returns 1 if a descriptor was produced, 0 if this operation is not
+ * specialized, and -1 with an exception set on error (allocation failure,
+ * or an error reported by the guard).
+ */
+static int
+array_binop_specialize(PyObject *v, PyObject *w, int oparg,
+                       _PyBinaryOpSpecializationDescr **descr)
+{
+    /* Give the out-parameter a defined value on every return path. */
+    *descr = NULL;
+
+    array_state *state = find_array_state_by_type(Py_TYPE(v));
+    if (!array_Check(v, state)) {
+        return 0;
+    }
+
+    switch (oparg) {
+        case NB_SUBSCR: {
+            int matches = array_subscr_guard(v, w);
+            if (matches < 0) {
+                /* The guard only swallows TypeError; any other failure
+                 * leaves an exception set and must not be mistaken for
+                 * a successful match. */
+                return -1;
+            }
+            if (matches == 0) {
+                return 0;
+            }
+
+            _PyBinaryOpSpecializationDescr *d = PyMem_Malloc(sizeof(*d));
+            if (d == NULL) {
+                PyErr_NoMemory();
+                return -1;
+            }
+            *d = (_PyBinaryOpSpecializationDescr) {
+                .oparg = oparg,
+                .guard = array_subscr_guard,
+                .action = array_subscr_action,
+                .free = array_subscr_free,
+            };
+            *descr = d;
+            return 1;
+        }
+        default:
+            break;
+    }
+
+    return 0;
+}
+
+
 /*********************** Array Iterator **************************/
 
 /*[clinic input]
index 642160fe0bd8bcc7a0b0edc14613186f2d760ff0..f197c3f5023670911c87d700c5d9222762f21333 100644 (file)
@@ -82,3 +82,4 @@
 {offsetof(PyAsyncMethods, am_send), offsetof(PyTypeObject, tp_as_async)},
 {-1, offsetof(PyTypeObject, tp_vectorcall)},
 {-1, offsetof(PyHeapTypeObject, ht_token)},
+{-1, offsetof(PyTypeObject, tp_binop_specialize)},
index 5a52efaaec804223458e0f2a0b9a001b82d08c93..cc47e57175d7273299a20e297ce3ab62fea7d798 100644 (file)
@@ -801,9 +801,19 @@ dummy_func(
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr;
             assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
-            assert(d && d->guard);
+            assert(d);
+            assert(d->guard);
             int res = d->guard(left_o, right_o);
-            DEOPT_IF(!res);
+            ERROR_IF(res < 0);
+            if (res == 0) {
+                if (d->free) {
+                    d->free(d);
+                }
+                _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(this_instr+1);
+                write_ptr(cache->external_cache, NULL);
+                this_instr->op.code = BINARY_OP;
+                DEOPT_IF(true);
+            }
         }
 
         pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) {
@@ -816,6 +826,7 @@ dummy_func(
 
             PyObject *res_o = d->action(left_o, right_o);
             DECREF_INPUTS();
+            ERROR_IF(res_o == NULL);
             res = PyStackRef_FromPyObjectSteal(res_o);
         }
 
index 7f3c3141ad00b9045285cc39b6382508b032e10e..662e050c5c478aad47269000dbb43af6cb324bcf 100644 (file)
             break;
         }
 
-        case _GUARD_BINARY_OP_EXTEND: {
-            _PyStackRef right;
-            _PyStackRef left;
-            right = stack_pointer[-1];
-            left = stack_pointer[-2];
-            PyObject *descr = (PyObject *)CURRENT_OPERAND0();
-            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-            PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
-            _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr;
-            assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
-            assert(d && d->guard);
-            _PyFrame_SetStackPointer(frame, stack_pointer);
-            int res = d->guard(left_o, right_o);
-            stack_pointer = _PyFrame_GetStackPointer(frame);
-            if (!res) {
-                UOP_STAT_INC(uopcode, miss);
-                JUMP_TO_JUMP_TARGET();
-            }
-            break;
-        }
+        /* _GUARD_BINARY_OP_EXTEND is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
 
         case _BINARY_OP_EXTEND: {
             _PyStackRef right;
             stack_pointer = _PyFrame_GetStackPointer(frame);
             stack_pointer += -2;
             assert(WITHIN_STACK_BOUNDS());
+            if (res_o == NULL) {
+                JUMP_TO_ERROR();
+            }
             res = PyStackRef_FromPyObjectSteal(res_o);
             stack_pointer[0] = res;
             stack_pointer += 1;
index ee54b385b7064e6d77068077c780abe26b0be49f..072951d2a5fcf6d585d9cd6548229c4ba8f06ed3 100644 (file)
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
                 _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr;
                 assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
-                assert(d && d->guard);
+                assert(d);
+                assert(d->guard);
                 _PyFrame_SetStackPointer(frame, stack_pointer);
                 int res = d->guard(left_o, right_o);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
-                if (!res) {
-                    UPDATE_MISS_STATS(BINARY_OP);
-                    assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
-                    JUMP_TO_PREDICTED(BINARY_OP);
+                if (res < 0) {
+                    JUMP_TO_LABEL(error);
+                }
+                if (res == 0) {
+                    if (d->free) {
+                        _PyFrame_SetStackPointer(frame, stack_pointer);
+                        d->free(d);
+                        stack_pointer = _PyFrame_GetStackPointer(frame);
+                    }
+                    _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(this_instr+1);
+                    _PyFrame_SetStackPointer(frame, stack_pointer);
+                    write_ptr(cache->external_cache, NULL);
+                    stack_pointer = _PyFrame_GetStackPointer(frame);
+                    this_instr->op.code = BINARY_OP;
+                    if (true) {
+                        UPDATE_MISS_STATS(BINARY_OP);
+                        assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
+                        JUMP_TO_PREDICTED(BINARY_OP);
+                    }
                 }
             }
             /* Skip -4 cache entry */
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 stack_pointer += -2;
                 assert(WITHIN_STACK_BOUNDS());
+                if (res_o == NULL) {
+                    JUMP_TO_LABEL(error);
+                }
                 res = PyStackRef_FromPyObjectSteal(res_o);
             }
             stack_pointer[0] = res;
index 3f91f7eefc7bb0b1a828367350c771b282351e91..8e8b2ecfa5b0cf047082149ed1046c5dd6921d9d 100644 (file)
             break;
         }
 
-        case _GUARD_BINARY_OP_EXTEND: {
-            break;
-        }
+        /* _GUARD_BINARY_OP_EXTEND is not a viable micro-op for tier 2 */
 
         case _BINARY_OP_EXTEND: {
             JitOptSymbol *res;
index 59ec9a4cad6b9c50be4837fea16fe567b11d7473..fe4a65ee5f8079968e744ae2e81daebc54ccba97 100644 (file)
@@ -2534,7 +2534,7 @@ LONG_FLOAT_ACTION(compactlong_float_multiply, *)
 LONG_FLOAT_ACTION(compactlong_float_true_div, /)
 #undef LONG_FLOAT_ACTION
 
-static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
+static const _PyBinaryOpSpecializationDescr binaryop_extend_builtins[] = {
     /* long-long arithmetic */
     {NB_OR, compactlongs_guard, compactlongs_or},
     {NB_AND, compactlongs_guard, compactlongs_and},
@@ -2560,14 +2560,41 @@ static int
 binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg,
                                   _PyBinaryOpSpecializationDescr **descr)
 {
-    size_t n = sizeof(binaryop_extend_descrs)/sizeof(_PyBinaryOpSpecializationDescr);
-    for (size_t i = 0; i < n; i++) {
-        _PyBinaryOpSpecializationDescr *d = &binaryop_extend_descrs[i];
+    /* The tp_binop_specialize hook is looked up on the type of lhs only.
+     * It is currently used only for NB_SUBSCR, which is not commutative.
+     * This function will need to be revisited when the hook is used for
+     * operators which are commutative.
+     */
+
+    typedef _PyBinaryOpSpecializationDescr descr_type;
+    size_t size = Py_ARRAY_LENGTH(binaryop_extend_builtins);
+    for (size_t i = 0; i < size; i++) {
+        descr_type *d = (descr_type *)&binaryop_extend_builtins[i];
+        assert(d != NULL);
+        assert(d->guard != NULL);
         if (d->oparg == oparg && d->guard(lhs, rhs)) {
             *descr = d;
             return 1;
         }
     }
+
+    PyTypeObject *lhs_type = Py_TYPE(lhs);
+    if (lhs_type->tp_binop_specialize != NULL) {
+        int ret = lhs_type->tp_binop_specialize(lhs, rhs, oparg, descr);
+        if (ret < 0) {
+            return -1;
+        }
+        if (ret == 1) {
+            if (*descr == NULL) {
+                PyErr_Format(
+                    PyExc_ValueError,
+                    "tp_binop_specialize of '%T' returned 1 with *descr == NULL",
+                    lhs);
+                return -1;
+            }
+            (*descr)->oparg = oparg;
+        }
+        return ret;
+    }
     return 0;
 }