* backwards-compatibility */
typedef Py_ssize_t printfunc;
+/* Specialize a binary op by setting the descriptor pointer */
+struct _PyBinopSpecializationDescr;
+typedef int (*binop_specialize_func)(PyObject *v, PyObject *w, int oparg,
+ struct _PyBinopSpecializationDescr **descr);
+
// If this structure is modified, Doc/includes/typestruct.h should be updated
// as well.
struct _typeobject {
/* bitset of which type-watchers care about this type */
unsigned char tp_watched;
+ /* Callback that may specialize BINARY_OP.
+ * This is an experimental API, based on the ideas in the paper
+ * "Cross Module Quickening - The Curious Case of C Extensions"
+ * by Felix Berlakovich and Stefan Brunthaler.
+ */
+ binop_specialize_func tp_binop_specialize;
+
/* Number of tp_version_tag values used.
* Set to _Py_ATTR_CACHE_UNUSED if the attribute cache is
* disabled for this type (e.g. due to custom MRO entries).
/* Specialization Extensions */
/* callbacks for an external specialization */
+
+struct _PyBinopSpecializationDescr;
+
typedef int (*binaryopguardfunc)(PyObject *lhs, PyObject *rhs);
-typedef PyObject *(*binaryopactionfunc)(PyObject *lhs, PyObject *rhs);
+typedef PyObject* (*binaryopactionfunc)(PyObject *lhs, PyObject *rhs);
+typedef void (*binaryopfreefunc)(struct _PyBinopSpecializationDescr *descr);
-typedef struct {
+typedef struct _PyBinopSpecializationDescr {
int oparg;
binaryopguardfunc guard;
binaryopactionfunc action;
+ binaryopfreefunc free;
} _PyBinaryOpSpecializationDescr;
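For illustration only (not part of this patch, and all my_* names below are hypothetical): a descriptor wired up for the callback contracts the interpreter relies on further down. The guard returns 1 while the specialization still applies, 0 to request de-specialization, or a negative value with an exception set; the action returns a new reference or NULL with an exception set; free, if non-NULL, releases a heap-allocated descriptor.

/* Hypothetical example (not part of this patch): a statically allocated
 * descriptor specializing NB_ADD for exact ints; my_add_guard and
 * my_add_action are assumed helper names. */
static int
my_add_guard(PyObject *lhs, PyObject *rhs)
{
    /* 1: the specialization still applies; 0: de-specialize;
     * <0 would mean an error with an exception set. */
    return PyLong_CheckExact(lhs) && PyLong_CheckExact(rhs);
}

static PyObject *
my_add_action(PyObject *lhs, PyObject *rhs)
{
    /* Must return a new reference, or NULL with an exception set. */
    return PyNumber_Add(lhs, rhs);
}

static _PyBinaryOpSpecializationDescr my_add_descr = {
    .oparg = NB_ADD,
    .guard = my_add_guard,
    .action = my_add_action,
    .free = NULL,   /* statically allocated; nothing to release */
};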
/* Comparison bit masks. */
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
- [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
+ [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
[BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_ADD_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 } } },
[BINARY_OP_ADD_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 } } },
[BINARY_OP_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 } } },
- [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 } } },
[BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } },
[BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 } } },
[BINARY_OP_MULTIPLY_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_INT, OPARG_SIMPLE, 5 } } },
[_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
[_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
[_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
- [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
- [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
+ [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
[_GET_ITER] = "_GET_ITER",
[_GET_LEN] = "_GET_LEN",
[_GET_YIELD_FROM_ITER] = "_GET_YIELD_FROM_ITER",
- [_GUARD_BINARY_OP_EXTEND] = "_GUARD_BINARY_OP_EXTEND",
[_GUARD_CALLABLE_STR_1] = "_GUARD_CALLABLE_STR_1",
[_GUARD_CALLABLE_TUPLE_1] = "_GUARD_CALLABLE_TUPLE_1",
[_GUARD_CALLABLE_TYPE_1] = "_GUARD_CALLABLE_TYPE_1",
return 2;
case _BINARY_OP_INPLACE_ADD_UNICODE:
return 2;
- case _GUARD_BINARY_OP_EXTEND:
- return 0;
case _BINARY_OP_EXTEND:
return 2;
case _BINARY_SLICE:
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030E0000
/* New in 3.14 */
#define Py_tp_token 83
+#define Py_tp_binop_specialize 84
#endif
check((1,2,3), vsize('') + self.P + 3*self.P)
# type
# static type: PyTypeObject
- fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
+ fmt = 'P2nPI13Pl4Pn9Pn12PI3Pc'
s = vsize(fmt)
check(int, s)
typeid = 'n' if support.Py_GIL_DISABLED else ''
--- /dev/null
+Add an option for extension modules to specialize ``BINARY_OP`` instructions,
+and apply it to ``array`` objects.
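The array module changes below are the in-tree example of the new hook. As a hedged sketch, a third-party heap type could opt in through the new Py_tp_binop_specialize slot in the same way; all names here (MyObject, MyType_specialize, MyType_spec, mymodule) are hypothetical and not part of this patch.

#include <Python.h>

typedef struct {
    PyObject_HEAD
} MyObject;

/* Callback matching binop_specialize_func; this trivial version never
 * specializes anything. */
static int
MyType_specialize(PyObject *v, PyObject *w, int oparg,
                  struct _PyBinopSpecializationDescr **descr)
{
    (void)v; (void)w; (void)oparg;
    *descr = NULL;
    return 0;   /* 0: no specialization; 1: *descr set; -1: error */
}

static PyType_Slot MyType_slots[] = {
    {Py_tp_binop_specialize, MyType_specialize},  /* new slot in 3.14 */
    {0, NULL},
};

static PyType_Spec MyType_spec = {
    .name = "mymodule.MyType",
    .basicsize = sizeof(MyObject),
    .flags = Py_TPFLAGS_DEFAULT,
    .slots = MyType_slots,
};

Returning 0 leaves the generic BINARY_OP in place; returning 1 must also set *descr, as enforced in binary_op_extended_specialization below.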
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_moduleobject.h" // _PyModule_GetState()
+#include "opcode.h" // binary op opargs (NB_*)
+
#include <stddef.h> // offsetof()
#include <stdbool.h>
return res;
}
+static int
+array_binop_specialize(PyObject *v, PyObject *w, int oparg,
+ _PyBinaryOpSpecializationDescr **descr);
+
static Py_ssize_t
array_length(PyObject *op)
{
{Py_tp_alloc, PyType_GenericAlloc},
{Py_tp_new, array_new},
{Py_tp_traverse, array_tp_traverse},
+ {Py_tp_token, Py_TP_USE_SPEC},
+ {Py_tp_binop_specialize, array_binop_specialize},
/* as sequence */
{Py_sq_length, array_length},
.slots = array_slots,
};
+/* Guard: check that lhs is an instance of (a subclass of) the array type
+ * created from array_spec, without clobbering any exception already set. */
+static inline int
+array_subscr_guard(PyObject *lhs, PyObject *rhs)
+{
+ PyObject *exc = PyErr_GetRaisedException();
+ /* array_spec doubles as the type token (Py_TP_USE_SPEC). */
+ int ret = PyType_GetBaseByToken(Py_TYPE(lhs), &array_spec, NULL);
+ if (ret < 0) {
+ if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+ /* Treat a TypeError from the lookup as "guard fails", not an error. */
+ PyErr_Clear();
+ ret = 0;
+ }
+ }
+ _PyErr_ChainExceptions1(exc);
+ return ret;
+}
+
+static PyObject *
+array_subscr_action(PyObject *lhs, PyObject *rhs)
+{
+ return array_subscr(lhs, rhs);
+}
+
+static void
+array_subscr_free(_PyBinaryOpSpecializationDescr* descr)
+{
+ if (descr != NULL) {
+ PyMem_Free(descr);
+ }
+}
+
+static int
+array_binop_specialize(PyObject *v, PyObject *w, int oparg,
+ _PyBinaryOpSpecializationDescr **descr)
+{
+ array_state *state = find_array_state_by_type(Py_TYPE(v));
+
+ if (!array_Check(v, state)) {
+ return 0;
+ }
+
+ *descr = NULL;
+ switch (oparg) {
+ case NB_SUBSCR: {
+ /* A negative guard result means an exception was raised. */
+ int ok = array_subscr_guard(v, w);
+ if (ok < 0) {
+ return -1;
+ }
+ if (ok) {
+ *descr = (_PyBinaryOpSpecializationDescr*)PyMem_Malloc(
+ sizeof(_PyBinaryOpSpecializationDescr));
+ if (*descr == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ **descr = (_PyBinaryOpSpecializationDescr) {
+ .oparg = oparg,
+ .guard = array_subscr_guard,
+ .action = array_subscr_action,
+ .free = array_subscr_free,
+ };
+ return 1;
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
/*********************** Array Iterator **************************/
/*[clinic input]
{offsetof(PyAsyncMethods, am_send), offsetof(PyTypeObject, tp_as_async)},
{-1, offsetof(PyTypeObject, tp_vectorcall)},
{-1, offsetof(PyHeapTypeObject, ht_token)},
+{-1, offsetof(PyTypeObject, tp_binop_specialize)},
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
_PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr;
assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
- assert(d && d->guard);
+ assert(d);
+ assert(d->guard);
int res = d->guard(left_o, right_o);
- DEOPT_IF(!res);
+ ERROR_IF(res < 0);
+ if (res == 0) {
+ if (d->free) {
+ d->free(d);
+ }
+ _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(this_instr+1);
+ write_ptr(cache->external_cache, NULL);
+ this_instr->op.code = BINARY_OP;
+ DEOPT_IF(true);
+ }
}
pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) {
PyObject *res_o = d->action(left_o, right_o);
DECREF_INPUTS();
+ ERROR_IF(res_o == NULL);
res = PyStackRef_FromPyObjectSteal(res_o);
}
break;
}
- case _GUARD_BINARY_OP_EXTEND: {
- _PyStackRef right;
- _PyStackRef left;
- right = stack_pointer[-1];
- left = stack_pointer[-2];
- PyObject *descr = (PyObject *)CURRENT_OPERAND0();
- PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
- PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
- _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr;
- assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
- assert(d && d->guard);
- _PyFrame_SetStackPointer(frame, stack_pointer);
- int res = d->guard(left_o, right_o);
- stack_pointer = _PyFrame_GetStackPointer(frame);
- if (!res) {
- UOP_STAT_INC(uopcode, miss);
- JUMP_TO_JUMP_TARGET();
- }
- break;
- }
+ /* _GUARD_BINARY_OP_EXTEND is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
case _BINARY_OP_EXTEND: {
_PyStackRef right;
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
+ if (res_o == NULL) {
+ JUMP_TO_ERROR();
+ }
res = PyStackRef_FromPyObjectSteal(res_o);
stack_pointer[0] = res;
stack_pointer += 1;
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
_PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr;
assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
- assert(d && d->guard);
+ assert(d);
+ assert(d->guard);
_PyFrame_SetStackPointer(frame, stack_pointer);
int res = d->guard(left_o, right_o);
stack_pointer = _PyFrame_GetStackPointer(frame);
- if (!res) {
- UPDATE_MISS_STATS(BINARY_OP);
- assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
- JUMP_TO_PREDICTED(BINARY_OP);
+ if (res < 0) {
+ JUMP_TO_LABEL(error);
+ }
+ if (res == 0) {
+ if (d->free) {
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ d->free(d);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ }
+ _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(this_instr+1);
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ write_ptr(cache->external_cache, NULL);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ this_instr->op.code = BINARY_OP;
+ if (true) {
+ UPDATE_MISS_STATS(BINARY_OP);
+ assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
+ JUMP_TO_PREDICTED(BINARY_OP);
+ }
}
}
/* Skip -4 cache entry */
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
+ if (res_o == NULL) {
+ JUMP_TO_LABEL(error);
+ }
res = PyStackRef_FromPyObjectSteal(res_o);
}
stack_pointer[0] = res;
break;
}
- case _GUARD_BINARY_OP_EXTEND: {
- break;
- }
+ /* _GUARD_BINARY_OP_EXTEND is not a viable micro-op for tier 2 */
case _BINARY_OP_EXTEND: {
JitOptSymbol *res;
LONG_FLOAT_ACTION(compactlong_float_true_div, /)
#undef LONG_FLOAT_ACTION
-static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
+static const _PyBinaryOpSpecializationDescr binaryop_extend_builtins[] = {
/* long-long arithmetic */
{NB_OR, compactlongs_guard, compactlongs_or},
{NB_AND, compactlongs_guard, compactlongs_and},
binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg,
_PyBinaryOpSpecializationDescr **descr)
{
- size_t n = sizeof(binaryop_extend_descrs)/sizeof(_PyBinaryOpSpecializationDescr);
- for (size_t i = 0; i < n; i++) {
- _PyBinaryOpSpecializationDescr *d = &binaryop_extend_descrs[i];
+ /* We currently use this only for NB_SUBSCR, which is not
+ * commutative. This function will need to be revisited when it is
+ * used for operators that are.
+ */
+
+ typedef _PyBinaryOpSpecializationDescr descr_type;
+ size_t size = Py_ARRAY_LENGTH(binaryop_extend_builtins);
+ for (size_t i = 0; i < size; i++) {
+ descr_type *d = (descr_type *)&binaryop_extend_builtins[i];
+ assert(d != NULL);
+ assert(d->guard != NULL);
if (d->oparg == oparg && d->guard(lhs, rhs)) {
*descr = d;
return 1;
}
}
+
+ PyTypeObject *lhs_type = Py_TYPE(lhs);
+ if (lhs_type->tp_binop_specialize != NULL) {
+ int ret = lhs_type->tp_binop_specialize(lhs, rhs, oparg, descr);
+ if (ret < 0) {
+ return -1;
+ }
+ if (ret == 1) {
+ if (*descr == NULL) {
+ PyErr_Format(
+ PyExc_ValueError,
+ "tp_binop_specialize of '%T' returned 1 with *descr == NULL",
+ lhs);
+ return -1;
+ }
+ (*descr)->oparg = oparg;
+ }
+ return ret;
+ }
return 0;
}
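A brief sketch (not the actual call site in specialize.c; try_extended_specialization is an illustrative name) of how a caller is expected to consume the -1/0/1 protocol above, including the ownership rule that descr->free, when set, is invoked by the interpreter before de-specializing.

/* Illustrative only; this is not the real call site in specialize.c. */
static int
try_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg,
                            _PyBinaryOpSpecializationDescr **descr)
{
    int ret = binary_op_extended_specialization(lhs, rhs, oparg, descr);
    if (ret < 0) {
        return -1;      /* an exception was set by a guard or by the slot */
    }
    if (ret == 0) {
        return 0;       /* no specialization; keep the generic BINARY_OP */
    }
    /* ret == 1: *descr is valid. The specializer caches it next to the
     * instruction; when the cached guard later returns 0, the interpreter
     * calls (*descr)->free, if non-NULL, and reverts to BINARY_OP. */
    return 1;
}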