"Stateless" code is a function or code object which does not rely on external state or internal state.
It may rely on arguments and builtins, but not globals or a closure. I've left a comment in
pycore_code.h that provides more detail.
We also add _PyFunction_VerifyStateless(). The new functions will be used in several later changes
that facilitate "sharing" functions and code objects between interpreters.
PyObject *globalsns,
PyObject *builtinsns);
+
+/* "Stateless" code is a function or code object which does not rely on
+ * external state or internal state. It may rely on arguments and
+ * builtins, but not globals or a closure. Thus it does not rely
+ * on __globals__ or __closure__, and a stateless function
+ * is equivalent to its code object.
+ *
+ * Stateless code also does not keep any persistent state
+ * of its own, so it can't have any executors, monitoring,
+ * instrumentation, or "extras" (i.e. co_extra).
+ *
+ * Stateless code may create nested functions, including closures.
+ * However, nested functions must themselves be stateless, except they
+ * *can* close on the enclosing locals.
+ *
+ * Stateless code may return any value, including nested functions and closures.
+ *
+ * Stateless code that takes no arguments and doesn't return anything
+ * may be treated like a script.
+ *
+ * We consider stateless code to be "portable" if it does not return any
+ * any object that holds a reference to any of the code's locals. Thus
+ * generators and coroutines are not portable. Likewise a function
+ * that returns a closure is not portable. The concept of
+ * portability is useful in cases where the code is run
+ * in a different execution context than where
+ * the return value will be used. */
+
+PyAPI_FUNC(int) _PyCode_CheckNoInternalState(PyCodeObject *, const char **);
+PyAPI_FUNC(int) _PyCode_CheckNoExternalState(
+ PyCodeObject *,
+ _PyCode_var_counts_t *,
+ const char **);
+PyAPI_FUNC(int) _PyCode_VerifyStateless(
+ PyThreadState *,
+ PyCodeObject *,
+ PyObject *globalnames,
+ PyObject *globalsns,
+ PyObject *builtinsns);
+
+PyAPI_FUNC(int) _PyCode_CheckPureFunction(PyCodeObject *, const char **);
PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *);
extern PyObject *_Py_set_function_type_params(
PyThreadState* unused, PyObject *func, PyObject *type_params);
+
+/* See pycore_code.h for explanation about what "stateless" means. */
+
+PyAPI_FUNC(int)
+_PyFunction_VerifyStateless(PyThreadState *, PyObject *);
+
+
#ifdef __cplusplus
}
#endif
#define IS_RETURN_OPCODE(opcode) \
(opcode == RETURN_VALUE)
+#define IS_RAISE_OPCODE(opcode) \
+ (opcode == RAISE_VARARGS || opcode == RERAISE)
/* Flags used in the oparg for MAKE_FUNCTION */
return
+def spam_with_builtins():
+ x = 42
+ values = (42,)
+ checks = tuple(callable(v) for v in values)
+ res = callable(values), tuple(values), list(values), checks
+ print(res)
+
+
+def spam_with_globals_and_builtins():
+ func1 = spam
+ func2 = spam_minimal
+ funcs = (func1, func2)
+ checks = tuple(callable(f) for f in funcs)
+ res = callable(funcs), tuple(funcs), list(funcs), checks
+ print(res)
+
+
+def spam_returns_arg(x):
+ return x
+
+
+def spam_with_inner_not_closure():
+ def eggs():
+ pass
+ eggs()
+
+
+def spam_with_inner_closure():
+ x = 42
+ def eggs():
+ print(x)
+ eggs()
+
+
def spam_full(a, b, /, c, d:int=1, *args, e, f:object=None, **kwargs) -> tuple:
# arg defaults, kwarg defaults
# annotations
TOP_FUNCTIONS = [
# shallow
spam_minimal,
+ spam_with_builtins,
+ spam_with_globals_and_builtins,
+ spam_returns_arg,
+ spam_with_inner_not_closure,
+ spam_with_inner_closure,
spam_full,
spam,
# outer func
*NESTED_FUNCTIONS,
]
+STATELESS_FUNCTIONS = [
+ spam,
+ spam_minimal,
+ spam_with_builtins,
+ spam_returns_arg,
+ spam_with_inner_not_closure,
+ spam_with_inner_closure,
+ spam_N,
+ spam_C,
+ spam_NN,
+ spam_NC,
+ spam_CN,
+ spam_CC,
+ eggs_nested,
+ eggs_nested_N,
+ ham_nested,
+ ham_C_nested
+]
+STATELESS_CODE = [
+ *STATELESS_FUNCTIONS,
+ spam_with_globals_and_builtins,
+ spam_full,
+]
+
# generators
import _testinternalcapi
except ModuleNotFoundError:
_testinternalcapi = None
+import test._code_definitions as defs
COPY_FREE_VARS = opmap['COPY_FREE_VARS']
VARARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_POS
VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW
- import test._code_definitions as defs
funcs = {
defs.spam_minimal: {},
+ defs.spam_with_builtins: {
+ 'x': CO_FAST_LOCAL,
+ 'values': CO_FAST_LOCAL,
+ 'checks': CO_FAST_LOCAL,
+ 'res': CO_FAST_LOCAL,
+ },
+ defs.spam_with_globals_and_builtins: {
+ 'func1': CO_FAST_LOCAL,
+ 'func2': CO_FAST_LOCAL,
+ 'funcs': CO_FAST_LOCAL,
+ 'checks': CO_FAST_LOCAL,
+ 'res': CO_FAST_LOCAL,
+ },
+ defs.spam_returns_arg: {
+ 'x': POSORKW,
+ },
+ defs.spam_with_inner_not_closure: {
+ 'eggs': CO_FAST_LOCAL,
+ },
+ defs.spam_with_inner_closure: {
+ 'x': CO_FAST_CELL,
+ 'eggs': CO_FAST_LOCAL,
+ },
defs.spam_full: {
'a': POSONLY,
'b': POSONLY,
},
}
- import test._code_definitions as defs
funcs = {
defs.spam_minimal: new_var_counts(),
+ defs.spam_with_builtins: new_var_counts(
+ purelocals=4,
+ globalvars=4,
+ ),
+ defs.spam_with_globals_and_builtins: new_var_counts(
+ purelocals=5,
+ globalvars=6,
+ ),
+ defs.spam_returns_arg: new_var_counts(
+ posorkw=1,
+ ),
+ defs.spam_with_inner_not_closure: new_var_counts(
+ purelocals=1,
+ ),
+ defs.spam_with_inner_closure: new_var_counts(
+ othercells=1,
+ purelocals=1,
+ ),
defs.spam_full: new_var_counts(
posonly=2,
posorkw=2,
counts = _testinternalcapi.get_code_var_counts(func.__code__)
self.assertEqual(counts, expected)
- def func_with_globals_and_builtins():
- mod1 = _testinternalcapi
- mod2 = dis
- mods = (mod1, mod2)
- checks = tuple(callable(m) for m in mods)
- return callable(mod2), tuple(mods), list(mods), checks
-
- func = func_with_globals_and_builtins
+ func = defs.spam_with_globals_and_builtins
with self.subTest(f'{func} code'):
expected = new_var_counts(
- purelocals=4,
- globalvars=5,
+ purelocals=5,
+ globalvars=6,
)
counts = _testinternalcapi.get_code_var_counts(func.__code__)
self.assertEqual(counts, expected)
with self.subTest(f'{func} with own globals and builtins'):
expected = new_var_counts(
- purelocals=4,
- globalvars=(2, 3),
+ purelocals=5,
+ globalvars=(2, 4),
)
counts = _testinternalcapi.get_code_var_counts(func)
self.assertEqual(counts, expected)
with self.subTest(f'{func} without globals'):
expected = new_var_counts(
- purelocals=4,
- globalvars=(0, 3, 2),
+ purelocals=5,
+ globalvars=(0, 4, 2),
)
counts = _testinternalcapi.get_code_var_counts(func, globalsns={})
self.assertEqual(counts, expected)
with self.subTest(f'{func} without both'):
expected = new_var_counts(
- purelocals=4,
- globalvars=5,
+ purelocals=5,
+ globalvars=6,
)
counts = _testinternalcapi.get_code_var_counts(func, globalsns={},
builtinsns={})
with self.subTest(f'{func} without builtins'):
expected = new_var_counts(
- purelocals=4,
- globalvars=(2, 0, 3),
+ purelocals=5,
+ globalvars=(2, 0, 4),
)
counts = _testinternalcapi.get_code_var_counts(func, builtinsns={})
self.assertEqual(counts, expected)
+ @unittest.skipIf(_testinternalcapi is None, "missing _testinternalcapi")
+ def test_stateless(self):
+ self.maxDiff = None
+
+ for func in defs.STATELESS_CODE:
+ with self.subTest((func, '(code)')):
+ _testinternalcapi.verify_stateless_code(func.__code__)
+ for func in defs.STATELESS_FUNCTIONS:
+ with self.subTest((func, '(func)')):
+ _testinternalcapi.verify_stateless_code(func)
+
+ for func in defs.FUNCTIONS:
+ if func not in defs.STATELESS_CODE:
+ with self.subTest((func, '(code)')):
+ with self.assertRaises(Exception):
+ _testinternalcapi.verify_stateless_code(func.__code__)
+
+ if func not in defs.STATELESS_FUNCTIONS:
+ with self.subTest((func, '(func)')):
+ with self.assertRaises(Exception):
+ _testinternalcapi.verify_stateless_code(func)
+
def isinterned(s):
return s is sys.intern(('_' + s + '_')[1:-1])
return NULL;
}
+static PyObject *
+verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+ PyThreadState *tstate = _PyThreadState_GET();
+ PyObject *codearg;
+ PyObject *globalnames = NULL;
+ PyObject *globalsns = NULL;
+ PyObject *builtinsns = NULL;
+ static char *kwlist[] = {"code", "globalnames",
+ "globalsns", "builtinsns", NULL};
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs,
+ "O|O!O!O!:get_code_var_counts", kwlist,
+ &codearg, &PySet_Type, &globalnames,
+ &PyDict_Type, &globalsns, &PyDict_Type, &builtinsns))
+ {
+ return NULL;
+ }
+ if (PyFunction_Check(codearg)) {
+ if (globalsns == NULL) {
+ globalsns = PyFunction_GET_GLOBALS(codearg);
+ }
+ if (builtinsns == NULL) {
+ builtinsns = PyFunction_GET_BUILTINS(codearg);
+ }
+ codearg = PyFunction_GET_CODE(codearg);
+ }
+ else if (!PyCode_Check(codearg)) {
+ PyErr_SetString(PyExc_TypeError,
+ "argument must be a code object or a function");
+ return NULL;
+ }
+ PyCodeObject *code = (PyCodeObject *)codearg;
+
+ if (_PyCode_VerifyStateless(
+ tstate, code, globalnames, globalsns, builtinsns) < 0)
+ {
+ return NULL;
+ }
+ Py_RETURN_NONE;
+}
+
+
static PyObject *
jit_enabled(PyObject *self, PyObject *arg)
{
{"get_co_localskinds", get_co_localskinds, METH_O, NULL},
{"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts),
METH_VARARGS | METH_KEYWORDS, NULL},
+ {"verify_stateless_code", _PyCFunction_CAST(verify_stateless_code),
+ METH_VARARGS | METH_KEYWORDS, NULL},
{"jit_enabled", jit_enabled, METH_NOARGS, NULL},
#ifdef _Py_TIER2
{"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL},
}
+int
+_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg)
+{
+ const char *errmsg = NULL;
+ if (_PyCode_HAS_EXECUTORS(co) || _PyCode_HAS_INSTRUMENTATION(co)) {
+ errmsg = "only basic code objects are supported";
+ }
+ else if (co->_co_monitoring != NULL) {
+ errmsg = "only basic code objects are supported";
+ }
+ else if (co->co_extra != NULL) {
+ errmsg = "only basic code objects are supported";
+ }
+
+ if (errmsg != NULL) {
+ if (p_errmsg != NULL) {
+ *p_errmsg = errmsg;
+ }
+ return 0;
+ }
+ return 1;
+}
+
+int
+_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts,
+ const char **p_errmsg)
+{
+ const char *errmsg = NULL;
+ assert(counts->locals.hidden.total == 0);
+ if (counts->numfree > 0) { // It's a closure.
+ errmsg = "closures not supported";
+ }
+ else if (counts->unbound.globals.numglobal > 0) {
+ errmsg = "globals not supported";
+ }
+ else if (counts->unbound.globals.numbuiltin > 0
+ && counts->unbound.globals.numunknown > 0)
+ {
+ errmsg = "globals not supported";
+ }
+ // Otherwise we don't check counts.unbound.globals.numunknown since we can't
+ // distinguish beween globals and builtins here.
+
+ if (errmsg != NULL) {
+ if (p_errmsg != NULL) {
+ *p_errmsg = errmsg;
+ }
+ return 0;
+ }
+ return 1;
+}
+
+int
+_PyCode_VerifyStateless(PyThreadState *tstate,
+ PyCodeObject *co, PyObject *globalnames,
+ PyObject *globalsns, PyObject *builtinsns)
+{
+ const char *errmsg;
+ _PyCode_var_counts_t counts = {0};
+ _PyCode_GetVarCounts(co, &counts);
+ if (_PyCode_SetUnboundVarCounts(
+ tstate, co, &counts, globalnames, NULL,
+ globalsns, builtinsns) < 0)
+ {
+ return -1;
+ }
+ // We may consider relaxing the internal state constraints
+ // if it becomes a problem.
+ if (!_PyCode_CheckNoInternalState(co, &errmsg)) {
+ _PyErr_SetString(tstate, PyExc_ValueError, errmsg);
+ return -1;
+ }
+ if (builtinsns != NULL) {
+ // Make sure the next check will fail for globals,
+ // even if there aren't any builtins.
+ counts.unbound.globals.numbuiltin += 1;
+ }
+ if (!_PyCode_CheckNoExternalState(co, &counts, &errmsg)) {
+ _PyErr_SetString(tstate, PyExc_ValueError, errmsg);
+ return -1;
+ }
+ // Note that we don't check co->co_flags & CO_NESTED for anything here.
+ return 0;
+}
+
+
+int
+_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg)
+{
+ const char *errmsg = NULL;
+ if (co->co_flags & CO_GENERATOR) {
+ errmsg = "generators not supported";
+ }
+ else if (co->co_flags & CO_COROUTINE) {
+ errmsg = "coroutines not supported";
+ }
+ else if (co->co_flags & CO_ITERABLE_COROUTINE) {
+ errmsg = "coroutines not supported";
+ }
+ else if (co->co_flags & CO_ASYNC_GENERATOR) {
+ errmsg = "generators not supported";
+ }
+
+ if (errmsg != NULL) {
+ if (p_errmsg != NULL) {
+ *p_errmsg = errmsg;
+ }
+ return 0;
+ }
+ return 1;
+}
+
/* Here "value" means a non-None value, since a bare return is identical
* to returning None explicitly. Likewise a missing return statement
* at the end of the function is turned into "return None". */
static int
code_returns_only_none(PyCodeObject *co)
{
+ if (!_PyCode_CheckPureFunction(co, NULL)) {
+ return 0;
+ }
+ int len = (int)Py_SIZE(co);
+ assert(len > 0);
+
+ // The last instruction either returns or raises. We can take advantage
+ // of that for a quick exit.
+ _Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1);
+
// Look up None in co_consts.
Py_ssize_t nconsts = PyTuple_Size(co->co_consts);
int none_index = 0;
}
if (none_index == nconsts) {
// None wasn't there, which means there was no implicit return,
- // "return", or "return None". That means there must be
- // an explicit return (non-None).
- return 0;
- }
+ // "return", or "return None".
- // Walk the bytecode, looking for RETURN_VALUE.
- Py_ssize_t len = Py_SIZE(co);
- for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
- _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
- if (IS_RETURN_OPCODE(inst.op.code)) {
- assert(i != 0);
- // Ignore it if it returns None.
- _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1);
- if (prev.op.code == LOAD_CONST) {
- // We don't worry about EXTENDED_ARG for now.
- if (prev.op.arg == none_index) {
- continue;
+ // That means there must be
+ // an explicit return (non-None), or it only raises.
+ if (IS_RETURN_OPCODE(final.op.code)) {
+ // It was an explicit return (non-None).
+ return 0;
+ }
+ // It must end with a raise then. We still have to walk the
+ // bytecode to see if there's any explicit return (non-None).
+ assert(IS_RAISE_OPCODE(final.op.code));
+ for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
+ _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
+ if (IS_RETURN_OPCODE(inst.op.code)) {
+ // We alraedy know it isn't returning None.
+ return 0;
+ }
+ }
+ // It must only raise.
+ }
+ else {
+ // Walk the bytecode, looking for RETURN_VALUE.
+ for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
+ _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
+ if (IS_RETURN_OPCODE(inst.op.code)) {
+ assert(i != 0);
+ // Ignore it if it returns None.
+ _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1);
+ if (prev.op.code == LOAD_CONST) {
+ // We don't worry about EXTENDED_ARG for now.
+ if (prev.op.arg == none_index) {
+ continue;
+ }
}
+ return 0;
}
- return 0;
}
}
return 1;
/* Function object implementation */
#include "Python.h"
+#include "pycore_code.h" // _PyCode_VerifyStateless()
#include "pycore_dict.h" // _Py_INCREF_DICT()
#include "pycore_function.h" // _PyFunction_Vectorcall
#include "pycore_long.h" // _PyLong_GetOne()
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_pyerrors.h" // _PyErr_Occurred()
+#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
};
+int
+_PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func)
+{
+ assert(!PyErr_Occurred());
+ assert(PyFunction_Check(func));
+
+ // Check the globals.
+ PyObject *globalsns = PyFunction_GET_GLOBALS(func);
+ if (globalsns != NULL && !PyDict_Check(globalsns)) {
+ _PyErr_Format(tstate, PyExc_TypeError,
+ "unsupported globals %R", globalsns);
+ return -1;
+ }
+ // Check the builtins.
+ PyObject *builtinsns = PyFunction_GET_BUILTINS(func);
+ if (builtinsns != NULL && !PyDict_Check(builtinsns)) {
+ _PyErr_Format(tstate, PyExc_TypeError,
+ "unsupported builtins %R", builtinsns);
+ return -1;
+ }
+ // Disallow __defaults__.
+ PyObject *defaults = PyFunction_GET_DEFAULTS(func);
+ if (defaults != NULL && defaults != Py_None && PyDict_Size(defaults) > 0)
+ {
+ _PyErr_SetString(tstate, PyExc_ValueError, "defaults not supported");
+ return -1;
+ }
+ // Disallow __kwdefaults__.
+ PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func);
+ if (kwdefaults != NULL && kwdefaults != Py_None
+ && PyDict_Size(kwdefaults) > 0)
+ {
+ _PyErr_SetString(tstate, PyExc_ValueError,
+ "keyword defaults not supported");
+ return -1;
+ }
+ // Disallow __closure__.
+ PyObject *closure = PyFunction_GET_CLOSURE(func);
+ if (closure != NULL && closure != Py_None && PyTuple_GET_SIZE(closure) > 0)
+ {
+ _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported");
+ return -1;
+ }
+ // Check the code.
+ PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
+ if (_PyCode_VerifyStateless(tstate, co, NULL, globalsns, builtinsns) < 0) {
+ return -1;
+ }
+ return 0;
+}
+
+
static int
functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name)
{