git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-106529: Split FOR_ITER_{LIST,TUPLE} into uops (#106696)
author Guido van Rossum <guido@python.org>
Fri, 14 Jul 2023 00:27:35 +0000 (17:27 -0700)
committer GitHub <noreply@github.com>
Fri, 14 Jul 2023 00:27:35 +0000 (17:27 -0700)
Also rename `_ITER_EXHAUSTED_XXX` to `_IS_ITER_EXHAUSTED_XXX` to make it clear this is a test.

Include/internal/pycore_opcode_metadata.h
Lib/test/test_capi/test_misc.py
Python/bytecodes.c
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/optimizer.c

index c88640777e3fb0a8e23b5c137315f6312a49ad0b..e94732b64384b5d3de149b4efbb3c20bba39ac52 100644 (file)
 #define _GUARD_TYPE_VERSION 317
 #define _CHECK_MANAGED_OBJECT_HAS_VALUES 318
 #define IS_NONE 319
-#define _ITER_CHECK_RANGE 320
-#define _ITER_EXHAUSTED_RANGE 321
-#define _ITER_NEXT_RANGE 322
-#define _POP_JUMP_IF_FALSE 323
-#define _POP_JUMP_IF_TRUE 324
-#define JUMP_TO_TOP 325
+#define _ITER_CHECK_LIST 320
+#define _IS_ITER_EXHAUSTED_LIST 321
+#define _ITER_NEXT_LIST 322
+#define _ITER_CHECK_TUPLE 323
+#define _IS_ITER_EXHAUSTED_TUPLE 324
+#define _ITER_NEXT_TUPLE 325
+#define _ITER_CHECK_RANGE 326
+#define _IS_ITER_EXHAUSTED_RANGE 327
+#define _ITER_NEXT_RANGE 328
+#define _POP_JUMP_IF_FALSE 329
+#define _POP_JUMP_IF_TRUE 330
+#define JUMP_TO_TOP 331
 
 #ifndef NEED_OPCODE_METADATA
 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
@@ -1323,8 +1329,14 @@ const char * const _PyOpcode_uop_name[512] = {
     [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION",
     [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES",
     [IS_NONE] = "IS_NONE",
+    [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST",
+    [_IS_ITER_EXHAUSTED_LIST] = "_IS_ITER_EXHAUSTED_LIST",
+    [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST",
+    [_ITER_CHECK_TUPLE] = "_ITER_CHECK_TUPLE",
+    [_IS_ITER_EXHAUSTED_TUPLE] = "_IS_ITER_EXHAUSTED_TUPLE",
+    [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE",
     [_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE",
-    [_ITER_EXHAUSTED_RANGE] = "_ITER_EXHAUSTED_RANGE",
+    [_IS_ITER_EXHAUSTED_RANGE] = "_IS_ITER_EXHAUSTED_RANGE",
     [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE",
     [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE",
     [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE",
index abdf7ed89763501234b16c8f06debd696216a892..43c04463236a2adc13abf0951c44cd94f1e039b9 100644 (file)
@@ -2590,7 +2590,6 @@ class TestUops(unittest.TestCase):
             for i in range(n):
                 total += i
             return total
-        # import dis; dis.dis(testfunc)
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
@@ -2602,7 +2601,51 @@ class TestUops(unittest.TestCase):
         # for i, (opname, oparg) in enumerate(ex):
         #     print(f"{i:4d}: {opname:<20s} {oparg:3d}")
         uops = {opname for opname, _ in ex}
-        self.assertIn("_ITER_EXHAUSTED_RANGE", uops)
+        self.assertIn("_IS_ITER_EXHAUSTED_RANGE", uops)
+        # Verification that the jump goes past END_FOR
+        # is done by manual inspection of the output
+
+    def test_for_iter_list(self):
+        def testfunc(a):
+            total = 0
+            for i in a:
+                total += i
+            return total
+
+        opt = _testinternalcapi.get_uop_optimizer()
+        with temporary_optimizer(opt):
+            a = list(range(10))
+            total = testfunc(a)
+            self.assertEqual(total, 45)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        # for i, (opname, oparg) in enumerate(ex):
+        #     print(f"{i:4d}: {opname:<20s} {oparg:3d}")
+        uops = {opname for opname, _ in ex}
+        self.assertIn("_IS_ITER_EXHAUSTED_LIST", uops)
+        # Verification that the jump goes past END_FOR
+        # is done by manual inspection of the output
+
+    def test_for_iter_tuple(self):
+        def testfunc(a):
+            total = 0
+            for i in a:
+                total += i
+            return total
+
+        opt = _testinternalcapi.get_uop_optimizer()
+        with temporary_optimizer(opt):
+            a = tuple(range(10))
+            total = testfunc(a)
+            self.assertEqual(total, 45)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        # for i, (opname, oparg) in enumerate(ex):
+        #     print(f"{i:4d}: {opname:<20s} {oparg:3d}")
+        uops = {opname for opname, _ in ex}
+        self.assertIn("_IS_ITER_EXHAUSTED_TUPLE", uops)
         # Verification that the jump goes past END_FOR
         # is done by manual inspection of the output
 
index 1fe9970e53cdfe3bb7adb2b283ca2de0cda12255..15b48ae9d82672c2995a72346cebf24721b9dbbe 100644 (file)
@@ -17,6 +17,7 @@
 #include "pycore_object.h"        // _PyObject_GC_TRACK()
 #include "pycore_moduleobject.h"  // PyModuleObject
 #include "pycore_opcode.h"        // EXTRA_CASES
+#include "pycore_opcode_metadata.h"  // uop names
 #include "pycore_opcode_utils.h"  // MAKE_FUNCTION_*
 #include "pycore_pyerrors.h"      // _PyErr_GetRaisedException()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub;
 static PyObject *container, *start, *stop, *v, *lhs, *rhs, *res2;
 static PyObject *list, *tuple, *dict, *owner, *set, *str, *tup, *map, *keys;
-static PyObject *exit_func, *lasti, *val, *retval, *obj, *iter;
+static PyObject *exit_func, *lasti, *val, *retval, *obj, *iter, *exhausted;
 static PyObject *aiter, *awaitable, *iterable, *w, *exc_value, *bc, *locals;
 static PyObject *orig, *excs, *update, *b, *fromlist, *level, *from;
 static PyObject **pieces, **values;
 static size_t jump;
 // Dummy variables for cache effects
 static uint16_t invert, counter, index, hint;
+#define unused 0  // Used in a macro def, can't be static
 static uint32_t type_version;
 
 static PyObject *
@@ -2418,52 +2420,108 @@ dummy_func(
             INSTRUMENTED_JUMP(here, target, PY_MONITORING_EVENT_BRANCH);
         }
 
-        inst(FOR_ITER_LIST, (unused/1, iter -- iter, next)) {
+        op(_ITER_CHECK_LIST, (iter -- iter)) {
             DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER);
+        }
+
+        op(_ITER_JUMP_LIST, (iter -- iter)) {
             _PyListIterObject *it = (_PyListIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyListIter_Type);
             STAT_INC(FOR_ITER, hit);
             PyListObject *seq = it->it_seq;
-            if (seq) {
-                if (it->it_index < PyList_GET_SIZE(seq)) {
-                    next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++));
-                    goto end_for_iter_list;  // End of this instruction
+            if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) {
+                if (seq != NULL) {
+                    it->it_seq = NULL;
+                    Py_DECREF(seq);
                 }
-                it->it_seq = NULL;
-                Py_DECREF(seq);
+                Py_DECREF(iter);
+                STACK_SHRINK(1);
+                SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
+                /* Jump forward oparg, then skip following END_FOR instruction */
+                JUMPBY(oparg + 1);
+                DISPATCH();
             }
-            Py_DECREF(iter);
-            STACK_SHRINK(1);
-            SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
-            /* Jump forward oparg, then skip following END_FOR instruction */
-            JUMPBY(oparg + 1);
-            DISPATCH();
-        end_for_iter_list:
-            // Common case: no jump, leave it to the code generator
         }
 
-        inst(FOR_ITER_TUPLE, (unused/1, iter -- iter, next)) {
+        // Only used by Tier 2
+        op(_IS_ITER_EXHAUSTED_LIST, (iter -- iter, exhausted)) {
+            _PyListIterObject *it = (_PyListIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyListIter_Type);
+            PyListObject *seq = it->it_seq;
+            if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) {
+                exhausted = Py_True;
+            }
+            else {
+                exhausted = Py_False;
+            }
+        }
+
+        op(_ITER_NEXT_LIST, (iter -- iter, next)) {
+            _PyListIterObject *it = (_PyListIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyListIter_Type);
+            PyListObject *seq = it->it_seq;
+            assert(seq);
+            assert(it->it_index < PyList_GET_SIZE(seq));
+            next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++));
+        }
+
+        macro(FOR_ITER_LIST) =
+            unused/1 +  // Skip over the counter
+            _ITER_CHECK_LIST +
+            _ITER_JUMP_LIST +
+            _ITER_NEXT_LIST;
+
+        op(_ITER_CHECK_TUPLE, (iter -- iter)) {
+            DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, FOR_ITER);
+        }
+
+        op(_ITER_JUMP_TUPLE, (iter -- iter)) {
             _PyTupleIterObject *it = (_PyTupleIterObject *)iter;
-            DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER);
+            assert(Py_TYPE(iter) == &PyTupleIter_Type);
             STAT_INC(FOR_ITER, hit);
             PyTupleObject *seq = it->it_seq;
-            if (seq) {
-                if (it->it_index < PyTuple_GET_SIZE(seq)) {
-                    next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++));
-                    goto end_for_iter_tuple;  // End of this instruction
+            if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) {
+                if (seq != NULL) {
+                    it->it_seq = NULL;
+                    Py_DECREF(seq);
                 }
-                it->it_seq = NULL;
-                Py_DECREF(seq);
+                Py_DECREF(iter);
+                STACK_SHRINK(1);
+                SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
+                /* Jump forward oparg, then skip following END_FOR instruction */
+                JUMPBY(oparg + 1);
+                DISPATCH();
             }
-            Py_DECREF(iter);
-            STACK_SHRINK(1);
-            SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
-            /* Jump forward oparg, then skip following END_FOR instruction */
-            JUMPBY(oparg + 1);
-            DISPATCH();
-        end_for_iter_tuple:
-            // Common case: no jump, leave it to the code generator
         }
 
+        // Only used by Tier 2
+        op(_IS_ITER_EXHAUSTED_TUPLE, (iter -- iter, exhausted)) {
+            _PyTupleIterObject *it = (_PyTupleIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyTupleIter_Type);
+            PyTupleObject *seq = it->it_seq;
+            if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) {
+                exhausted = Py_True;
+            }
+            else {
+                exhausted = Py_False;
+            }
+        }
+
+        op(_ITER_NEXT_TUPLE, (iter -- iter, next)) {
+            _PyTupleIterObject *it = (_PyTupleIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyTupleIter_Type);
+            PyTupleObject *seq = it->it_seq;
+            assert(seq);
+            assert(it->it_index < PyTuple_GET_SIZE(seq));
+            next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++));
+        }
+
+        macro(FOR_ITER_TUPLE) =
+            unused/1 +  // Skip over the counter
+            _ITER_CHECK_TUPLE +
+            _ITER_JUMP_TUPLE +
+            _ITER_NEXT_TUPLE;
+
         op(_ITER_CHECK_RANGE, (iter -- iter)) {
             _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
             DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
@@ -2484,7 +2542,7 @@ dummy_func(
         }
 
         // Only used by Tier 2
-        op(_ITER_EXHAUSTED_RANGE, (iter -- iter, exhausted)) {
+        op(_IS_ITER_EXHAUSTED_RANGE, (iter -- iter, exhausted)) {
             _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
             assert(Py_TYPE(r) == &PyRangeIter_Type);
             exhausted = r->len <= 0 ? Py_True : Py_False;
@@ -2502,7 +2560,10 @@ dummy_func(
         }
 
         macro(FOR_ITER_RANGE) =
-            unused/1 + _ITER_CHECK_RANGE + _ITER_JUMP_RANGE + _ITER_NEXT_RANGE;
+            unused/1 +  // Skip over the counter
+            _ITER_CHECK_RANGE +
+            _ITER_JUMP_RANGE +
+            _ITER_NEXT_RANGE;
 
         inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) {
             DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
index ce54755d5d25f10b799eb6a9da18a704d114c617..626baece814607922e26d08019359bf11d8016e1 100644 (file)
             break;
         }
 
+        case _ITER_CHECK_LIST: {
+            PyObject *iter = stack_pointer[-1];
+            DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER);
+            break;
+        }
+
+        case _IS_ITER_EXHAUSTED_LIST: {
+            PyObject *iter = stack_pointer[-1];
+            PyObject *exhausted;
+            _PyListIterObject *it = (_PyListIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyListIter_Type);
+            PyListObject *seq = it->it_seq;
+            if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) {
+                exhausted = Py_True;
+            }
+            else {
+                exhausted = Py_False;
+            }
+            STACK_GROW(1);
+            stack_pointer[-1] = exhausted;
+            break;
+        }
+
+        case _ITER_NEXT_LIST: {
+            PyObject *iter = stack_pointer[-1];
+            PyObject *next;
+            _PyListIterObject *it = (_PyListIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyListIter_Type);
+            PyListObject *seq = it->it_seq;
+            assert(seq);
+            assert(it->it_index < PyList_GET_SIZE(seq));
+            next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++));
+            STACK_GROW(1);
+            stack_pointer[-1] = next;
+            break;
+        }
+
+        case _ITER_CHECK_TUPLE: {
+            PyObject *iter = stack_pointer[-1];
+            DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, FOR_ITER);
+            break;
+        }
+
+        case _IS_ITER_EXHAUSTED_TUPLE: {
+            PyObject *iter = stack_pointer[-1];
+            PyObject *exhausted;
+            _PyTupleIterObject *it = (_PyTupleIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyTupleIter_Type);
+            PyTupleObject *seq = it->it_seq;
+            if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) {
+                exhausted = Py_True;
+            }
+            else {
+                exhausted = Py_False;
+            }
+            STACK_GROW(1);
+            stack_pointer[-1] = exhausted;
+            break;
+        }
+
+        case _ITER_NEXT_TUPLE: {
+            PyObject *iter = stack_pointer[-1];
+            PyObject *next;
+            _PyTupleIterObject *it = (_PyTupleIterObject *)iter;
+            assert(Py_TYPE(iter) == &PyTupleIter_Type);
+            PyTupleObject *seq = it->it_seq;
+            assert(seq);
+            assert(it->it_index < PyTuple_GET_SIZE(seq));
+            next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++));
+            STACK_GROW(1);
+            stack_pointer[-1] = next;
+            break;
+        }
+
         case _ITER_CHECK_RANGE: {
             PyObject *iter = stack_pointer[-1];
             _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
             break;
         }
 
-        case _ITER_EXHAUSTED_RANGE: {
+        case _IS_ITER_EXHAUSTED_RANGE: {
             PyObject *iter = stack_pointer[-1];
             PyObject *exhausted;
             _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
index d43c7386bd6f6d0c4e1020e1aea1942a8906c1a9..68531dc074769e0efb052c101f2210fa0b7e28e1 100644 (file)
         }
 
         TARGET(FOR_ITER_LIST) {
-            PyObject *iter = stack_pointer[-1];
-            PyObject *next;
-            DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER);
-            _PyListIterObject *it = (_PyListIterObject *)iter;
-            STAT_INC(FOR_ITER, hit);
-            PyListObject *seq = it->it_seq;
-            if (seq) {
-                if (it->it_index < PyList_GET_SIZE(seq)) {
-                    next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++));
-                    goto end_for_iter_list;  // End of this instruction
+            PyObject *_tmp_1;
+            PyObject *_tmp_2 = stack_pointer[-1];
+            {
+                PyObject *iter = _tmp_2;
+                DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER);
+                _tmp_2 = iter;
+            }
+            {
+                PyObject *iter = _tmp_2;
+                _PyListIterObject *it = (_PyListIterObject *)iter;
+                assert(Py_TYPE(iter) == &PyListIter_Type);
+                STAT_INC(FOR_ITER, hit);
+                PyListObject *seq = it->it_seq;
+                if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) {
+                    if (seq != NULL) {
+                        it->it_seq = NULL;
+                        Py_DECREF(seq);
+                    }
+                    Py_DECREF(iter);
+                    STACK_SHRINK(1);
+                    SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
+                    /* Jump forward oparg, then skip following END_FOR instruction */
+                    JUMPBY(oparg + 1);
+                    DISPATCH();
                 }
-                it->it_seq = NULL;
-                Py_DECREF(seq);
+                _tmp_2 = iter;
+            }
+            {
+                PyObject *iter = _tmp_2;
+                PyObject *next;
+                _PyListIterObject *it = (_PyListIterObject *)iter;
+                assert(Py_TYPE(iter) == &PyListIter_Type);
+                PyListObject *seq = it->it_seq;
+                assert(seq);
+                assert(it->it_index < PyList_GET_SIZE(seq));
+                next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++));
+                _tmp_2 = iter;
+                _tmp_1 = next;
             }
-            Py_DECREF(iter);
-            STACK_SHRINK(1);
-            SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
-            /* Jump forward oparg, then skip following END_FOR instruction */
-            JUMPBY(oparg + 1);
-            DISPATCH();
-        end_for_iter_list:
-            // Common case: no jump, leave it to the code generator
-            STACK_GROW(1);
-            stack_pointer[-1] = next;
             next_instr += 1;
+            STACK_GROW(1);
+            stack_pointer[-1] = _tmp_1;
+            stack_pointer[-2] = _tmp_2;
             DISPATCH();
         }
 
         TARGET(FOR_ITER_TUPLE) {
-            PyObject *iter = stack_pointer[-1];
-            PyObject *next;
-            _PyTupleIterObject *it = (_PyTupleIterObject *)iter;
-            DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER);
-            STAT_INC(FOR_ITER, hit);
-            PyTupleObject *seq = it->it_seq;
-            if (seq) {
-                if (it->it_index < PyTuple_GET_SIZE(seq)) {
-                    next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++));
-                    goto end_for_iter_tuple;  // End of this instruction
+            PyObject *_tmp_1;
+            PyObject *_tmp_2 = stack_pointer[-1];
+            {
+                PyObject *iter = _tmp_2;
+                DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, FOR_ITER);
+                _tmp_2 = iter;
+            }
+            {
+                PyObject *iter = _tmp_2;
+                _PyTupleIterObject *it = (_PyTupleIterObject *)iter;
+                assert(Py_TYPE(iter) == &PyTupleIter_Type);
+                STAT_INC(FOR_ITER, hit);
+                PyTupleObject *seq = it->it_seq;
+                if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) {
+                    if (seq != NULL) {
+                        it->it_seq = NULL;
+                        Py_DECREF(seq);
+                    }
+                    Py_DECREF(iter);
+                    STACK_SHRINK(1);
+                    SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
+                    /* Jump forward oparg, then skip following END_FOR instruction */
+                    JUMPBY(oparg + 1);
+                    DISPATCH();
                 }
-                it->it_seq = NULL;
-                Py_DECREF(seq);
+                _tmp_2 = iter;
+            }
+            {
+                PyObject *iter = _tmp_2;
+                PyObject *next;
+                _PyTupleIterObject *it = (_PyTupleIterObject *)iter;
+                assert(Py_TYPE(iter) == &PyTupleIter_Type);
+                PyTupleObject *seq = it->it_seq;
+                assert(seq);
+                assert(it->it_index < PyTuple_GET_SIZE(seq));
+                next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++));
+                _tmp_2 = iter;
+                _tmp_1 = next;
             }
-            Py_DECREF(iter);
-            STACK_SHRINK(1);
-            SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
-            /* Jump forward oparg, then skip following END_FOR instruction */
-            JUMPBY(oparg + 1);
-            DISPATCH();
-        end_for_iter_tuple:
-            // Common case: no jump, leave it to the code generator
-            STACK_GROW(1);
-            stack_pointer[-1] = next;
             next_instr += 1;
+            STACK_GROW(1);
+            stack_pointer[-1] = _tmp_1;
+            stack_pointer[-2] = _tmp_2;
             DISPATCH();
         }
 
index abd2351f6b78bdeb624d27642fa49e49033bcc01..289b202f806ae1f8f7e2d244862f0773df23a943 100644 (file)
@@ -378,6 +378,7 @@ translate_bytecode_to_trace(
     _Py_CODEUNIT *initial_instr = instr;
     int trace_length = 0;
     int max_length = buffer_size;
+    int reserved = 0;
 
 #ifdef Py_DEBUG
     char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@@ -385,6 +386,9 @@ translate_bytecode_to_trace(
     if (uop_debug != NULL && *uop_debug >= '0') {
         lltrace = *uop_debug - '0';  // TODO: Parse an int and all that
     }
+#endif
+
+#ifdef Py_DEBUG
 #define DPRINTF(level, ...) \
     if (lltrace >= (level)) { fprintf(stderr, __VA_ARGS__); }
 #else
@@ -397,6 +401,8 @@ translate_bytecode_to_trace(
             uop_name(OPCODE), \
             (uint64_t)(OPERAND)); \
     assert(trace_length < max_length); \
+    assert(reserved > 0); \
+    reserved--; \
     trace[trace_length].opcode = (OPCODE); \
     trace[trace_length].operand = (OPERAND); \
     trace_length++;
@@ -409,9 +415,23 @@ translate_bytecode_to_trace(
             (INDEX), \
             uop_name(OPCODE), \
             (uint64_t)(OPERAND)); \
+    assert(reserved > 0); \
+    reserved--; \
     trace[(INDEX)].opcode = (OPCODE); \
     trace[(INDEX)].operand = (OPERAND);
 
+// Reserve space for n uops
+#define RESERVE_RAW(n, opname) \
+    if (trace_length + (n) > max_length) { \
+        DPRINTF(2, "No room for %s (need %d, got %d)\n", \
+                (opname), (n), max_length - trace_length); \
+        goto done; \
+    } \
+    reserved = (n);  // Keep ADD_TO_TRACE / ADD_TO_STUB honest
+
+// Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE
+#define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode))
+
     DPRINTF(4,
             "Optimizing %s (%s:%d) at byte offset %ld\n",
             PyUnicode_AsUTF8(code->co_qualname),
@@ -420,16 +440,20 @@ translate_bytecode_to_trace(
             2 * INSTR_IP(initial_instr, code));
 
     for (;;) {
+        RESERVE_RAW(2, "epilogue");  // Always need space for SAVE_IP and EXIT_TRACE
         ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code));
+
         int opcode = instr->op.code;
         int oparg = instr->op.arg;
         int extras = 0;
+
         while (opcode == EXTENDED_ARG) {
             instr++;
             extras += 1;
             opcode = instr->op.code;
             oparg = (oparg << 8) | instr->op.arg;
         }
+
         if (opcode == ENTER_EXECUTOR) {
             _PyExecutorObject *executor =
                 (_PyExecutorObject *)code->co_executors->executors[oparg&255];
@@ -437,17 +461,14 @@ translate_bytecode_to_trace(
             DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  _PyOpcode_OpName[opcode]);
             oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
         }
+
         switch (opcode) {
 
             case POP_JUMP_IF_FALSE:
             case POP_JUMP_IF_TRUE:
             {
                 // Assume jump unlikely (TODO: handle jump likely case)
-                // Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
-                if (trace_length + 5 > max_length) {
-                    DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n");
-                    goto done;
-                }
+                RESERVE(1, 2);
                 _Py_CODEUNIT *target_instr =
                     instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
                 max_length -= 2;  // Really the start of the stubs
@@ -461,9 +482,8 @@ translate_bytecode_to_trace(
 
             case JUMP_BACKWARD:
             {
-                if (instr + 2 - oparg == initial_instr
-                    && trace_length + 3 <= max_length)
-                {
+                if (instr + 2 - oparg == initial_instr) {
+                    RESERVE(1, 0);
                     ADD_TO_TRACE(JUMP_TO_TOP, 0);
                 }
                 else {
@@ -474,26 +494,45 @@ translate_bytecode_to_trace(
 
             case JUMP_FORWARD:
             {
+                RESERVE(0, 0);
                 // This will emit two SAVE_IP instructions; leave it to the optimizer
                 instr += oparg;
                 break;
             }
 
+            case FOR_ITER_LIST:
+            case FOR_ITER_TUPLE:
             case FOR_ITER_RANGE:
             {
-                // Assume jump unlikely (can a for-loop exit be likely?)
-                // Reserve 9 entries (4 here, 3 stub, plus SAVE_IP + EXIT_TRACE)
-                if (trace_length + 9 > max_length) {
-                    DPRINTF(1, "Ran out of space for FOR_ITER_RANGE\n");
-                    goto done;
+                RESERVE(4, 3);
+                int check_op, exhausted_op, next_op;
+                switch (opcode) {
+                    case FOR_ITER_LIST:
+                        check_op = _ITER_CHECK_LIST;
+                        exhausted_op = _IS_ITER_EXHAUSTED_LIST;
+                        next_op = _ITER_NEXT_LIST;
+                        break;
+                    case FOR_ITER_TUPLE:
+                        check_op = _ITER_CHECK_TUPLE;
+                        exhausted_op = _IS_ITER_EXHAUSTED_TUPLE;
+                        next_op = _ITER_NEXT_TUPLE;
+                        break;
+                    case FOR_ITER_RANGE:
+                        check_op = _ITER_CHECK_RANGE;
+                        exhausted_op = _IS_ITER_EXHAUSTED_RANGE;
+                        next_op = _ITER_NEXT_RANGE;
+                        break;
+                    default:
+                        Py_UNREACHABLE();
                 }
+                // Assume jump unlikely (can a for-loop exit be likely?)
                 _Py_CODEUNIT *target_instr =  // +1 at the end skips over END_FOR
                     instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg + 1;
                 max_length -= 3;  // Really the start of the stubs
-                ADD_TO_TRACE(_ITER_CHECK_RANGE, 0);
-                ADD_TO_TRACE(_ITER_EXHAUSTED_RANGE, 0);
+                ADD_TO_TRACE(check_op, 0);
+                ADD_TO_TRACE(exhausted_op, 0);
                 ADD_TO_TRACE(_POP_JUMP_IF_TRUE, max_length);
-                ADD_TO_TRACE(_ITER_NEXT_RANGE, 0);
+                ADD_TO_TRACE(next_op, 0);
 
                 ADD_TO_STUB(max_length + 0, POP_TOP, 0);
                 ADD_TO_STUB(max_length + 1, SAVE_IP, INSTR_IP(target_instr, code));
@@ -507,10 +546,7 @@ translate_bytecode_to_trace(
                 if (expansion->nuops > 0) {
                     // Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
                     int nuops = expansion->nuops;
-                    if (trace_length + nuops + 2 > max_length) {
-                        DPRINTF(1, "Ran out of space for %s\n", uop_name(opcode));
-                        goto done;
-                    }
+                    RESERVE(nuops, 0);
                     for (int i = 0; i < nuops; i++) {
                         uint64_t operand;
                         int offset = expansion->uops[i].offset;
@@ -556,12 +592,14 @@ translate_bytecode_to_trace(
                 }
                 DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode));
                 goto done;  // Break out of loop
-            }
-        }
+            }  // End default
+
+        }  // End switch (opcode)
+
         instr++;
         // Add cache size for opcode
         instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
-    }
+    }  // End for (;;)
 
 done:
     // Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
@@ -610,6 +648,9 @@ done:
     }
     return 0;
 
+#undef RESERVE
+#undef RESERVE_RAW
+#undef INSTR_IP
 #undef ADD_TO_TRACE
 #undef DPRINTF
 }