]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-121784: Generate an error during code gen if a variable is marked `unused`, but...
authorMark Shannon <mark@hotpy.org>
Thu, 18 Jul 2024 11:49:24 +0000 (12:49 +0100)
committerGitHub <noreply@github.com>
Thu, 18 Jul 2024 11:49:24 +0000 (12:49 +0100)
* Reject uop definitions that declare values as 'unused' that are already cached by prior uops

* Track which variables are defined and only load from memory when needed

* Support explicit `flush` in macro definitions.

* Make sure stack is flushed in where needed.

Lib/test/test_generated_cases.py
Python/bytecodes.c
Python/executor_cases.c.h
Python/generated_cases.c.h
Tools/cases_generator/analyzer.py
Tools/cases_generator/optimizer_generator.py
Tools/cases_generator/stack.py
Tools/cases_generator/tier1_generator.py
Tools/cases_generator/tier2_generator.py

index 00def509a219c3f615a4d0541234be6004de927b..7fe6e4f20c43a09eb2ac28c475a21b3b1d4a78c6 100644 (file)
@@ -139,7 +139,7 @@ class TestGeneratedCases(unittest.TestCase):
     def test_inst_one_pop(self):
         input = """
         inst(OP, (value --)) {
-            spam();
+            spam(value);
         }
     """
         output = """
@@ -149,7 +149,7 @@ class TestGeneratedCases(unittest.TestCase):
             INSTRUCTION_STATS(OP);
             _PyStackRef value;
             value = stack_pointer[-1];
-            spam();
+            spam(value);
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
             DISPATCH();
@@ -160,7 +160,7 @@ class TestGeneratedCases(unittest.TestCase):
     def test_inst_one_push(self):
         input = """
         inst(OP, (-- res)) {
-            spam();
+            res = spam();
         }
     """
         output = """
@@ -169,7 +169,7 @@ class TestGeneratedCases(unittest.TestCase):
             next_instr += 1;
             INSTRUCTION_STATS(OP);
             _PyStackRef res;
-            spam();
+            res = spam();
             stack_pointer[0] = res;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
@@ -181,7 +181,7 @@ class TestGeneratedCases(unittest.TestCase):
     def test_inst_one_push_one_pop(self):
         input = """
         inst(OP, (value -- res)) {
-            spam();
+            res = spam(value);
         }
     """
         output = """
@@ -192,7 +192,7 @@ class TestGeneratedCases(unittest.TestCase):
             _PyStackRef value;
             _PyStackRef res;
             value = stack_pointer[-1];
-            spam();
+            res = spam(value);
             stack_pointer[-1] = res;
             DISPATCH();
         }
@@ -202,7 +202,7 @@ class TestGeneratedCases(unittest.TestCase):
     def test_binary_op(self):
         input = """
         inst(OP, (left, right -- res)) {
-            spam();
+            res = spam(left, right);
         }
     """
         output = """
@@ -210,12 +210,12 @@ class TestGeneratedCases(unittest.TestCase):
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(OP);
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            spam();
+            res = spam(left, right);
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
@@ -227,7 +227,7 @@ class TestGeneratedCases(unittest.TestCase):
     def test_overlap(self):
         input = """
         inst(OP, (left, right -- left, result)) {
-            spam();
+            result = spam(left, right);
         }
     """
         output = """
@@ -235,12 +235,12 @@ class TestGeneratedCases(unittest.TestCase):
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(OP);
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef result;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            spam();
+            result = spam(left, right);
             stack_pointer[-1] = result;
             DISPATCH();
         }
@@ -253,6 +253,7 @@ class TestGeneratedCases(unittest.TestCase):
         }
         inst(OP3, (arg -- res)) {
             DEOPT_IF(xxx);
+            res = Py_None;
             CHECK_EVAL_BREAKER();
         }
         family(OP1, INLINE_CACHE_ENTRIES_OP1) = { OP3 };
@@ -263,9 +264,6 @@ class TestGeneratedCases(unittest.TestCase):
             next_instr += 1;
             INSTRUCTION_STATS(OP1);
             PREDICTED(OP1);
-            _PyStackRef arg;
-            _PyStackRef rest;
-            arg = stack_pointer[-1];
             stack_pointer[-1] = rest;
             DISPATCH();
         }
@@ -275,10 +273,9 @@ class TestGeneratedCases(unittest.TestCase):
             next_instr += 1;
             INSTRUCTION_STATS(OP3);
             static_assert(INLINE_CACHE_ENTRIES_OP1 == 0, "incorrect cache size");
-            _PyStackRef arg;
             _PyStackRef res;
-            arg = stack_pointer[-1];
             DEOPT_IF(xxx, OP1);
+            res = Py_None;
             stack_pointer[-1] = res;
             CHECK_EVAL_BREAKER();
             DISPATCH();
@@ -324,6 +321,7 @@ class TestGeneratedCases(unittest.TestCase):
     def test_error_if_pop(self):
         input = """
         inst(OP, (left, right -- res)) {
+            res = spam(left, right);
             ERROR_IF(cond, label);
         }
     """
@@ -332,11 +330,12 @@ class TestGeneratedCases(unittest.TestCase):
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(OP);
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
+            res = spam(left, right);
             if (cond) goto pop_2_label;
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -357,8 +356,6 @@ class TestGeneratedCases(unittest.TestCase):
             (void)this_instr;
             next_instr += 4;
             INSTRUCTION_STATS(OP);
-            _PyStackRef value;
-            value = stack_pointer[-1];
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             uint32_t extra = read_u32(&this_instr[2].cache);
@@ -408,8 +405,8 @@ class TestGeneratedCases(unittest.TestCase):
             PREDICTED(OP);
             _Py_CODEUNIT *this_instr = next_instr - 6;
             (void)this_instr;
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef arg2;
             _PyStackRef res;
             // _OP1
@@ -439,8 +436,8 @@ class TestGeneratedCases(unittest.TestCase):
             (void)this_instr;
             next_instr += 2;
             INSTRUCTION_STATS(OP1);
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
             uint16_t counter = read_u16(&this_instr[1].cache);
@@ -454,9 +451,9 @@ class TestGeneratedCases(unittest.TestCase):
             next_instr += 6;
             INSTRUCTION_STATS(OP3);
             static_assert(INLINE_CACHE_ENTRIES_OP == 5, "incorrect cache size");
-            _PyStackRef right;
-            _PyStackRef left;
             _PyStackRef arg2;
+            _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             /* Skip 5 cache entries */
             right = stack_pointer[-1];
@@ -531,7 +528,7 @@ class TestGeneratedCases(unittest.TestCase):
     def test_array_input(self):
         input = """
         inst(OP, (below, values[oparg*2], above --)) {
-            spam();
+            spam(values, oparg);
         }
     """
         output = """
@@ -539,13 +536,9 @@ class TestGeneratedCases(unittest.TestCase):
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(OP);
-            _PyStackRef above;
             _PyStackRef *values;
-            _PyStackRef below;
-            above = stack_pointer[-1];
             values = &stack_pointer[-1 - oparg*2];
-            below = stack_pointer[-2 - oparg*2];
-            spam();
+            spam(values, oparg);
             stack_pointer += -2 - oparg*2;
             assert(WITHIN_STACK_BOUNDS());
             DISPATCH();
@@ -564,9 +557,7 @@ class TestGeneratedCases(unittest.TestCase):
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(OP);
-            _PyStackRef below;
             _PyStackRef *values;
-            _PyStackRef above;
             values = &stack_pointer[-1];
             spam(values, oparg);
             stack_pointer[-2] = below;
@@ -590,7 +581,6 @@ class TestGeneratedCases(unittest.TestCase):
             next_instr += 1;
             INSTRUCTION_STATS(OP);
             _PyStackRef *values;
-            _PyStackRef above;
             values = &stack_pointer[-oparg];
             spam(values, oparg);
             stack_pointer[0] = above;
@@ -612,10 +602,6 @@ class TestGeneratedCases(unittest.TestCase):
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(OP);
-            _PyStackRef *values;
-            _PyStackRef extra;
-            values = &stack_pointer[-oparg];
-            extra = stack_pointer[-1 - oparg];
             if (oparg == 0) { stack_pointer += -1 - oparg; goto somewhere; }
             stack_pointer += -1 - oparg;
             assert(WITHIN_STACK_BOUNDS());
@@ -627,7 +613,7 @@ class TestGeneratedCases(unittest.TestCase):
     def test_cond_effect(self):
         input = """
         inst(OP, (aa, input if ((oparg & 1) == 1), cc -- xx, output if (oparg & 2), zz)) {
-            output = spam(oparg, input);
+            output = spam(oparg, aa, cc, input);
         }
     """
         output = """
@@ -635,16 +621,14 @@ class TestGeneratedCases(unittest.TestCase):
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(OP);
-            _PyStackRef cc;
-            _PyStackRef input = PyStackRef_NULL;
             _PyStackRef aa;
-            _PyStackRef xx;
+            _PyStackRef input = PyStackRef_NULL;
+            _PyStackRef cc;
             _PyStackRef output = PyStackRef_NULL;
-            _PyStackRef zz;
             cc = stack_pointer[-1];
             if ((oparg & 1) == 1) { input = stack_pointer[-1 - (((oparg & 1) == 1) ? 1 : 0)]; }
             aa = stack_pointer[-2 - (((oparg & 1) == 1) ? 1 : 0)];
-            output = spam(oparg, input);
+            output = spam(oparg, aa, cc, input);
             stack_pointer[-2 - (((oparg & 1) == 1) ? 1 : 0)] = xx;
             if (oparg & 2) stack_pointer[-1 - (((oparg & 1) == 1) ? 1 : 0)] = output;
             stack_pointer[-1 - (((oparg & 1) == 1) ? 1 : 0) + ((oparg & 2) ? 1 : 0)] = zz;
@@ -658,10 +642,11 @@ class TestGeneratedCases(unittest.TestCase):
     def test_macro_cond_effect(self):
         input = """
         op(A, (left, middle, right --)) {
-            # Body of A
+            use(left, middle, right);
         }
         op(B, (-- deep, extra if (oparg), res)) {
-            # Body of B
+            res = 0;
+            extra = 1;
         }
         macro(M) = A + B;
     """
@@ -670,10 +655,9 @@ class TestGeneratedCases(unittest.TestCase):
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(M);
-            _PyStackRef right;
-            _PyStackRef middle;
             _PyStackRef left;
-            _PyStackRef deep;
+            _PyStackRef middle;
+            _PyStackRef right;
             _PyStackRef extra = PyStackRef_NULL;
             _PyStackRef res;
             // A
@@ -681,11 +665,12 @@ class TestGeneratedCases(unittest.TestCase):
             middle = stack_pointer[-2];
             left = stack_pointer[-3];
             {
-                # Body of A
+                use(left, middle, right);
             }
             // B
             {
-                # Body of B
+                res = 0;
+                extra = 1;
             }
             stack_pointer[-3] = deep;
             if (oparg) stack_pointer[-2] = extra;
@@ -868,6 +853,165 @@ class TestGeneratedCases(unittest.TestCase):
         """
         self.run_cases_test(input, output)
 
+    def test_unused_cached_value(self):
+        input = """
+        op(FIRST, (arg1 -- out)) {
+            out = arg1;
+        }
+
+        op(SECOND, (unused -- unused)) {
+        }
+
+        macro(BOTH) = FIRST + SECOND;
+        """
+        output = """
+        """
+        with self.assertRaises(SyntaxError):
+            self.run_cases_test(input, output)
+
+    def test_unused_named_values(self):
+        input = """
+        op(OP, (named -- named)) {
+        }
+
+        macro(INST) = OP;
+        """
+        output = """
+        TARGET(INST) {
+            frame->instr_ptr = next_instr;
+            next_instr += 1;
+            INSTRUCTION_STATS(INST);
+            DISPATCH();
+        }
+
+        """
+        self.run_cases_test(input, output)
+
+    def test_used_unused_used(self):
+        input = """
+        op(FIRST, (w -- w)) {
+            use(w);
+        }
+
+        op(SECOND, (x -- x)) {
+        }
+
+        op(THIRD, (y -- y)) {
+            use(y);
+        }
+
+        macro(TEST) = FIRST + SECOND + THIRD;
+        """
+        output = """
+        TARGET(TEST) {
+            frame->instr_ptr = next_instr;
+            next_instr += 1;
+            INSTRUCTION_STATS(TEST);
+            _PyStackRef w;
+            _PyStackRef x;
+            _PyStackRef y;
+            // FIRST
+            w = stack_pointer[-1];
+            {
+                use(w);
+            }
+            // SECOND
+            x = w;
+            {
+            }
+            // THIRD
+            y = x;
+            {
+                use(y);
+            }
+            DISPATCH();
+        }
+        """
+        self.run_cases_test(input, output)
+
+    def test_unused_used_used(self):
+        input = """
+        op(FIRST, (w -- w)) {
+        }
+
+        op(SECOND, (x -- x)) {
+            use(x);
+        }
+
+        op(THIRD, (y -- y)) {
+            use(y);
+        }
+
+        macro(TEST) = FIRST + SECOND + THIRD;
+        """
+        output = """
+        TARGET(TEST) {
+            frame->instr_ptr = next_instr;
+            next_instr += 1;
+            INSTRUCTION_STATS(TEST);
+            _PyStackRef x;
+            _PyStackRef y;
+            // FIRST
+            {
+            }
+            // SECOND
+            x = stack_pointer[-1];
+            {
+                use(x);
+            }
+            // THIRD
+            y = x;
+            {
+                use(y);
+            }
+            DISPATCH();
+        }
+        """
+        self.run_cases_test(input, output)
+
+    def test_flush(self):
+        input = """
+        op(FIRST, ( -- a, b)) {
+            a = 0;
+            b = 1;
+        }
+
+        op(SECOND, (a, b -- )) {
+            use(a, b);
+        }
+
+        macro(TEST) = FIRST + flush + SECOND;
+        """
+        output = """
+        TARGET(TEST) {
+            frame->instr_ptr = next_instr;
+            next_instr += 1;
+            INSTRUCTION_STATS(TEST);
+            _PyStackRef a;
+            _PyStackRef b;
+            // FIRST
+            {
+                a = 0;
+                b = 1;
+            }
+            // flush
+            stack_pointer[0] = a;
+            stack_pointer[1] = b;
+            stack_pointer += 2;
+            assert(WITHIN_STACK_BOUNDS());
+            // SECOND
+            b = stack_pointer[-1];
+            a = stack_pointer[-2];
+            {
+                use(a, b);
+            }
+            stack_pointer += -2;
+            assert(WITHIN_STACK_BOUNDS());
+            DISPATCH();
+        }
+        """
+        self.run_cases_test(input, output)
+
 
 class TestGeneratedAbstractCases(unittest.TestCase):
     def setUp(self) -> None:
@@ -956,7 +1100,6 @@ class TestGeneratedAbstractCases(unittest.TestCase):
         case OP: {
             _Py_UopsSymbol *arg1;
             _Py_UopsSymbol *out;
-            arg1 = stack_pointer[-1];
             eggs();
             stack_pointer[-1] = out;
             break;
@@ -996,7 +1139,6 @@ class TestGeneratedAbstractCases(unittest.TestCase):
         case OP2: {
             _Py_UopsSymbol *arg1;
             _Py_UopsSymbol *out;
-            arg1 = stack_pointer[-1];
             stack_pointer[-1] = out;
             break;
         }
index 84241c64ffae88cd4364020db573e9cde2f78906..142f97daeb08206f7fe222c3afc4738573f7bb98 100644 (file)
@@ -3448,7 +3448,7 @@ dummy_func(
             }
         }
 
-        op(_CHECK_FUNCTION_VERSION, (func_version/2, callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
+        op(_CHECK_FUNCTION_VERSION, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
             PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
             EXIT_IF(!PyFunction_Check(callable_o));
             PyFunctionObject *func = (PyFunctionObject *)callable_o;
@@ -3479,7 +3479,6 @@ dummy_func(
             assert(PyStackRef_IsNull(null));
             assert(Py_TYPE(callable_o) == &PyMethod_Type);
             self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);
-            stack_pointer[-1 - oparg] = self;  // Patch stack as it is used by _PY_FRAME_GENERAL
             method = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);
             assert(PyFunction_Check(PyStackRef_AsPyObjectBorrow(method)));
             PyStackRef_CLOSE(callable);
@@ -3490,6 +3489,7 @@ dummy_func(
             _CHECK_PEP_523 +
             _CHECK_METHOD_VERSION +
             _EXPAND_METHOD +
+            flush + // so that self is in the argument array
             _PY_FRAME_GENERAL +
             _SAVE_RETURN_OFFSET +
             _PUSH_FRAME;
@@ -3544,16 +3544,12 @@ dummy_func(
             EXIT_IF(Py_TYPE(PyStackRef_AsPyObjectBorrow(callable)) != &PyMethod_Type);
         }
 
-        op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, unused, unused[oparg] -- func, self, unused[oparg])) {
+        op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- func, self, unused[oparg])) {
             PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
             STAT_INC(CALL, hit);
-            stack_pointer[-1 - oparg] = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);  // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS
-            stack_pointer[-2 - oparg] = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);  // This is used by CALL, upon deoptimization
-            self = stack_pointer[-1 - oparg];
-            func = stack_pointer[-2 - oparg];
+            self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);
+            func = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);
             PyStackRef_CLOSE(callable);
-            // self may be unused in tier 1, so silence warnings.
-            (void)self;
         }
 
         op(_CHECK_PEP_523, (--)) {
@@ -3568,7 +3564,7 @@ dummy_func(
             EXIT_IF(code->co_argcount != oparg + (!PyStackRef_IsNull(self_or_null)));
         }
 
-        op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
+        op(_CHECK_STACK_SPACE, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
             PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
             PyFunctionObject *func = (PyFunctionObject *)callable_o;
             PyCodeObject *code = (PyCodeObject *)func->func_code;
@@ -3609,6 +3605,7 @@ dummy_func(
             _CHECK_PEP_523 +
             _CHECK_CALL_BOUND_METHOD_EXACT_ARGS +
             _INIT_CALL_BOUND_METHOD_EXACT_ARGS +
+            flush + // In case the following deopt
             _CHECK_FUNCTION_VERSION +
             _CHECK_FUNCTION_EXACT_ARGS +
             _CHECK_STACK_SPACE +
index 8f6bc75b528d9bb04bfac44747be1492107b4d11..375e1fb05840a4abf742d6e80c62e6419a7c2563 100644 (file)
             assert(PyStackRef_IsNull(null));
             assert(Py_TYPE(callable_o) == &PyMethod_Type);
             self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);
-            stack_pointer[-1 - oparg] = self;  // Patch stack as it is used by _PY_FRAME_GENERAL
             method = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);
             assert(PyFunction_Check(PyStackRef_AsPyObjectBorrow(method)));
             PyStackRef_CLOSE(callable);
             callable = stack_pointer[-2 - oparg];
             PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
             STAT_INC(CALL, hit);
-            stack_pointer[-1 - oparg] = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);  // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS
-            stack_pointer[-2 - oparg] = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);  // This is used by CALL, upon deoptimization
-            self = stack_pointer[-1 - oparg];
-            func = stack_pointer[-2 - oparg];
+            self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);
+            func = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);
             PyStackRef_CLOSE(callable);
-            // self may be unused in tier 1, so silence warnings.
-            (void)self;
             stack_pointer[-2 - oparg] = func;
             stack_pointer[-1 - oparg] = self;
             break;
index 61057221291c0a4cf113bbfb6bac6ffa4675021a..382288440a7a2c34de22f7029ee1a479fade6d29 100644 (file)
@@ -16,8 +16,8 @@
             PREDICTED(BINARY_OP);
             _Py_CODEUNIT *this_instr = next_instr - 2;
             (void)this_instr;
-            _PyStackRef rhs;
             _PyStackRef lhs;
+            _PyStackRef rhs;
             _PyStackRef res;
             // _SPECIALIZE_BINARY_OP
             rhs = stack_pointer[-1];
@@ -59,8 +59,8 @@
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_OP_ADD_FLOAT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_FLOAT
             right = stack_pointer[-1];
@@ -95,8 +95,8 @@
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_OP_ADD_INT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_INT
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_OP_ADD_UNICODE);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_UNICODE
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_OP_INPLACE_ADD_UNICODE);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             // _GUARD_BOTH_UNICODE
             right = stack_pointer[-1];
             left = stack_pointer[-2];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_OP_MULTIPLY_FLOAT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_FLOAT
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_OP_MULTIPLY_INT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_INT
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_OP_SUBTRACT_FLOAT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_FLOAT
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_OP_SUBTRACT_INT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_INT
             right = stack_pointer[-1];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(BINARY_SLICE);
-            _PyStackRef stop;
-            _PyStackRef start;
             _PyStackRef container;
+            _PyStackRef start;
+            _PyStackRef stop;
             _PyStackRef res;
             stop = stack_pointer[-1];
             start = stack_pointer[-2];
             PREDICTED(BINARY_SUBSCR);
             _Py_CODEUNIT *this_instr = next_instr - 2;
             (void)this_instr;
-            _PyStackRef sub;
             _PyStackRef container;
+            _PyStackRef sub;
             _PyStackRef res;
             // _SPECIALIZE_BINARY_SUBSCR
             sub = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_SUBSCR_DICT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
-            _PyStackRef sub_st;
             _PyStackRef dict_st;
+            _PyStackRef sub_st;
             _PyStackRef res;
             /* Skip 1 cache entry */
             sub_st = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
-            _PyStackRef sub_st;
             _PyStackRef container_st;
+            _PyStackRef sub_st;
             /* Skip 1 cache entry */
             sub_st = stack_pointer[-1];
             container_st = stack_pointer[-2];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_SUBSCR_LIST_INT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
-            _PyStackRef sub_st;
             _PyStackRef list_st;
+            _PyStackRef sub_st;
             _PyStackRef res;
             /* Skip 1 cache entry */
             sub_st = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_SUBSCR_STR_INT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
-            _PyStackRef sub_st;
             _PyStackRef str_st;
+            _PyStackRef sub_st;
             _PyStackRef res;
             /* Skip 1 cache entry */
             sub_st = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(BINARY_SUBSCR_TUPLE_INT);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
-            _PyStackRef sub_st;
             _PyStackRef tuple_st;
+            _PyStackRef sub_st;
             _PyStackRef res;
             /* Skip 1 cache entry */
             sub_st = stack_pointer[-1];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(BUILD_CONST_KEY_MAP);
-            _PyStackRef keys;
             _PyStackRef *values;
+            _PyStackRef keys;
             _PyStackRef map;
             keys = stack_pointer[-1];
             values = &stack_pointer[-1 - oparg];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(BUILD_SLICE);
-            _PyStackRef step = PyStackRef_NULL;
-            _PyStackRef stop;
             _PyStackRef start;
+            _PyStackRef stop;
+            _PyStackRef step = PyStackRef_NULL;
             _PyStackRef slice;
             if (oparg == 3) { step = stack_pointer[-((oparg == 3) ? 1 : 0)]; }
             stop = stack_pointer[-1 - ((oparg == 3) ? 1 : 0)];
             PREDICTED(CALL);
             _Py_CODEUNIT *this_instr = next_instr - 4;
             (void)this_instr;
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             // _SPECIALIZE_CALL
-            args = &stack_pointer[-oparg];
             self_or_null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
             {
+                args = &stack_pointer[-oparg];
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
             next_instr += 4;
             INSTRUCTION_STATS(CALL_ALLOC_AND_ENTER_INIT);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef null;
             _PyStackRef callable;
+            _PyStackRef null;
+            _PyStackRef *args;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             args = &stack_pointer[-oparg];
             next_instr += 4;
             INSTRUCTION_STATS(CALL_BOUND_METHOD_EXACT_ARGS);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef null;
             _PyStackRef callable;
+            _PyStackRef null;
             _PyStackRef func;
             _PyStackRef self;
             _PyStackRef self_or_null;
             {
                 PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
                 STAT_INC(CALL, hit);
-                stack_pointer[-1 - oparg] = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);  // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS
-                stack_pointer[-2 - oparg] = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);  // This is used by CALL, upon deoptimization
-                self = stack_pointer[-1 - oparg];
-                func = stack_pointer[-2 - oparg];
+                self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);
+                func = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);
                 PyStackRef_CLOSE(callable);
-                // self may be unused in tier 1, so silence warnings.
-                (void)self;
             }
+            // flush
+            stack_pointer[-2 - oparg] = func;
+            stack_pointer[-1 - oparg] = self;
             // _CHECK_FUNCTION_VERSION
-            callable = func;
+            callable = stack_pointer[-2 - oparg];
             {
                 uint32_t func_version = read_u32(&this_instr[2].cache);
                 PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
             }
             // _INIT_CALL_PY_EXACT_ARGS
             args = &stack_pointer[-oparg];
-            self_or_null = stack_pointer[-1 - oparg];
             {
                 PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
                 int has_self = !PyStackRef_IsNull(self_or_null);
             next_instr += 4;
             INSTRUCTION_STATS(CALL_BOUND_METHOD_GENERAL);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef null;
             _PyStackRef callable;
+            _PyStackRef null;
             _PyStackRef method;
             _PyStackRef self;
-            _PyStackRef *args;
             _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyInterpreterFrame *new_frame;
             /* Skip 1 cache entry */
             // _CHECK_PEP_523
                 assert(PyStackRef_IsNull(null));
                 assert(Py_TYPE(callable_o) == &PyMethod_Type);
                 self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self);
-                stack_pointer[-1 - oparg] = self;  // Patch stack as it is used by _PY_FRAME_GENERAL
                 method = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func);
                 assert(PyFunction_Check(PyStackRef_AsPyObjectBorrow(method)));
                 PyStackRef_CLOSE(callable);
             }
+            // flush
+            stack_pointer[-2 - oparg] = method;
+            stack_pointer[-1 - oparg] = self;
             // _PY_FRAME_GENERAL
             args = &stack_pointer[-oparg];
-            self_or_null = self;
-            callable = method;
+            self_or_null = stack_pointer[-1 - oparg];
+            callable = stack_pointer[-2 - oparg];
             {
                 PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
                 PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null);
             next_instr += 4;
             INSTRUCTION_STATS(CALL_BUILTIN_CLASS);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_BUILTIN_FAST);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_BUILTIN_FAST_WITH_KEYWORDS);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_BUILTIN_O);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             PREDICTED(CALL_FUNCTION_EX);
             _Py_CODEUNIT *this_instr = next_instr - 1;
             (void)this_instr;
-            _PyStackRef kwargs_st = PyStackRef_NULL;
-            _PyStackRef callargs_st;
             _PyStackRef func_st;
+            _PyStackRef callargs_st;
+            _PyStackRef kwargs_st = PyStackRef_NULL;
             _PyStackRef result;
             if (oparg & 1) { kwargs_st = stack_pointer[-(oparg & 1)]; }
             callargs_st = stack_pointer[-1 - (oparg & 1)];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(CALL_INTRINSIC_2);
-            _PyStackRef value1_st;
             _PyStackRef value2_st;
+            _PyStackRef value1_st;
             _PyStackRef res;
             value1_st = stack_pointer[-1];
             value2_st = stack_pointer[-2];
             next_instr += 4;
             INSTRUCTION_STATS(CALL_ISINSTANCE);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             PREDICTED(CALL_KW);
             _Py_CODEUNIT *this_instr = next_instr - 1;
             (void)this_instr;
-            _PyStackRef kwnames;
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
+            _PyStackRef kwnames;
             _PyStackRef res;
             kwnames = stack_pointer[-1];
             args = &stack_pointer[-1 - oparg];
             next_instr += 4;
             INSTRUCTION_STATS(CALL_LEN);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_LIST_APPEND);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef arg;
-            _PyStackRef self;
             _PyStackRef callable;
+            _PyStackRef self;
+            _PyStackRef arg;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             arg = stack_pointer[-1];
             next_instr += 4;
             INSTRUCTION_STATS(CALL_METHOD_DESCRIPTOR_FAST);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_METHOD_DESCRIPTOR_NOARGS);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_METHOD_DESCRIPTOR_O);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef *args;
-            _PyStackRef self_or_null;
             _PyStackRef callable;
+            _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             INSTRUCTION_STATS(CALL_NON_PY_GENERAL);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
             _PyStackRef callable;
-            _PyStackRef *args;
             _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             }
             // _INIT_CALL_PY_EXACT_ARGS
             args = &stack_pointer[-oparg];
-            self_or_null = stack_pointer[-1 - oparg];
             {
                 PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
                 int has_self = !PyStackRef_IsNull(self_or_null);
             INSTRUCTION_STATS(CALL_PY_GENERAL);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
             _PyStackRef callable;
-            _PyStackRef *args;
             _PyStackRef self_or_null;
+            _PyStackRef *args;
             _PyInterpreterFrame *new_frame;
             /* Skip 1 cache entry */
             // _CHECK_PEP_523
             next_instr += 4;
             INSTRUCTION_STATS(CALL_STR_1);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef arg;
-            _PyStackRef null;
             _PyStackRef callable;
+            _PyStackRef null;
+            _PyStackRef arg;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_TUPLE_1);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef arg;
-            _PyStackRef null;
             _PyStackRef callable;
+            _PyStackRef null;
+            _PyStackRef arg;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             next_instr += 4;
             INSTRUCTION_STATS(CALL_TYPE_1);
             static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
-            _PyStackRef arg;
-            _PyStackRef null;
             _PyStackRef callable;
+            _PyStackRef null;
+            _PyStackRef arg;
             _PyStackRef res;
             /* Skip 1 cache entry */
             /* Skip 2 cache entries */
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(CHECK_EG_MATCH);
-            _PyStackRef match_type_st;
             _PyStackRef exc_value_st;
+            _PyStackRef match_type_st;
             _PyStackRef rest;
             _PyStackRef match;
             match_type_st = stack_pointer[-1];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(CHECK_EXC_MATCH);
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef b;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
             (void)this_instr;
             next_instr += 1;
             INSTRUCTION_STATS(CLEANUP_THROW);
-            _PyStackRef exc_value_st;
-            _PyStackRef last_sent_val_st;
             _PyStackRef sub_iter_st;
+            _PyStackRef last_sent_val_st;
+            _PyStackRef exc_value_st;
             _PyStackRef none;
             _PyStackRef value;
             exc_value_st = stack_pointer[-1];
             PREDICTED(COMPARE_OP);
             _Py_CODEUNIT *this_instr = next_instr - 2;
             (void)this_instr;
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _SPECIALIZE_COMPARE_OP
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(COMPARE_OP_FLOAT);
             static_assert(INLINE_CACHE_ENTRIES_COMPARE_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_FLOAT
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(COMPARE_OP_INT);
             static_assert(INLINE_CACHE_ENTRIES_COMPARE_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_INT
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(COMPARE_OP_STR);
             static_assert(INLINE_CACHE_ENTRIES_COMPARE_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef res;
             // _GUARD_BOTH_UNICODE
             right = stack_pointer[-1];
             PREDICTED(CONTAINS_OP);
             _Py_CODEUNIT *this_instr = next_instr - 2;
             (void)this_instr;
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef b;
             // _SPECIALIZE_CONTAINS_OP
             right = stack_pointer[-1];
-            left = stack_pointer[-2];
             {
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             // _CONTAINS_OP
+            left = stack_pointer[-2];
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             next_instr += 2;
             INSTRUCTION_STATS(CONTAINS_OP_DICT);
             static_assert(INLINE_CACHE_ENTRIES_CONTAINS_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef b;
             /* Skip 1 cache entry */
             right = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(CONTAINS_OP_SET);
             static_assert(INLINE_CACHE_ENTRIES_CONTAINS_OP == 1, "incorrect cache size");
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef b;
             /* Skip 1 cache entry */
             right = stack_pointer[-1];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(DELETE_SUBSCR);
-            _PyStackRef sub;
             _PyStackRef container;
+            _PyStackRef sub;
             sub = stack_pointer[-1];
             container = stack_pointer[-2];
             /* del container[sub] */
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(DICT_MERGE);
-            _PyStackRef update;
-            _PyStackRef dict;
             _PyStackRef callable;
+            _PyStackRef dict;
+            _PyStackRef update;
             update = stack_pointer[-1];
             dict = stack_pointer[-2 - (oparg - 1)];
             callable = stack_pointer[-5 - (oparg - 1)];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(DICT_UPDATE);
-            _PyStackRef update;
             _PyStackRef dict;
+            _PyStackRef update;
             update = stack_pointer[-1];
             dict = stack_pointer[-2 - (oparg - 1)];
             PyObject *dict_o = PyStackRef_AsPyObjectBorrow(dict);
             (void)this_instr;
             next_instr += 1;
             INSTRUCTION_STATS(END_ASYNC_FOR);
-            _PyStackRef exc_st;
             _PyStackRef awaitable_st;
+            _PyStackRef exc_st;
             exc_st = stack_pointer[-1];
             awaitable_st = stack_pointer[-2];
             PyObject *exc = PyStackRef_AsPyObjectBorrow(exc_st);
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(END_SEND);
-            _PyStackRef value;
             _PyStackRef receiver;
+            _PyStackRef value;
             value = stack_pointer[-1];
             receiver = stack_pointer[-2];
             (void)receiver;
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(FORMAT_WITH_SPEC);
-            _PyStackRef fmt_spec;
             _PyStackRef value;
+            _PyStackRef fmt_spec;
             _PyStackRef res;
             fmt_spec = stack_pointer[-1];
             value = stack_pointer[-2];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(IMPORT_NAME);
-            _PyStackRef fromlist;
             _PyStackRef level;
+            _PyStackRef fromlist;
             _PyStackRef res;
             fromlist = stack_pointer[-1];
             level = stack_pointer[-2];
             (void)this_instr;
             next_instr += 1;
             INSTRUCTION_STATS(INSTRUMENTED_END_FOR);
-            _PyStackRef value;
             _PyStackRef receiver;
+            _PyStackRef value;
             value = stack_pointer[-1];
             receiver = stack_pointer[-2];
             /* Need to create a fake StopIteration error here,
             (void)this_instr;
             next_instr += 1;
             INSTRUCTION_STATS(INSTRUMENTED_END_SEND);
-            _PyStackRef value;
             _PyStackRef receiver;
+            _PyStackRef value;
             value = stack_pointer[-1];
             receiver = stack_pointer[-2];
             PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver);
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(IS_OP);
-            _PyStackRef right;
             _PyStackRef left;
+            _PyStackRef right;
             _PyStackRef b;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(LIST_APPEND);
-            _PyStackRef v;
             _PyStackRef list;
+            _PyStackRef v;
             v = stack_pointer[-1];
             list = stack_pointer[-2 - (oparg-1)];
             if (_PyList_AppendTakeRef((PyListObject *)PyStackRef_AsPyObjectBorrow(list),
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(LIST_EXTEND);
-            _PyStackRef iterable_st;
             _PyStackRef list_st;
+            _PyStackRef iterable_st;
             iterable_st = stack_pointer[-1];
             list_st = stack_pointer[-2 - (oparg-1)];
             PyObject *list = PyStackRef_AsPyObjectBorrow(list_st);
             PREDICTED(LOAD_SUPER_ATTR);
             _Py_CODEUNIT *this_instr = next_instr - 2;
             (void)this_instr;
-            _PyStackRef class_st;
             _PyStackRef global_super_st;
+            _PyStackRef class_st;
             _PyStackRef self_st;
             _PyStackRef attr;
             _PyStackRef null = PyStackRef_NULL;
             next_instr += 2;
             INSTRUCTION_STATS(LOAD_SUPER_ATTR_ATTR);
             static_assert(INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR == 1, "incorrect cache size");
-            _PyStackRef self_st;
-            _PyStackRef class_st;
             _PyStackRef global_super_st;
+            _PyStackRef class_st;
+            _PyStackRef self_st;
             _PyStackRef attr_st;
             /* Skip 1 cache entry */
             self_st = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(LOAD_SUPER_ATTR_METHOD);
             static_assert(INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR == 1, "incorrect cache size");
-            _PyStackRef self_st;
-            _PyStackRef class_st;
             _PyStackRef global_super_st;
+            _PyStackRef class_st;
+            _PyStackRef self_st;
             _PyStackRef attr;
             _PyStackRef self_or_null;
             /* Skip 1 cache entry */
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(MAP_ADD);
-            _PyStackRef value;
-            _PyStackRef key;
             _PyStackRef dict_st;
+            _PyStackRef key;
+            _PyStackRef value;
             value = stack_pointer[-1];
             key = stack_pointer[-2];
             dict_st = stack_pointer[-3 - (oparg - 1)];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(MATCH_CLASS);
-            _PyStackRef names;
-            _PyStackRef type;
             _PyStackRef subject;
+            _PyStackRef type;
+            _PyStackRef names;
             _PyStackRef attrs;
             names = stack_pointer[-1];
             type = stack_pointer[-2];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(MATCH_KEYS);
-            _PyStackRef keys;
             _PyStackRef subject;
+            _PyStackRef keys;
             _PyStackRef values_or_none;
             keys = stack_pointer[-1];
             subject = stack_pointer[-2];
             (void)this_instr;
             next_instr += 1;
             INSTRUCTION_STATS(RERAISE);
-            _PyStackRef exc_st;
             _PyStackRef *values;
+            _PyStackRef exc_st;
             exc_st = stack_pointer[-1];
             values = &stack_pointer[-1 - oparg];
             PyObject *exc = PyStackRef_AsPyObjectBorrow(exc_st);
             next_instr += 2;
             INSTRUCTION_STATS(SEND_GEN);
             static_assert(INLINE_CACHE_ENTRIES_SEND == 1, "incorrect cache size");
-            _PyStackRef v;
             _PyStackRef receiver;
+            _PyStackRef v;
             /* Skip 1 cache entry */
             v = stack_pointer[-1];
             receiver = stack_pointer[-2];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(SET_ADD);
-            _PyStackRef v;
             _PyStackRef set;
+            _PyStackRef v;
             v = stack_pointer[-1];
             set = stack_pointer[-2 - (oparg-1)];
             int err = PySet_Add(PyStackRef_AsPyObjectBorrow(set),
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(SET_FUNCTION_ATTRIBUTE);
-            _PyStackRef func_st;
             _PyStackRef attr_st;
+            _PyStackRef func_st;
             func_st = stack_pointer[-1];
             attr_st = stack_pointer[-2];
             PyObject *func = PyStackRef_AsPyObjectBorrow(func_st);
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(SET_UPDATE);
-            _PyStackRef iterable;
             _PyStackRef set;
+            _PyStackRef iterable;
             iterable = stack_pointer[-1];
             set = stack_pointer[-2 - (oparg-1)];
             int err = _PySet_Update(PyStackRef_AsPyObjectBorrow(set),
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(STORE_FAST_STORE_FAST);
-            _PyStackRef value1;
             _PyStackRef value2;
+            _PyStackRef value1;
             value1 = stack_pointer[-1];
             value2 = stack_pointer[-2];
             uint32_t oparg1 = oparg >> 4;
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(STORE_SLICE);
-            _PyStackRef stop;
-            _PyStackRef start;
-            _PyStackRef container;
             _PyStackRef v;
+            _PyStackRef container;
+            _PyStackRef start;
+            _PyStackRef stop;
             stop = stack_pointer[-1];
             start = stack_pointer[-2];
             container = stack_pointer[-3];
             PREDICTED(STORE_SUBSCR);
             _Py_CODEUNIT *this_instr = next_instr - 2;
             (void)this_instr;
-            _PyStackRef sub;
             _PyStackRef container;
+            _PyStackRef sub;
             _PyStackRef v;
             // _SPECIALIZE_STORE_SUBSCR
             sub = stack_pointer[-1];
             next_instr += 2;
             INSTRUCTION_STATS(STORE_SUBSCR_DICT);
             static_assert(INLINE_CACHE_ENTRIES_STORE_SUBSCR == 1, "incorrect cache size");
-            _PyStackRef sub_st;
-            _PyStackRef dict_st;
             _PyStackRef value;
+            _PyStackRef dict_st;
+            _PyStackRef sub_st;
             /* Skip 1 cache entry */
             sub_st = stack_pointer[-1];
             dict_st = stack_pointer[-2];
             next_instr += 2;
             INSTRUCTION_STATS(STORE_SUBSCR_LIST_INT);
             static_assert(INLINE_CACHE_ENTRIES_STORE_SUBSCR == 1, "incorrect cache size");
-            _PyStackRef sub_st;
-            _PyStackRef list_st;
             _PyStackRef value;
+            _PyStackRef list_st;
+            _PyStackRef sub_st;
             /* Skip 1 cache entry */
             sub_st = stack_pointer[-1];
             list_st = stack_pointer[-2];
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(SWAP);
-            _PyStackRef top;
             _PyStackRef bottom;
+            _PyStackRef top;
             top = stack_pointer[-1];
             bottom = stack_pointer[-2 - (oparg-2)];
             assert(oparg >= 2);
             frame->instr_ptr = next_instr;
             next_instr += 1;
             INSTRUCTION_STATS(WITH_EXCEPT_START);
-            _PyStackRef val;
-            _PyStackRef lasti;
-            _PyStackRef exit_self;
             _PyStackRef exit_func;
+            _PyStackRef exit_self;
+            _PyStackRef lasti;
+            _PyStackRef val;
             _PyStackRef res;
             val = stack_pointer[-1];
             lasti = stack_pointer[-3];
index ec365bad3992d57eb7559cc34e2b921e81f683ea..c6f044e066539efda83baddac1a00992167725fb 100644 (file)
@@ -78,7 +78,7 @@ SKIP_PROPERTIES = Properties(
     uses_locals=False,
     has_free=False,
     side_exit=False,
-    pure=False,
+    pure=True,
 )
 
 
@@ -96,6 +96,20 @@ class Skip:
         return SKIP_PROPERTIES
 
 
+class Flush:
+
+    @property
+    def properties(self) -> Properties:
+        return SKIP_PROPERTIES
+
+    @property
+    def name(self) -> str:
+        return "flush"
+
+    @property
+    def size(self) -> int:
+        return 0
+
 @dataclass
 class StackItem:
     name: str
@@ -103,6 +117,7 @@ class StackItem:
     condition: str | None
     size: str
     peek: bool = False
+    used: bool = False
 
     def __str__(self) -> str:
         cond = f" if ({self.condition})" if self.condition else ""
@@ -133,7 +148,6 @@ class CacheEntry:
     def __str__(self) -> str:
         return f"{self.name}/{self.size}"
 
-
 @dataclass
 class Uop:
     name: str
@@ -195,7 +209,7 @@ class Uop:
         return False
 
 
-Part = Uop | Skip
+Part = Uop | Skip | Flush
 
 
 @dataclass
@@ -306,6 +320,16 @@ def analyze_stack(op: parser.InstDef | parser.Pseudo, replace_op_arg_1: str | No
     for input, output in zip(inputs, outputs):
         if input.name == output.name:
             input.peek = output.peek = True
+    if isinstance(op, parser.InstDef):
+        output_names = [out.name for out in outputs]
+        for input in inputs:
+            if (variable_used(op, input.name) or
+                variable_used(op, "DECREF_INPUTS") or
+                (not input.peek and input.name in output_names)):
+                input.used = True
+        for output in outputs:
+            if variable_used(op, output.name):
+                output.used = True
     return StackEffect(inputs, outputs)
 
 
@@ -324,7 +348,13 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]:
 def variable_used(node: parser.InstDef, name: str) -> bool:
     """Determine whether a variable with a given name is used in a node."""
     return any(
-        token.kind == "IDENTIFIER" and token.text == name for token in node.tokens
+        token.kind == "IDENTIFIER" and token.text == name for token in node.block.tokens
+    )
+
+def oparg_used(node: parser.InstDef) -> bool:
+    """Determine whether `oparg` is used in a node."""
+    return any(
+        token.kind == "IDENTIFIER" and token.text == "oparg" for token in node.tokens
     )
 
 def tier_variable(node: parser.InstDef) -> int | None:
@@ -570,7 +600,7 @@ def compute_properties(op: parser.InstDef) -> Properties:
         error_without_pop=error_without_pop,
         deopts=deopts_if,
         side_exit=exits_if,
-        oparg=variable_used(op, "oparg"),
+        oparg=oparg_used(op),
         jumps=variable_used(op, "JUMPBY"),
         eval_breaker=variable_used(op, "CHECK_EVAL_BREAKER"),
         ends_with_eval_breaker=eval_breaker_at_end(op),
@@ -689,13 +719,16 @@ def desugar_inst(
 def add_macro(
     macro: parser.Macro, instructions: dict[str, Instruction], uops: dict[str, Uop]
 ) -> None:
-    parts: list[Uop | Skip] = []
+    parts: list[Part] = []
     for part in macro.uops:
         match part:
             case parser.OpName():
-                if part.name not in uops:
-                    analysis_error(f"No Uop named {part.name}", macro.tokens[0])
-                parts.append(uops[part.name])
+                if part.name == "flush":
+                    parts.append(Flush())
+                else:
+                    if part.name not in uops:
+                        raise analysis_error(f"No Uop named {part.name}", macro.tokens[0])
+                    parts.append(uops[part.name])
             case parser.CacheEffect():
                 parts.append(Skip(part.size))
             case _:
index 277521678434c207ca5b82f8b05d9b1a7b7a523e..6a66693b93305d278de1502b9978eaf249ccd650 100644 (file)
@@ -23,7 +23,7 @@ from generators_common import (
 from cwriter import CWriter
 from typing import TextIO, Iterator
 from lexer import Token
-from stack import Stack, SizeMismatch
+from stack import Stack, StackError
 
 DEFAULT_OUTPUT = ROOT / "Python/optimizer_cases.c.h"
 DEFAULT_ABSTRACT_INPUT = (ROOT / "Python/optimizer_bytecodes.c").absolute().as_posix()
@@ -141,7 +141,7 @@ def write_uop(
                     out.emit(stack.push(var))
         out.start_line()
         stack.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=True)
-    except SizeMismatch as ex:
+    except StackError as ex:
         raise analysis_error(ex.args[0], uop.body[0])
 
 
index ebe62df537f15f8feccf552eb89e982d674a78de..f497fa34dfced4d0d68a983e22fd9eb1b55bd022 100644 (file)
@@ -114,7 +114,7 @@ class StackOffset:
         self.pushed = []
 
 
-class SizeMismatch(Exception):
+class StackError(Exception):
     pass
 
 
@@ -134,27 +134,29 @@ class Stack:
         if self.variables:
             popped = self.variables.pop()
             if popped.size != var.size:
-                raise SizeMismatch(
+                raise StackError(
                     f"Size mismatch when popping '{popped.name}' from stack to assign to {var.name}. "
                     f"Expected {var.size} got {popped.size}"
                 )
-            if popped.name == var.name:
+            if var.name in UNUSED:
+                if popped.name not in UNUSED and popped.name in self.defined:
+                    raise StackError(f"Value is declared unused, but is already cached by prior operation")
                 return ""
-            elif popped.name in UNUSED:
+            if popped.name in UNUSED or popped.name not in self.defined:
                 self.defined.add(var.name)
                 return (
                     f"{var.name} = {indirect}stack_pointer[{self.top_offset.to_c()}];\n"
                 )
-            elif var.name in UNUSED:
-                return ""
             else:
                 self.defined.add(var.name)
-                return f"{var.name} = {popped.name};\n"
+                if popped.name == var.name:
+                    return ""
+                else:
+                    return f"{var.name} = {popped.name};\n"
         self.base_offset.pop(var)
-        if var.name in UNUSED:
+        if var.name in UNUSED or not var.used:
             return ""
-        else:
-            self.defined.add(var.name)
+        self.defined.add(var.name)
         cast = f"({var.type})" if (not indirect and var.type) else ""
         bits = ".bits" if cast and not extract_bits else ""
         assign = (
@@ -178,6 +180,8 @@ class Stack:
             return f"{var.name} = &stack_pointer[{c_offset}];\n"
         else:
             self.top_offset.push(var)
+            if var.used:
+                self.defined.add(var.name)
             return ""
 
     def flush(self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
index 85be673b1c396c6c792194120de1a1699ab25e37..5dec66e8e0af156218ea5c2fc5340c86a26c5115 100644 (file)
@@ -12,6 +12,7 @@ from analyzer import (
     Part,
     analyze_files,
     Skip,
+    Flush,
     analysis_error,
     StackItem,
 )
@@ -24,7 +25,7 @@ from generators_common import (
 )
 from cwriter import CWriter
 from typing import TextIO
-from stack import Stack, SizeMismatch
+from stack import Stack, StackError
 
 
 DEFAULT_OUTPUT = ROOT / "Python/generated_cases.c.h"
@@ -32,30 +33,39 @@ DEFAULT_OUTPUT = ROOT / "Python/generated_cases.c.h"
 
 FOOTER = "#undef TIER_ONE\n"
 
+def declare_variable(var: StackItem, out: CWriter) -> None:
+    type, null = type_and_null(var)
+    space = " " if type[-1].isalnum() else ""
+    if var.condition:
+        out.emit(f"{type}{space}{var.name} = {null};\n")
+    else:
+        out.emit(f"{type}{space}{var.name};\n")
 
-def declare_variables(inst: Instruction, out: CWriter) -> None:
-    variables = {"unused"}
-    for uop in inst.parts:
-        if isinstance(uop, Uop):
-            for var in reversed(uop.stack.inputs):
-                if var.name not in variables:
-                    variables.add(var.name)
-                    type, null = type_and_null(var)
-                    space = " " if type[-1].isalnum() else ""
-                    if var.condition:
-                        out.emit(f"{type}{space}{var.name} = {null};\n")
-                    else:
-                        out.emit(f"{type}{space}{var.name};\n")
-            for var in uop.stack.outputs:
-                if var.name not in variables:
-                    variables.add(var.name)
-                    type, null = type_and_null(var)
-                    space = " " if type[-1].isalnum() else ""
-                    if var.condition:
-                        out.emit(f"{type}{space}{var.name} = {null};\n")
-                    else:
-                        out.emit(f"{type}{space}{var.name};\n")
 
+def declare_variables(inst: Instruction, out: CWriter) -> None:
+    stack = Stack()
+    for part in inst.parts:
+        if not isinstance(part, Uop):
+            continue
+        try:
+            for var in reversed(part.stack.inputs):
+                stack.pop(var)
+            for var in part.stack.outputs:
+                 stack.push(var)
+        except StackError as ex:
+            raise analysis_error(ex.args[0], part.body[0]) from None
+    required = set(stack.defined)
+    for part in inst.parts:
+        if not isinstance(part, Uop):
+            continue
+        for var in part.stack.inputs:
+            if var.name in required:
+                required.remove(var.name)
+                declare_variable(var, out)
+        for var in part.stack.outputs:
+            if var.name in required:
+                required.remove(var.name)
+                declare_variable(var, out)
 
 def write_uop(
     uop: Part, out: CWriter, offset: int, stack: Stack, inst: Instruction, braces: bool
@@ -65,6 +75,10 @@ def write_uop(
         entries = "entries" if uop.size > 1 else "entry"
         out.emit(f"/* Skip {uop.size} cache {entries} */\n")
         return offset + uop.size
+    if isinstance(uop, Flush):
+        out.emit(f"// flush\n")
+        stack.flush(out)
+        return offset
     try:
         out.start_line()
         if braces:
@@ -99,15 +113,15 @@ def write_uop(
             out.emit("}\n")
         # out.emit(stack.as_comment() + "\n")
         return offset
-    except SizeMismatch as ex:
-        raise analysis_error(ex.args[0], uop.body[0])
+    except StackError as ex:
+        raise analysis_error(ex.args[0], uop.body[0]) from None
 
 
 def uses_this(inst: Instruction) -> bool:
     if inst.properties.needs_this:
         return True
     for uop in inst.parts:
-        if isinstance(uop, Skip):
+        if not isinstance(uop, Uop):
             continue
         for cache in uop.caches:
             if cache.name != "unused":
index 7a69aa6e121fa7a245ca5a0f476c0ffcceef9cee..88ad0fd797f0ccacbbb81ea9c6f522cf7f9171f3 100644 (file)
@@ -25,17 +25,17 @@ from generators_common import (
 from cwriter import CWriter
 from typing import TextIO, Iterator
 from lexer import Token
-from stack import Stack, SizeMismatch
+from stack import Stack, StackError
 
 DEFAULT_OUTPUT = ROOT / "Python/executor_cases.c.h"
 
 
 def declare_variable(
-    var: StackItem, uop: Uop, variables: set[str], out: CWriter
+    var: StackItem, uop: Uop, required: set[str], out: CWriter
 ) -> None:
-    if var.name in variables:
+    if var.name not in required:
         return
-    variables.add(var.name)
+    required.remove(var.name)
     type, null = type_and_null(var)
     space = " " if type[-1].isalnum() else ""
     if var.condition:
@@ -49,12 +49,16 @@ def declare_variable(
 
 
 def declare_variables(uop: Uop, out: CWriter) -> None:
-    variables = {"unused"}
+    stack = Stack()
     for var in reversed(uop.stack.inputs):
-        declare_variable(var, uop, variables, out)
+        stack.pop(var)
     for var in uop.stack.outputs:
-        declare_variable(var, uop, variables, out)
-
+            stack.push(var)
+    required = set(stack.defined)
+    for var in reversed(uop.stack.inputs):
+        declare_variable(var, uop, required, out)
+    for var in uop.stack.outputs:
+        declare_variable(var, uop, required, out)
 
 def tier2_replace_error(
     out: CWriter,
@@ -177,8 +181,8 @@ def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None:
         if uop.properties.stores_sp:
             for i, var in enumerate(uop.stack.outputs):
                 out.emit(stack.push(var))
-    except SizeMismatch as ex:
-        raise analysis_error(ex.args[0], uop.body[0])
+    except StackError as ex:
+        raise analysis_error(ex.args[0], uop.body[0]) from None
 
 
 SKIPS = ("_EXTENDED_ARG",)