git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-132732: Automatically constant evaluate pure operations (GH-132733)
author: Ken Jin <kenjin@python.org>
Fri, 27 Jun 2025 11:37:44 +0000 (19:37 +0800)
committer: GitHub <noreply@github.com>
Fri, 27 Jun 2025 11:37:44 +0000 (19:37 +0800)
This adds a "macro" to the optimizer DSL called "REPLACE_OPCODE_IF_EVALUATES_PURE", which automatically constant-evaluates a bytecode body if certain inputs have no side effects upon evaluation (such as ints, strings, and floats).

Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
Include/internal/pycore_optimizer.h
Include/internal/pycore_uop_metadata.h
Lib/test/test_generated_cases.py
Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst [new file with mode: 0644]
Python/bytecodes.c
Python/optimizer_analysis.c
Python/optimizer_bytecodes.c
Python/optimizer_cases.c.h
Python/optimizer_symbols.c
Tools/cases_generator/optimizer_generator.py

index 576c27947824b4a498a83f3f4f661fe2459c3e87..8b7f12bf03d6242fc5f505031a6c088e2b4fe77d 100644 (file)
@@ -10,7 +10,7 @@ extern "C" {
 
 #include "pycore_typedefs.h"      // _PyInterpreterFrame
 #include "pycore_uop_ids.h"
-#include "pycore_stackref.h"
+#include "pycore_stackref.h"      // _PyStackRef
 #include <stdbool.h>
 
 
@@ -316,6 +316,9 @@ extern JitOptRef _Py_uop_sym_new_type(
     JitOptContext *ctx, PyTypeObject *typ);
 
 extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val);
+extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val);
+bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym);
+_PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym);
 extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx);
 extern bool _Py_uop_sym_has_type(JitOptRef sym);
 extern bool _Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ);
index 52cbc2fffe484eda2414904f587beab30106898e..ff7e800aa9bb1a899d645393bb6680d10902459c 100644 (file)
@@ -106,7 +106,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
-    [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
+    [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG,
     [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
index 9e0fd1218f253418a55923f1f4c7931ce6394a05..eb01328b6ea946cdf36dde66afd79cd4eb1b1e06 100644 (file)
@@ -2224,5 +2224,202 @@ class TestGeneratedAbstractCases(unittest.TestCase):
                                     "Inputs must have equal sizes"):
             self.run_cases_test(input, input2, output)
 
+    def test_pure_uop_body_copied_in(self):
+        # Note: any non-escaping call works.
+        # In this case, we use PyStackRef_IsNone.
+        input = """
+        pure op(OP, (foo -- res)) {
+            res = PyStackRef_IsNone(foo);
+        }
+        """
+        input2 = """
+        op(OP, (foo -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = sym_new_known(ctx, foo);
+        }
+        """
+        output = """
+        case OP: {
+            JitOptRef foo;
+            JitOptRef res;
+            foo = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, foo)
+            ) {
+                JitOptRef foo_sym = foo;
+                _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                res_stackref = PyStackRef_IsNone(foo);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
+            res = sym_new_known(ctx, foo);
+            stack_pointer[-1] = res;
+            break;
+        }
+        """
+        self.run_cases_test(input, input2, output)
+
+    def test_pure_uop_body_copied_in_deopt(self):
+        # Note: any non-escaping call works.
+        # In this case, we use PyStackRef_IsNull.
+        input = """
+        pure op(OP, (foo -- res)) {
+            DEOPT_IF(PyStackRef_IsNull(foo));
+            res = foo;
+        }
+        """
+        input2 = """
+        op(OP, (foo -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = foo;
+        }
+        """
+        output = """
+        case OP: {
+            JitOptRef foo;
+            JitOptRef res;
+            foo = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, foo)
+            ) {
+                JitOptRef foo_sym = foo;
+                _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                if (PyStackRef_IsNull(foo)) {
+                    ctx->done = true;
+                    break;
+                }
+                res_stackref = foo;
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
+            res = foo;
+            stack_pointer[-1] = res;
+            break;
+        }
+        """
+        self.run_cases_test(input, input2, output)
+
+    def test_pure_uop_body_copied_in_error_if(self):
+        # Note: any non-escaping call works.
+        # In this case, we use PyStackRef_IsNull.
+        input = """
+        pure op(OP, (foo -- res)) {
+            ERROR_IF(PyStackRef_IsNull(foo));
+            res = foo;
+        }
+        """
+        input2 = """
+        op(OP, (foo -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = foo;
+        }
+        """
+        output = """
+        case OP: {
+            JitOptRef foo;
+            JitOptRef res;
+            foo = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, foo)
+            ) {
+                JitOptRef foo_sym = foo;
+                _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                if (PyStackRef_IsNull(foo)) {
+                    goto error;
+                }
+                res_stackref = foo;
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
+            res = foo;
+            stack_pointer[-1] = res;
+            break;
+        }
+        """
+        self.run_cases_test(input, input2, output)
+
+
+    def test_replace_opcode_uop_body_copied_in_complex(self):
+        input = """
+        pure op(OP, (foo -- res)) {
+            if (foo) {
+                res = PyStackRef_IsNone(foo);
+            }
+            else {
+                res = 1;
+            }
+        }
+        """
+        input2 = """
+        op(OP, (foo -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = sym_new_known(ctx, foo);
+        }
+        """
+        output = """
+        case OP: {
+            JitOptRef foo;
+            JitOptRef res;
+            foo = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, foo)
+            ) {
+                JitOptRef foo_sym = foo;
+                _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                if (foo) {
+                    res_stackref = PyStackRef_IsNone(foo);
+                }
+                else {
+                    res_stackref = 1;
+                }
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
+            res = sym_new_known(ctx, foo);
+            stack_pointer[-1] = res;
+            break;
+        }
+        """
+        self.run_cases_test(input, input2, output)
+
+    def test_replace_opocode_uop_reject_array_effects(self):
+        input = """
+        pure op(OP, (foo[2] -- res)) {
+            if (foo) {
+                res = PyStackRef_IsNone(foo);
+            }
+            else {
+                res = 1;
+            }
+        }
+        """
+        input2 = """
+        op(OP, (foo[2] -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = sym_new_unknown(ctx);
+        }
+        """
+        output = """
+        """
+        with self.assertRaisesRegex(SyntaxError,
+                                    "Pure evaluation cannot take array-like inputs"):
+            self.run_cases_test(input, input2, output)
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst
new file mode 100644 (file)
index 0000000..aadaf21
--- /dev/null
@@ -0,0 +1 @@
+Automatically constant evaluate bytecode operations marked as pure in the JIT optimizer.
index 535e552e047475b7cbb7b0f3e801b2cb38795eef..1a5a9ff13a23a56e0e42f94c90514a3f69afcb05 100644 (file)
@@ -850,7 +850,7 @@ dummy_func(
             DEOPT_IF(!res);
         }
 
-        pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) {
+       op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
index 145a8c118d3612e56ff3afede8058167b9b62a6a..fab6fef5ccda10b9c1b0ad8621d0db7086fc4c1a 100644 (file)
@@ -26,6 +26,8 @@
 #include "pycore_function.h"
 #include "pycore_uop_ids.h"
 #include "pycore_range.h"
+#include "pycore_unicodeobject.h"
+#include "pycore_ceval.h"
 
 #include <stdarg.h>
 #include <stdbool.h>
@@ -321,7 +323,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 /* Shortened forms for convenience, used in optimizer_bytecodes.c */
 #define sym_is_not_null _Py_uop_sym_is_not_null
 #define sym_is_const _Py_uop_sym_is_const
+#define sym_is_safe_const _Py_uop_sym_is_safe_const
 #define sym_get_const _Py_uop_sym_get_const
+#define sym_new_const_steal _Py_uop_sym_new_const_steal
+#define sym_get_const_as_stackref _Py_uop_sym_get_const_as_stackref
 #define sym_new_unknown _Py_uop_sym_new_unknown
 #define sym_new_not_null _Py_uop_sym_new_not_null
 #define sym_new_type _Py_uop_sym_new_type
@@ -350,6 +355,8 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 #define sym_new_compact_int _Py_uop_sym_new_compact_int
 #define sym_new_truthiness _Py_uop_sym_new_truthiness
 
+#define JUMP_TO_LABEL(label) goto label;
+
 static int
 optimize_to_bool(
     _PyUOpInstruction *this_instr,
index f8a0484bdc2b04b0a6d836b575a4093ee1e7441a..3182e8b3b70144d3bf60fb24a98f9e66e65d0f2b 100644 (file)
@@ -181,6 +181,7 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP, (lhs, rhs -- res)) {
+        REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs);
         bool lhs_int = sym_matches_type(lhs, &PyLong_Type);
         bool rhs_int = sym_matches_type(rhs, &PyLong_Type);
         bool lhs_float = sym_matches_type(lhs, &PyFloat_Type);
@@ -235,35 +236,23 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP_ADD_INT, (left, right -- res)) {
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
         res = sym_new_compact_int(ctx);
     }
 
     op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) {
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
         res = sym_new_compact_int(ctx);
     }
 
     op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
         res = sym_new_compact_int(ctx);
     }
 
     op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) {
-        if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-            assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-            assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-            PyObject *temp = PyFloat_FromDouble(
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) +
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-            if (temp == NULL) {
-                goto error;
-            }
-            res = sym_new_const(ctx, temp);
-            Py_DECREF(temp);
-            // TODO gh-115506:
-            // replace opcode with constant propagated one and update tests!
-        }
-        else {
-            res = sym_new_type(ctx, &PyFloat_Type);
-        }
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_type(ctx, &PyFloat_Type);
         // TODO (gh-134584): Refactor this to use another uop
         if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
             REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
@@ -271,23 +260,8 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) {
-        if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-            assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-            assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-            PyObject *temp = PyFloat_FromDouble(
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) -
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-            if (temp == NULL) {
-                goto error;
-            }
-            res = sym_new_const(ctx, temp);
-            Py_DECREF(temp);
-            // TODO gh-115506:
-            // replace opcode with constant propagated one and update tests!
-        }
-        else {
-            res = sym_new_type(ctx, &PyFloat_Type);
-        }
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_type(ctx, &PyFloat_Type);
         // TODO (gh-134584): Refactor this to use another uop
         if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
             REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
@@ -295,23 +269,8 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) {
-        if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-            assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-            assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-            PyObject *temp = PyFloat_FromDouble(
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) *
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-            if (temp == NULL) {
-                goto error;
-            }
-            res = sym_new_const(ctx, temp);
-            Py_DECREF(temp);
-            // TODO gh-115506:
-            // replace opcode with constant propagated one and update tests!
-        }
-        else {
-            res = sym_new_type(ctx, &PyFloat_Type);
-        }
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_type(ctx, &PyFloat_Type);
         // TODO (gh-134584): Refactor this to use another uop
         if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
             REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
@@ -319,19 +278,8 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) {
-        if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-            assert(PyUnicode_CheckExact(sym_get_const(ctx, left)));
-            assert(PyUnicode_CheckExact(sym_get_const(ctx, right)));
-            PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right));
-            if (temp == NULL) {
-                goto error;
-            }
-            res = sym_new_const(ctx, temp);
-            Py_DECREF(temp);
-        }
-        else {
-            res = sym_new_type(ctx, &PyUnicode_Type);
-        }
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_type(ctx, &PyUnicode_Type);
     }
 
     op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) {
@@ -443,6 +391,7 @@ dummy_func(void) {
     }
 
     op(_UNARY_NOT, (value -- res)) {
+        REPLACE_OPCODE_IF_EVALUATES_PURE(value);
         sym_set_type(value, &PyBool_Type);
         res = sym_new_truthiness(ctx, value, false);
     }
index 10767ccdbd57f5b845ab9891305af776d06006c0..8d30df3aa7d429d71a02541999a23032029f9fc5 100644 (file)
             JitOptRef value;
             JitOptRef res;
             value = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, value)
+            ) {
+                JitOptRef value_sym = value;
+                _PyStackRef value = sym_get_const_as_stackref(ctx, value_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                assert(PyStackRef_BoolCheck(value));
+                res_stackref = PyStackRef_IsFalse(value)
+                ? PyStackRef_True : PyStackRef_False;
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
             sym_set_type(value, &PyBool_Type);
             res = sym_new_truthiness(ctx, value, false);
             stack_pointer[-1] = res;
         }
 
         case _BINARY_OP_MULTIPLY_INT: {
+            JitOptRef right;
+            JitOptRef left;
             JitOptRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res_stackref = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res_stackref )) {
+                    ctx->done = true;
+                    break;
+                }
+                PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
+                PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                break;
+            }
             res = sym_new_compact_int(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
         }
 
         case _BINARY_OP_ADD_INT: {
+            JitOptRef right;
+            JitOptRef left;
             JitOptRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res_stackref = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res_stackref )) {
+                    ctx->done = true;
+                    break;
+                }
+                PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
+                PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                break;
+            }
             res = sym_new_compact_int(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
         }
 
         case _BINARY_OP_SUBTRACT_INT: {
+            JitOptRef right;
+            JitOptRef left;
             JitOptRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res_stackref = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res_stackref )) {
+                    ctx->done = true;
+                    break;
+                }
+                PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
+                PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                break;
+            }
             res = sym_new_compact_int(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
             JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-                assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-                assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-                PyObject *temp = PyFloat_FromDouble(
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) *
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-                if (temp == NULL) {
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
+                STAT_INC(BINARY_OP, hit);
+                double dres =
+                ((PyFloatObject *)left_o)->ob_fval *
+                ((PyFloatObject *)right_o)->ob_fval;
+                res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres);
+                if (PyStackRef_IsNull(res_stackref )) {
                     goto error;
                 }
-                res = sym_new_const(ctx, temp);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
                 stack_pointer[-2] = res;
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
-                Py_DECREF(temp);
-            }
-            else {
-                res = sym_new_type(ctx, &PyFloat_Type);
-                stack_pointer += -1;
+                break;
             }
+            res = sym_new_type(ctx, &PyFloat_Type);
             if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
                 REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
             }
-            stack_pointer[-1] = res;
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
             JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-                assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-                assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-                PyObject *temp = PyFloat_FromDouble(
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) +
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-                if (temp == NULL) {
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
+                STAT_INC(BINARY_OP, hit);
+                double dres =
+                ((PyFloatObject *)left_o)->ob_fval +
+                ((PyFloatObject *)right_o)->ob_fval;
+                res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres);
+                if (PyStackRef_IsNull(res_stackref )) {
                     goto error;
                 }
-                res = sym_new_const(ctx, temp);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
                 stack_pointer[-2] = res;
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
-                Py_DECREF(temp);
-            }
-            else {
-                res = sym_new_type(ctx, &PyFloat_Type);
-                stack_pointer += -1;
+                break;
             }
+            res = sym_new_type(ctx, &PyFloat_Type);
             if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
                 REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
             }
-            stack_pointer[-1] = res;
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
             JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-                assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-                assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-                PyObject *temp = PyFloat_FromDouble(
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) -
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-                if (temp == NULL) {
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
+                STAT_INC(BINARY_OP, hit);
+                double dres =
+                ((PyFloatObject *)left_o)->ob_fval -
+                ((PyFloatObject *)right_o)->ob_fval;
+                res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres);
+                if (PyStackRef_IsNull(res_stackref )) {
                     goto error;
                 }
-                res = sym_new_const(ctx, temp);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
                 stack_pointer[-2] = res;
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
-                Py_DECREF(temp);
-            }
-            else {
-                res = sym_new_type(ctx, &PyFloat_Type);
-                stack_pointer += -1;
+                break;
             }
+            res = sym_new_type(ctx, &PyFloat_Type);
             if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
                 REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
             }
-            stack_pointer[-1] = res;
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
             JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-                assert(PyUnicode_CheckExact(sym_get_const(ctx, left)));
-                assert(PyUnicode_CheckExact(sym_get_const(ctx, right)));
-                PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right));
-                if (temp == NULL) {
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyUnicode_CheckExact(left_o));
+                assert(PyUnicode_CheckExact(right_o));
+                STAT_INC(BINARY_OP, hit);
+                PyObject *res_o = PyUnicode_Concat(left_o, right_o);
+                PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc);
+                PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
+                if (res_o == NULL) {
                     goto error;
                 }
-                res = sym_new_const(ctx, temp);
+                res_stackref = PyStackRef_FromPyObjectSteal(res_o);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
                 stack_pointer[-2] = res;
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
-                Py_DECREF(temp);
-            }
-            else {
-                res = sym_new_type(ctx, &PyUnicode_Type);
-                stack_pointer += -1;
+                break;
             }
-            stack_pointer[-1] = res;
+            res = sym_new_type(ctx, &PyUnicode_Type);
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
             JitOptRef res;
             rhs = stack_pointer[-1];
             lhs = stack_pointer[-2];
+            if (
+                sym_is_safe_const(ctx, lhs) &&
+                sym_is_safe_const(ctx, rhs)
+            ) {
+                JitOptRef lhs_sym = lhs;
+                JitOptRef rhs_sym = rhs;
+                _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym);
+                _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs);
+                PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs);
+                assert(_PyEval_BinaryOps[oparg]);
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                PyObject *res_o = _PyEval_BinaryOps[oparg](lhs_o, rhs_o);
+                if (res_o == NULL) {
+                    JUMP_TO_LABEL(error);
+                }
+                res_stackref = PyStackRef_FromPyObjectSteal(res_o);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                break;
+            }
             bool lhs_int = sym_matches_type(lhs, &PyLong_Type);
             bool rhs_int = sym_matches_type(rhs, &PyLong_Type);
             bool lhs_float = sym_matches_type(lhs, &PyFloat_Type);
index c3d9e0e778bf55886dc950412d6c37689df660b0..e4dbca8362f4ce54668ef34d4e12217426278ffb 100644 (file)
@@ -185,6 +185,35 @@ _Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef ref)
     return NULL;
 }
 
+/*
+ Returns the constant value recorded for `sym`, wrapped in a _PyStackRef
+ built with PyStackRef_FromPyObjectBorrow().
+ Returns PyStackRef_NULL when _Py_uop_sym_get_const() yields NULL,
+ i.e. when no constant is available for the symbol.
+ */
+_PyStackRef
+_Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym)
+{
+    PyObject *const_val = _Py_uop_sym_get_const(ctx, sym);
+    if (const_val == NULL) {
+        return PyStackRef_NULL;
+    }
+    return PyStackRef_FromPyObjectBorrow(const_val);
+}
+
+/*
+ Indicates whether the constant is safe to constant evaluate
+ (without side effects).
+
+ Returns false when _Py_uop_sym_get_const() yields NULL for the symbol.
+ Otherwise returns true only when the constant's type, compared by
+ identity via Py_TYPE() (so subclasses do not match), is one of:
+ int, str, float, tuple or bool.
+
+ NOTE(review): a tuple can contain elements of arbitrary types; confirm
+ that every operation gated by this check stays side-effect free for
+ tuple operands.
+ */
+bool
+_Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym)
+{
+    PyObject *const_val = _Py_uop_sym_get_const(ctx, sym);
+    if (const_val == NULL) {
+        return false;
+    }
+    PyTypeObject *typ = Py_TYPE(const_val);
+    return (typ == &PyLong_Type) ||
+           (typ == &PyUnicode_Type) ||
+           (typ == &PyFloat_Type) ||
+           (typ == &PyTuple_Type) ||
+           (typ == &PyBool_Type);
+}
+
 void
 _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ)
 {
@@ -467,6 +496,16 @@ _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val)
     return ref;
 }
 
+/*
+ Same as _Py_uop_sym_new_const(), except that the caller's reference to
+ `const_val` is released before returning ("stolen").
+ `const_val` must not be NULL.
+ */
+JitOptRef
+_Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val)
+{
+    assert(const_val != NULL);
+    JitOptRef res = _Py_uop_sym_new_const(ctx, const_val);
+    // Decref once because sym_new_const increfs it.
+    Py_DECREF(const_val);
+    return res;
+}
+
 JitOptRef
 _Py_uop_sym_new_null(JitOptContext *ctx)
 {
index 3b4fe64b02a8077d9bb97ec36934d9d075523e4d..4556b6d5a74f37b523b5ba68b79b9b08689bbd1d 100644 (file)
@@ -12,6 +12,8 @@ from analyzer import (
     analyze_files,
     StackItem,
     analysis_error,
+    CodeSection,
+    Label,
 )
 from generators_common import (
     DEFAULT_INPUT,
@@ -19,6 +21,7 @@ from generators_common import (
     write_header,
     Emitter,
     TokenIterator,
+    always_true,
 )
 from cwriter import CWriter
 from typing import TextIO
@@ -75,6 +78,9 @@ def type_name(var: StackItem) -> str:
         return "JitOptRef *"
     return "JitOptRef "
 
+def stackref_type_name(var: StackItem) -> str:
+    """Return the C type prefix for declaring ``var`` as a ``_PyStackRef``.
+
+    Array-like stack items are not supported and trip the assertion below.
+    The returned string ends with a space so a variable name can be
+    appended directly.
+    """
+    assert not var.is_array(), "Unsafe to convert a symbol to an array-like StackRef."
+    return "_PyStackRef "
 
 def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
     variables = {"unused"}
@@ -135,6 +141,12 @@ def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None:
 
 class OptimizerEmitter(Emitter):
 
+    def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack):
+        """Create the emitter and register the pure-evaluation macro.
+
+        ``original_uop`` and ``stack`` are stored on the instance; they are
+        read later by ``replace_opcode_if_evaluates_pure``.
+        """
+        super().__init__(out, labels)
+        # Map the DSL macro name to the method that expands it.
+        self._replacers["REPLACE_OPCODE_IF_EVALUATES_PURE"] = self.replace_opcode_if_evaluates_pure
+        self.original_uop = original_uop
+        self.stack = stack
+
     def emit_save(self, storage: Storage) -> None:
         storage.flush(self.out)
 
@@ -145,6 +157,185 @@ class OptimizerEmitter(Emitter):
         self.out.emit(goto)
         self.out.emit(label)
 
+    def replace_opcode_if_evaluates_pure(
+        self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        """Expand ``REPLACE_OPCODE_IF_EVALUATES_PURE(...)`` into C code.
+
+        The macro arguments are read from ``tkn_iter`` up to the terminating
+        semicolon. The emitted C is an ``if`` block that is entered only when
+        every named input passes ``sym_is_safe_const``. Inside the block the
+        symbolic inputs are shadowed by ``_PyStackRef`` constants, the body
+        of ``self.original_uop`` is emitted, and the outputs are converted
+        back into constant symbols before a ``break``.
+
+        Raises ``analysis_error`` when no input is named, when a named input
+        is not part of ``uop``'s input stack effect, or when an input or
+        output is array-like.
+
+        Always returns ``True``.
+        """
+        assert isinstance(uop, Uop)
+        # Gather every identifier token up to the terminating semicolon.
+        input_identifiers = []
+        for token in tkn_iter:
+            if token.kind == "IDENTIFIER":
+                input_identifiers.append(token)
+            if token.kind == "SEMI":
+                break
+
+        if len(input_identifiers) == 0:
+            raise analysis_error(
+                "To evaluate an operation as pure, it must have at least 1 input",
+                tkn
+            )
+        # Check that the input identifiers belong to the uop's
+        # input stack effect
+        uop_stack_effect_input_identifers = {inp.name for inp in uop.stack.inputs}
+        for input_tkn in input_identifiers:
+            if input_tkn.text not in uop_stack_effect_input_identifers:
+                raise analysis_error(f"{input_tkn.text} referenced in "
+                                     f"REPLACE_OPCODE_IF_EVALUATES_PURE but does not "
+                                     f"exist in the base uop's input stack effects",
+                                     input_tkn)
+        input_identifiers_as_str = {tkn.text for tkn in input_identifiers}
+        # Keep the inputs in the order they appear in the stack effect.
+        used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in input_identifiers_as_str]
+        assert len(used_stack_inputs) > 0
+        # The nested emitter redirects output names to their "_stackref"
+        # variants and works on a copy of the stack.
+        emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy())
+        # Guard: every named input must be a safe constant.
+        emitter.emit("if (\n")
+        for inp in used_stack_inputs[:-1]:
+            emitter.emit(f"sym_is_safe_const(ctx, {inp.name}) &&\n")
+        emitter.emit(f"sym_is_safe_const(ctx, {used_stack_inputs[-1].name})\n")
+        emitter.emit(') {\n')
+        # Declare variables, before they are shadowed.
+        for inp in used_stack_inputs:
+            if inp.used:
+                emitter.emit(f"{type_name(inp)}{inp.name}_sym = {inp.name};\n")
+        # Shadow the symbolic variables with stackrefs.
+        for inp in used_stack_inputs:
+            if inp.is_array():
+                raise analysis_error("Pure evaluation cannot take array-like inputs.", tkn)
+            if inp.used:
+                emitter.emit(f"{stackref_type_name(inp)}{inp.name} = sym_get_const_as_stackref(ctx, {inp.name}_sym);\n")
+        # Rename all output variables to stackref variant.
+        for outp in self.original_uop.stack.outputs:
+            if outp.is_array():
+                raise analysis_error(
+                    "Array output StackRefs not supported for evaluating pure ops.",
+                    self.original_uop.body.open
+                )
+            emitter.emit(f"_PyStackRef {outp.name}_stackref;\n")
+
+
+        # Rebinds the `storage` parameter: a fresh Storage is built for the
+        # original uop using a null writer.
+        storage = Storage.for_uop(self.stack, self.original_uop, CWriter.null(), check_liveness=False)
+        # No reference management of outputs needed.
+        for var in storage.outputs:
+            var.in_local = True
+        emitter.emit("/* Start of uop copied from bytecodes for constant evaluation */\n")
+        emitter.emit_tokens(self.original_uop, storage, inst=None, emit_braces=False)
+        self.out.start_line()
+        emitter.emit("/* End of uop copied from bytecodes for constant evaluation */\n")
+        # Finally, assign back the output stackrefs to symbolics.
+        for outp in self.original_uop.stack.outputs:
+            # All new stackrefs are created from new references.
+            # That's how the stackref contract works.
+            if not outp.peek:
+                emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n")
+            else:
+                emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n")
+        storage.flush(self.out)
+        # Leave the enclosing case: the constant-evaluated path is complete.
+        emitter.emit("break;\n")
+        emitter.emit("}\n")
+        return True
+
+class OptimizerConstantEmitter(OptimizerEmitter):
+    """Emitter used to copy a uop body for constant evaluation.
+
+    Every output name of ``original_uop`` is emitted with a ``_stackref``
+    suffix, and the ``DEOPT_IF``/``EXIT_IF``/``ERROR_IF`` handlers are
+    overridden to emit code suited to the optimizer's case body.
+    """
+
+    def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack):
+        """Initialise the base emitter and add one replacer per uop output."""
+        super().__init__(out, labels, original_uop, stack)
+        # Replace all outputs to point to their stackref versions.
+        overrides = {
+            outp.name: self.emit_stackref_override for outp in self.original_uop.stack.outputs
+        }
+        # Build a new dict rather than mutating the existing one in place.
+        self._replacers = {**self._replacers, **overrides}
+
+    def emit_to_with_replacement(
+        self,
+        out: CWriter,
+        tkn_iter: TokenIterator,
+        end: str,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None
+    ) -> Token:
+        """Emit tokens until one of kind ``end`` is met outside parentheses.
+
+        Tokens whose text has an entry in ``self._replacers`` are passed to
+        that replacer instead of being written verbatim. Returns the
+        terminating token, which is not emitted. Raises ``analysis_error``
+        if the iterator runs out first.
+        """
+        # Current parenthesis nesting depth relative to the starting point.
+        parens = 0
+        for tkn in tkn_iter:
+            if tkn.kind == end and parens == 0:
+                return tkn
+            if tkn.kind == "LPAREN":
+                parens += 1
+            if tkn.kind == "RPAREN":
+                parens -= 1
+            if tkn.text in self._replacers:
+                self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst)
+            else:
+                out.emit(tkn)
+        # NOTE(review): if tkn_iter yields nothing, `tkn` is unbound here.
+        raise analysis_error(f"Expecting {end}. Reached end of file", tkn)
+
+    def emit_stackref_override(
+        self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        """Emit ``tkn`` followed by ``_stackref `` and return ``True``."""
+        self.out.emit(tkn)
+        self.out.emit("_stackref ")
+        return True
+
+    def deopt_if(
+        self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        """Emit ``if (<cond>) { ctx->done = true; break; }`` for the macro.
+
+        Returns ``False`` when ``always_true`` reports the first token of the
+        condition as always true, ``True`` otherwise.
+        """
+        self.out.start_line()
+        self.out.emit("if (")
+        lparen = next(tkn_iter)
+        assert lparen.kind == "LPAREN"
+        first_tkn = tkn_iter.peek()
+        self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst)
+        self.emit(") {\n")
+        next(tkn_iter)  # Semicolon
+        # We guarantee this will deopt in real-world code
+        # via constants analysis. So just bail.
+        self.emit("ctx->done = true;\n")
+        self.emit("break;\n")
+        self.emit("}\n")
+        return not always_true(first_tkn)
+
+    # EXIT_IF is handled exactly like DEOPT_IF.
+    exit_if = deopt_if
+
+    def error_if(
+        self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        """Emit ``goto error;``, wrapped in ``if (<cond>) {...}`` if needed.
+
+        When ``always_true`` reports the first condition token as always
+        true, the condition is skipped and a bare ``goto error;`` is
+        emitted. Returns ``False`` in that unconditional case and ``True``
+        otherwise.
+        """
+        lparen = next(tkn_iter)
+        assert lparen.kind == "LPAREN"
+        first_tkn = tkn_iter.peek()
+        unconditional = always_true(first_tkn)
+        if unconditional:
+            # Skip the condition token and the closing parenthesis.
+            next(tkn_iter)
+            next(tkn_iter)  # RPAREN
+            self.out.start_line()
+        else:
+            self.out.emit_at("if ", tkn)
+            self.emit(lparen)
+            self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst)
+            self.out.emit(") {\n")
+        next(tkn_iter)  # Semicolon
+        storage.clear_inputs("at ERROR_IF")
+
+        self.out.emit("goto error;\n")
+        if not unconditional:
+            self.out.emit("}\n")
+        return not unconditional
+
+
 def write_uop(
     override: Uop | None,
     uop: Uop,
@@ -175,13 +366,14 @@ def write_uop(
                         cast = f"uint{cache.size*16}_t"
                     out.emit(f"{type}{cache.name} = ({cast})this_instr->operand0;\n")
         if override:
-            emitter = OptimizerEmitter(out, {})
+            emitter = OptimizerEmitter(out, {}, uop, stack.copy())
             # No reference management of inputs needed.
             for var in storage.inputs:  # type: ignore[possibly-undefined]
                 var.in_local = False
             _, storage = emitter.emit_tokens(override, storage, None, False)
             out.start_line()
             storage.flush(out)
+            out.start_line()
         else:
             emit_default(out, uop, stack)
             out.start_line()