git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-113710: Add a tier 2 peephole optimization pass. (GH-114487)
author Mark Shannon <mark@hotpy.org>
Wed, 24 Jan 2024 12:08:31 +0000 (12:08 +0000)
committer GitHub <noreply@github.com>
Wed, 24 Jan 2024 12:08:31 +0000 (12:08 +0000)
* Convert _LOAD_CONST to inline versions

* Remove PEP 523 checks

Include/internal/pycore_uop_ids.h
Include/internal/pycore_uop_metadata.h
Python/bytecodes.c
Python/executor_cases.c.h
Python/optimizer.c
Python/optimizer_analysis.c
Python/pystate.c

index 8ee90d79a13c2f850d0a1fa4c46f40e8a60129d3..a7056586ff04c0100785af667ceb0f8397f18453 100644 (file)
@@ -230,9 +230,10 @@ extern "C" {
 #define _JUMP_TO_TOP 377
 #define _SAVE_RETURN_OFFSET 378
 #define _CHECK_VALIDITY 379
-#define _LOAD_CONST_INLINE_BORROW 380
-#define _INTERNAL_INCREMENT_OPT_COUNTER 381
-#define MAX_UOP_ID 381
+#define _LOAD_CONST_INLINE 380
+#define _LOAD_CONST_INLINE_BORROW 381
+#define _INTERNAL_INCREMENT_OPT_COUNTER 382
+#define MAX_UOP_ID 382
 
 #ifdef __cplusplus
 }
index 9bfb4f4f3a4dea31ec5773c8dd94416849dd7e67..14d3382e895cdf4ed99ae3091d4d2428bc393532 100644 (file)
@@ -202,6 +202,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
     [_EXIT_TRACE] = HAS_DEOPT_FLAG,
     [_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
+    [_LOAD_CONST_INLINE] = 0,
     [_LOAD_CONST_INLINE_BORROW] = 0,
     [_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
 };
@@ -329,6 +330,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT",
     [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS",
     [_LOAD_CONST] = "_LOAD_CONST",
+    [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE",
     [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW",
     [_LOAD_DEREF] = "_LOAD_DEREF",
     [_LOAD_FAST] = "_LOAD_FAST",
index 7674ff81f64cec058411280922c53e496af4a4fd..18749ce60ecd45b9b5324e1d73ff7a6544c5d852 100644 (file)
@@ -4070,6 +4070,10 @@ dummy_func(
             DEOPT_IF(!current_executor->vm_data.valid);
         }
 
+        op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
+            value = Py_NewRef(ptr);
+        }
+
         op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
             value = ptr;
         }
index 2b4399b25bae2bc560e0101dd7d42953f2a5ae7a..241b9056207715d94d4376dc49b45c1e192f7d8d 100644 (file)
             break;
         }
 
+        case _LOAD_CONST_INLINE: {
+            PyObject *value;
+            PyObject *ptr = (PyObject *)CURRENT_OPERAND();
+            value = Py_NewRef(ptr);
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            break;
+        }
+
         case _LOAD_CONST_INLINE_BORROW: {
             PyObject *value;
             PyObject *ptr = (PyObject *)CURRENT_OPERAND();
index 1551a5ef61f892f88bd6bd5155fdb77a318d134b..4b6ed1781b5b7825f93c850a7bc8c99571f36dfc 100644 (file)
@@ -588,6 +588,9 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                         ADD_TO_TRACE(uop, oparg, operand, target);
                         if (uop == _POP_FRAME) {
                             TRACE_STACK_POP();
+                            /* Set the operand to the code object returned to,
+                             * to assist optimization passes */
+                            trace[trace_length-1].operand = (uintptr_t)code;
                             DPRINTF(2,
                                 "Returning to %s (%s:%d) at byte offset %d\n",
                                 PyUnicode_AsUTF8(code->co_qualname),
@@ -629,6 +632,9 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                                 instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
                                 TRACE_STACK_PUSH();
                                 _Py_BloomFilter_Add(dependencies, new_code);
+                                /* Set the operand to the callee's code object,
+                                * to assist optimization passes */
+                                trace[trace_length-1].operand = (uintptr_t)new_code;
                                 code = new_code;
                                 instr = _PyCode_CODE(code);
                                 DPRINTF(2,
index 7db51f0d90a45301654988bd8abea6cf86922931..d1225997e10be2b5b1450a3bfcbe5dad9b9fb0e1 100644 (file)
 #include <stddef.h>
 #include "pycore_optimizer.h"
 
+static void  /* Peephole pass: one forward scan that rewrites uops in `buffer` in place. */
+peephole_opt(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size)
+{
+    for (int pc = 0; pc < buffer_size; pc++) {
+        int opcode = buffer[pc].opcode;
+        switch(opcode) {
+            case _LOAD_CONST: {
+                assert(co != NULL);  /* co_consts of the current code object is read below */
+                PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg);
+                buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE;  /* immortal constants take the variant that skips Py_NewRef */
+                buffer[pc].operand = (uintptr_t)val;  /* the constant's pointer becomes the uop operand */
+                break;
+            }
+            case _CHECK_PEP_523:
+            {
+                /* Setting the eval frame function invalidates
+                 * all executors, so no need to check dynamically */
+                if (_PyInterpreterState_GET()->eval_frame == NULL) {
+                    buffer[pc].opcode = _NOP;
+                }
+                break;
+            }
+            case _PUSH_FRAME:
+            case _POP_FRAME:
+                co = (PyCodeObject *)buffer[pc].operand;  /* trace projection stores the callee / returned-to code object in the operand */
+                break;
+            case _JUMP_TO_TOP:
+            case _EXIT_TRACE:
+                return;  /* stop scanning: uops after these are left untouched */
+        }
+    }
+}
+
 static void
 remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
 {
@@ -59,6 +92,7 @@ _Py_uop_analyze_and_optimize(
     int curr_stacklen
 )
 {
+    peephole_opt(co, buffer, buffer_size);
     remove_unneeded_uops(buffer, buffer_size);
     return 0;
 }
index 23ddc781434ac88a2c04bd76771537ddfb82b952..548c77b7dc7ebb87e213430bfd66c7a8aa3deb49 100644 (file)
@@ -2608,11 +2608,15 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp,
                                      _PyFrameEvalFunction eval_frame)
 {
     if (eval_frame == _PyEval_EvalFrameDefault) {
-        interp->eval_frame = NULL;
+        eval_frame = NULL;
     }
-    else {
-        interp->eval_frame = eval_frame;
+    if (eval_frame == interp->eval_frame) {
+        return;
+    }
+    if (eval_frame != NULL) {
+        _Py_Executors_InvalidateAll(interp);
     }
+    interp->eval_frame = eval_frame;
 }