git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-113710: Add a "globals to constants" pass (GH-114592)
author Mark Shannon <mark@hotpy.org>
Fri, 2 Feb 2024 12:14:34 +0000 (12:14 +0000)
committer GitHub <noreply@github.com>
Fri, 2 Feb 2024 12:14:34 +0000 (12:14 +0000)
Converts specializations of `LOAD_GLOBAL` into constants during tier 2 optimization.

16 files changed:
Include/cpython/dictobject.h
Include/cpython/optimizer.h
Include/internal/pycore_dict.h
Include/internal/pycore_dict_state.h
Include/internal/pycore_interp.h
Include/internal/pycore_optimizer.h
Include/internal/pycore_uop_ids.h
Include/internal/pycore_uop_metadata.h
Lib/test/test_capi/test_watchers.py
Modules/_testcapi/watchers.c
Objects/dictobject.c
Python/bytecodes.c
Python/executor_cases.c.h
Python/optimizer.c
Python/optimizer_analysis.c
Python/pylifecycle.c

index 944965fb9e5351aef1ee96080cea79d968b0ef3e..1720fe6f01ea37d11a09d28182bbfe521418bb07 100644 (file)
@@ -17,6 +17,9 @@ typedef struct {
     /* Dictionary version: globally unique, value change each time
        the dictionary is modified */
 #ifdef Py_BUILD_CORE
+    /* Bits 0-7 are for dict watchers.
+     * Bits 8-11 are for the watched mutation counter (used by tier2 optimization)
+     * The remaining bits (12-63) are the actual version tag. */
     uint64_t ma_version_tag;
 #else
     Py_DEPRECATED(3.12) uint64_t ma_version_tag;
index ecf3cae4cbc3f10aa28ae40cd8728ade99076744..5a9ccaea3b22098d994f17b2b4a42ac8ca019a28 100644 (file)
@@ -47,7 +47,10 @@ typedef struct _PyExecutorObject {
 typedef struct _PyOptimizerObject _PyOptimizerObject;
 
 /* Should return > 0 if a new executor is created. 0 if no executor is produced and < 0 if an error occurred. */
-typedef int (*optimize_func)(_PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject **, int curr_stackentries);
+typedef int (*optimize_func)(
+    _PyOptimizerObject* self, struct _PyInterpreterFrame *frame,
+    _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr,
+    int curr_stackentries);
 
 typedef struct _PyOptimizerObject {
     PyObject_HEAD
@@ -94,6 +97,9 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
 /* Minimum of 16 additional executions before retry */
 #define MINIMUM_TIER2_BACKOFF 4
 
+#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
+#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
+
 #ifdef __cplusplus
 }
 #endif
index 60acd89cf6c34a68a48ac7cc87bd2452d48f58cc..233da058f464d177ec2f7881e9dde7bda016b00e 100644 (file)
@@ -207,8 +207,8 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) {
 
 #define DK_IS_UNICODE(dk) ((dk)->dk_kind != DICT_KEYS_GENERAL)
 
-#define DICT_VERSION_INCREMENT (1 << DICT_MAX_WATCHERS)
-#define DICT_VERSION_MASK (DICT_VERSION_INCREMENT - 1)
+#define DICT_VERSION_INCREMENT (1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS))
+#define DICT_WATCHER_MASK ((1 << DICT_MAX_WATCHERS) - 1)
 
 #ifdef Py_GIL_DISABLED
 #define DICT_NEXT_VERSION(INTERP) \
@@ -234,7 +234,7 @@ _PyDict_NotifyEvent(PyInterpreterState *interp,
                     PyObject *value)
 {
     assert(Py_REFCNT((PyObject*)mp) > 0);
-    int watcher_bits = mp->ma_version_tag & DICT_VERSION_MASK;
+    int watcher_bits = mp->ma_version_tag & DICT_WATCHER_MASK;
     if (watcher_bits) {
         _PyDict_SendEvent(watcher_bits, event, mp, key, value);
         return DICT_NEXT_VERSION(interp) | watcher_bits;
index a6dd63d36e040e64a12b6b9e865826b380f4e91a..1a44755c7a01a3a34f908220ef165173ecf18570 100644 (file)
@@ -9,6 +9,7 @@ extern "C" {
 #endif
 
 #define DICT_MAX_WATCHERS 8
+#define DICT_WATCHED_MUTATION_BITS 4
 
 struct _Py_dict_state {
     /*Global counter used to set ma_version_tag field of dictionary.
index c4732b1534199b40a169f5205040ba3751b011c6..f7c332ed747cfac6375b3257292f6a38dff8ba83 100644 (file)
@@ -72,7 +72,6 @@ typedef struct _rare_events {
     uint8_t set_eval_frame_func;
     /* Modifying the builtins,  __builtins__.__dict__[var] = ... */
     uint8_t builtin_dict;
-    int builtins_dict_watcher_id;
     /* Modifying a function, e.g. func.__defaults__ = ..., etc. */
     uint8_t func_modification;
 } _rare_events;
@@ -243,6 +242,7 @@ struct _is {
     uint16_t optimizer_backedge_threshold;
     uint32_t next_func_version;
     _rare_events rare_events;
+    PyDict_WatchCallback builtins_dict_watcher;
 
     _Py_GlobalMonitors monitors;
     bool sys_profile_initialized;
index 31f30c673f207a1c5ba788d55e57a4dbe55a9c48..e21412fc815540db7bf4b729ad530f6fd963b1ea 100644 (file)
@@ -8,8 +8,9 @@ extern "C" {
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
-int _Py_uop_analyze_and_optimize(PyCodeObject *code,
-    _PyUOpInstruction *trace, int trace_len, int curr_stackentries);
+int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
+    _PyUOpInstruction *trace, int trace_len, int curr_stackentries,
+    _PyBloomFilter *dependencies);
 
 extern PyTypeObject _PyCounterExecutor_Type;
 extern PyTypeObject _PyCounterOptimizer_Type;
index a7056586ff04c0100785af667ceb0f8397f18453..b2476e1c6e5c4b56a0614351d4203db800410bec 100644 (file)
@@ -232,8 +232,12 @@ extern "C" {
 #define _CHECK_VALIDITY 379
 #define _LOAD_CONST_INLINE 380
 #define _LOAD_CONST_INLINE_BORROW 381
-#define _INTERNAL_INCREMENT_OPT_COUNTER 382
-#define MAX_UOP_ID 382
+#define _LOAD_CONST_INLINE_WITH_NULL 382
+#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 383
+#define _CHECK_GLOBALS 384
+#define _CHECK_BUILTINS 385
+#define _INTERNAL_INCREMENT_OPT_COUNTER 386
+#define MAX_UOP_ID 386
 
 #ifdef __cplusplus
 }
index 14d3382e895cdf4ed99ae3091d4d2428bc393532..2b5b37e6b8d6a43dad44f1897852dca411566ace 100644 (file)
@@ -204,6 +204,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
     [_LOAD_CONST_INLINE] = 0,
     [_LOAD_CONST_INLINE_BORROW] = 0,
+    [_LOAD_CONST_INLINE_WITH_NULL] = 0,
+    [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0,
+    [_CHECK_GLOBALS] = HAS_DEOPT_FLAG,
+    [_CHECK_BUILTINS] = HAS_DEOPT_FLAG,
     [_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
 };
 
@@ -250,10 +254,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_CHECK_ATTR_METHOD_LAZY_DICT] = "_CHECK_ATTR_METHOD_LAZY_DICT",
     [_CHECK_ATTR_MODULE] = "_CHECK_ATTR_MODULE",
     [_CHECK_ATTR_WITH_HINT] = "_CHECK_ATTR_WITH_HINT",
+    [_CHECK_BUILTINS] = "_CHECK_BUILTINS",
     [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = "_CHECK_CALL_BOUND_METHOD_EXACT_ARGS",
     [_CHECK_EG_MATCH] = "_CHECK_EG_MATCH",
     [_CHECK_EXC_MATCH] = "_CHECK_EXC_MATCH",
     [_CHECK_FUNCTION_EXACT_ARGS] = "_CHECK_FUNCTION_EXACT_ARGS",
+    [_CHECK_GLOBALS] = "_CHECK_GLOBALS",
     [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES",
     [_CHECK_PEP_523] = "_CHECK_PEP_523",
     [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE",
@@ -332,6 +338,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_LOAD_CONST] = "_LOAD_CONST",
     [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE",
     [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW",
+    [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = "_LOAD_CONST_INLINE_BORROW_WITH_NULL",
+    [_LOAD_CONST_INLINE_WITH_NULL] = "_LOAD_CONST_INLINE_WITH_NULL",
     [_LOAD_DEREF] = "_LOAD_DEREF",
     [_LOAD_FAST] = "_LOAD_FAST",
     [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR",
index 5981712c80c3a96f8ab5246872dd2958aeb0e2b2..ae062b1bda26b7cfb7754f52bfd182de68b24d7f 100644 (file)
@@ -151,8 +151,8 @@ class TestDictWatchers(unittest.TestCase):
 
     def test_watch_unassigned_watcher_id(self):
         d = {}
-        with self.assertRaisesRegex(ValueError, r"No dict watcher set for ID 1"):
-            self.watch(1, d)
+        with self.assertRaisesRegex(ValueError, r"No dict watcher set for ID 3"):
+            self.watch(3, d)
 
     def test_unwatch_non_dict(self):
         with self.watcher() as wid:
@@ -168,8 +168,8 @@ class TestDictWatchers(unittest.TestCase):
 
     def test_unwatch_unassigned_watcher_id(self):
         d = {}
-        with self.assertRaisesRegex(ValueError, r"No dict watcher set for ID 1"):
-            self.unwatch(1, d)
+        with self.assertRaisesRegex(ValueError, r"No dict watcher set for ID 3"):
+            self.unwatch(3, d)
 
     def test_clear_out_of_range_watcher_id(self):
         with self.assertRaisesRegex(ValueError, r"Invalid dict watcher ID -1"):
@@ -178,8 +178,8 @@ class TestDictWatchers(unittest.TestCase):
             self.clear_watcher(8)  # DICT_MAX_WATCHERS = 8
 
     def test_clear_unassigned_watcher_id(self):
-        with self.assertRaisesRegex(ValueError, r"No dict watcher set for ID 1"):
-            self.clear_watcher(1)
+        with self.assertRaisesRegex(ValueError, r"No dict watcher set for ID 3"):
+            self.clear_watcher(3)
 
 
 class TestTypeWatchers(unittest.TestCase):
index a763ff46a3c2901f5ff912fb589078251b5b71aa..1eb0db2c2e65761e6e54e6148274caa9694657d7 100644 (file)
@@ -15,8 +15,8 @@ module _testcapi
 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=6361033e795369fc]*/
 
 // Test dict watching
-static PyObject *g_dict_watch_events;
-static int g_dict_watchers_installed;
+static PyObject *g_dict_watch_events = NULL;
+static int g_dict_watchers_installed = 0;
 
 static int
 dict_watch_callback(PyDict_WatchEvent event,
index e24887b7d781bb516a3f9ae452f8dbbe9c0ff224..4bb818b90a4a72d7df968a8649816ae68d468fc5 100644 (file)
@@ -5943,7 +5943,8 @@ PyDict_AddWatcher(PyDict_WatchCallback callback)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
 
-    for (int i = 0; i < DICT_MAX_WATCHERS; i++) {
+    /* Start at 2, as 0 and 1 are reserved for CPython */
+    for (int i = 2; i < DICT_MAX_WATCHERS; i++) {
         if (!interp->dict_state.watchers[i]) {
             interp->dict_state.watchers[i] = callback;
             return i;
index ebd5b06abb2d4e4afacceb74ceb33353df024c15..6fb4d719e43991cea3fcde50a0760f65f352e49e 100644 (file)
@@ -4071,11 +4071,35 @@ dummy_func(
         }
 
         op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
+            TIER_TWO_ONLY
             value = Py_NewRef(ptr);
         }
 
         op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
+            TIER_TWO_ONLY
+            value = ptr;
+        }
+
+        op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) {
+            TIER_TWO_ONLY
+            value = Py_NewRef(ptr);
+            null = NULL;
+        }
+
+        op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) {
+            TIER_TWO_ONLY
             value = ptr;
+            null = NULL;
+        }
+
+        op(_CHECK_GLOBALS, (dict/4 -- )) {
+            TIER_TWO_ONLY
+            DEOPT_IF(GLOBALS() != dict);
+        }
+
+        op(_CHECK_BUILTINS, (dict/4 -- )) {
+            TIER_TWO_ONLY
+            DEOPT_IF(BUILTINS() != dict);
         }
 
         /* Internal -- for testing executors */
index 241b9056207715d94d4376dc49b45c1e192f7d8d..2d914b82dbf88f4c5b45aef819c2d5e96b3e426b 100644 (file)
         case _LOAD_CONST_INLINE: {
             PyObject *value;
             PyObject *ptr = (PyObject *)CURRENT_OPERAND();
+            TIER_TWO_ONLY
             value = Py_NewRef(ptr);
             stack_pointer[0] = value;
             stack_pointer += 1;
         case _LOAD_CONST_INLINE_BORROW: {
             PyObject *value;
             PyObject *ptr = (PyObject *)CURRENT_OPERAND();
+            TIER_TWO_ONLY
             value = ptr;
             stack_pointer[0] = value;
             stack_pointer += 1;
             break;
         }
 
+        case _LOAD_CONST_INLINE_WITH_NULL: {
+            PyObject *value;
+            PyObject *null;
+            PyObject *ptr = (PyObject *)CURRENT_OPERAND();
+            TIER_TWO_ONLY
+            value = Py_NewRef(ptr);
+            null = NULL;
+            stack_pointer[0] = value;
+            stack_pointer[1] = null;
+            stack_pointer += 2;
+            break;
+        }
+
+        case _LOAD_CONST_INLINE_BORROW_WITH_NULL: {
+            PyObject *value;
+            PyObject *null;
+            PyObject *ptr = (PyObject *)CURRENT_OPERAND();
+            TIER_TWO_ONLY
+            value = ptr;
+            null = NULL;
+            stack_pointer[0] = value;
+            stack_pointer[1] = null;
+            stack_pointer += 2;
+            break;
+        }
+
+        case _CHECK_GLOBALS: {
+            PyObject *dict = (PyObject *)CURRENT_OPERAND();
+            TIER_TWO_ONLY
+            if (GLOBALS() != dict) goto deoptimize;
+            break;
+        }
+
+        case _CHECK_BUILTINS: {
+            PyObject *dict = (PyObject *)CURRENT_OPERAND();
+            TIER_TWO_ONLY
+            if (BUILTINS() != dict) goto deoptimize;
+            break;
+        }
+
         case _INTERNAL_INCREMENT_OPT_COUNTER: {
             PyObject *opt;
             opt = stack_pointer[-1];
index 0d04b09fef1e8463f3127a71b8b3b36826c76288..d71ca0aef0e11ac39d7e5e3e7cf9988001e9b2f4 100644 (file)
@@ -108,16 +108,14 @@ PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutor
 }
 
 static int
-error_optimize(
+never_optimize(
     _PyOptimizerObject* self,
-    PyCodeObject *code,
+    _PyInterpreterFrame *frame,
     _Py_CODEUNIT *instr,
     _PyExecutorObject **exec,
     int Py_UNUSED(stack_entries))
 {
-    assert(0);
-    PyErr_Format(PyExc_SystemError, "Should never call error_optimize");
-    return -1;
+    return 0;
 }
 
 PyTypeObject _PyDefaultOptimizer_Type = {
@@ -130,7 +128,7 @@ PyTypeObject _PyDefaultOptimizer_Type = {
 
 _PyOptimizerObject _PyOptimizer_Default = {
     PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
-    .optimize = error_optimize,
+    .optimize = never_optimize,
     .resume_threshold = INT16_MAX,
     .backedge_threshold = INT16_MAX,
 };
@@ -174,7 +172,7 @@ _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject
     }
     _PyOptimizerObject *opt = interp->optimizer;
     _PyExecutorObject *executor = NULL;
-    int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+    int err = opt->optimize(opt, frame, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
     if (err <= 0) {
         assert(executor == NULL);
         return err;
@@ -363,7 +361,8 @@ BRANCH_TO_GUARD[4][2] = {
         ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \
         goto done; \
     } \
-    trace_stack[trace_stack_depth].code = code; \
+    assert(func->func_code == (PyObject *)code); \
+    trace_stack[trace_stack_depth].func = func; \
     trace_stack[trace_stack_depth].instr = instr; \
     trace_stack_depth++;
 #define TRACE_STACK_POP() \
@@ -371,7 +370,8 @@ BRANCH_TO_GUARD[4][2] = {
         Py_FatalError("Trace stack underflow\n"); \
     } \
     trace_stack_depth--; \
-    code = trace_stack[trace_stack_depth].code; \
+    func = trace_stack[trace_stack_depth].func; \
+    code = (PyCodeObject *)trace_stack[trace_stack_depth].func->func_code; \
     instr = trace_stack[trace_stack_depth].instr;
 
 /* Returns 1 on success,
@@ -380,20 +380,23 @@ BRANCH_TO_GUARD[4][2] = {
  */
 static int
 translate_bytecode_to_trace(
-    PyCodeObject *code,
+    _PyInterpreterFrame *frame,
     _Py_CODEUNIT *instr,
     _PyUOpInstruction *trace,
     int buffer_size,
     _PyBloomFilter *dependencies)
 {
     bool progress_needed = true;
+    PyCodeObject *code = (PyCodeObject *)frame->f_executable;
+    PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
+    assert(PyFunction_Check(func));
     PyCodeObject *initial_code = code;
     _Py_BloomFilter_Add(dependencies, initial_code);
     _Py_CODEUNIT *initial_instr = instr;
     int trace_length = 0;
     int max_length = buffer_size;
     struct {
-        PyCodeObject *code;
+        PyFunctionObject *func;
         _Py_CODEUNIT *instr;
     } trace_stack[TRACE_STACK_SIZE];
     int trace_stack_depth = 0;
@@ -593,9 +596,9 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                         ADD_TO_TRACE(uop, oparg, operand, target);
                         if (uop == _POP_FRAME) {
                             TRACE_STACK_POP();
-                            /* Set the operand to the code object returned to,
+                            /* Set the operand to the function object returned to,
                              * to assist optimization passes */
-                            trace[trace_length-1].operand = (uintptr_t)code;
+                            trace[trace_length-1].operand = (uintptr_t)func;
                             DPRINTF(2,
                                 "Returning to %s (%s:%d) at byte offset %d\n",
                                 PyUnicode_AsUTF8(code->co_qualname),
@@ -611,10 +614,10 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                                 // Add one to account for the actual opcode/oparg pair:
                                 + 1;
                             uint32_t func_version = read_u32(&instr[func_version_offset].cache);
-                            PyFunctionObject *func = _PyFunction_LookupByVersion(func_version);
+                            PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version);
                             DPRINTF(3, "Function object: %p\n", func);
-                            if (func != NULL) {
-                                PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(func);
+                            if (new_func != NULL) {
+                                PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(new_func);
                                 if (new_code == code) {
                                     // Recursive call, bail (we could be here forever).
                                     DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
@@ -639,8 +642,9 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                                 _Py_BloomFilter_Add(dependencies, new_code);
                                 /* Set the operand to the callee's code object,
                                 * to assist optimization passes */
-                                trace[trace_length-1].operand = (uintptr_t)new_code;
+                                trace[trace_length-1].operand = (uintptr_t)new_func;
                                 code = new_code;
+                                func = new_func;
                                 instr = _PyCode_CODE(code);
                                 DPRINTF(2,
                                     "Continuing in %s (%s:%d) at byte offset %d\n",
@@ -808,7 +812,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
 static int
 uop_optimize(
     _PyOptimizerObject *self,
-    PyCodeObject *code,
+    _PyInterpreterFrame *frame,
     _Py_CODEUNIT *instr,
     _PyExecutorObject **exec_ptr,
     int curr_stackentries)
@@ -816,7 +820,7 @@ uop_optimize(
     _PyBloomFilter dependencies;
     _Py_BloomFilter_Init(&dependencies);
     _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
-    int err = translate_bytecode_to_trace(code, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
+    int err = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
     if (err <= 0) {
         // Error or nothing translated
         return err;
@@ -824,9 +828,10 @@ uop_optimize(
     OPT_STAT_INC(traces_created);
     char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
     if (uop_optimize == NULL || *uop_optimize > '0') {
-        err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries);
-        if (err < 0) {
-            return -1;
+        err = _Py_uop_analyze_and_optimize(frame, buffer,
+                                           UOP_MAX_TRACE_LENGTH, curr_stackentries, &dependencies);
+        if (err <= 0) {
+            return err;
         }
     }
     _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies);
@@ -887,12 +892,13 @@ PyTypeObject _PyCounterExecutor_Type = {
 static int
 counter_optimize(
     _PyOptimizerObject* self,
-    PyCodeObject *code,
+    _PyInterpreterFrame *frame,
     _Py_CODEUNIT *instr,
     _PyExecutorObject **exec_ptr,
     int Py_UNUSED(curr_stackentries)
 )
 {
+    PyCodeObject *code = (PyCodeObject *)frame->f_executable;
     int oparg = instr->op.arg;
     while (instr->op.code == EXTENDED_ARG) {
         instr++;
index d1225997e10be2b5b1450a3bfcbe5dad9b9fb0e1..2cfbf4b349d0f5240f8b4c495f4f5a89c8c3ffab 100644 (file)
@@ -1,10 +1,12 @@
 #include "Python.h"
 #include "opcode.h"
+#include "pycore_dict.h"
 #include "pycore_interp.h"
 #include "pycore_opcode_metadata.h"
 #include "pycore_opcode_utils.h"
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_uop_metadata.h"
+#include "pycore_dict.h"
 #include "pycore_long.h"
 #include "cpython/optimizer.h"
 #include <stdbool.h>
 #include <stddef.h>
 #include "pycore_optimizer.h"
 
+static int
+get_mutations(PyObject* dict) {
+    assert(PyDict_CheckExact(dict));
+    PyDictObject *d = (PyDictObject *)dict;
+    return (d->ma_version_tag >> DICT_MAX_WATCHERS) & ((1 << DICT_WATCHED_MUTATION_BITS)-1);
+}
+
 static void
-peephole_opt(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size)
+increment_mutations(PyObject* dict) {
+    assert(PyDict_CheckExact(dict));
+    PyDictObject *d = (PyDictObject *)dict;
+    d->ma_version_tag += (1 << DICT_MAX_WATCHERS);
+}
+
+static int
+globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
+                         PyObject* key, PyObject* new_value)
+{
+    if (event == PyDict_EVENT_CLONED) {
+        return 0;
+    }
+    uint64_t watched_mutations = get_mutations(dict);
+    if (watched_mutations < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) {
+        _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict);
+        increment_mutations(dict);
+    }
+    else {
+        PyDict_Unwatch(1, dict);
+    }
+    return 0;
+}
+
+
+static void
+global_to_const(_PyUOpInstruction *inst, PyObject *obj)
+{
+    assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS);
+    assert(PyDict_CheckExact(obj));
+    PyDictObject *dict = (PyDictObject *)obj;
+    assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
+    PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys);
+    assert(inst->operand <= UINT16_MAX);
+    PyObject *res = entries[inst->operand].me_value;
+    if (res == NULL) {
+        return;
+    }
+    if (_Py_IsImmortal(res)) {
+        inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_BORROW_WITH_NULL : _LOAD_CONST_INLINE_BORROW;
+    }
+    else {
+        inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_WITH_NULL : _LOAD_CONST_INLINE;
+    }
+    inst->operand = (uint64_t)res;
+}
+
+static int
+incorrect_keys(_PyUOpInstruction *inst, PyObject *obj)
 {
+    if (!PyDict_CheckExact(obj)) {
+        return 1;
+    }
+    PyDictObject *dict = (PyDictObject *)obj;
+    if (dict->ma_keys->dk_version != inst->operand) {
+        return 1;
+    }
+    return 0;
+}
+
+/* The first two dict watcher IDs are reserved for CPython,
+ * so we don't need to check that they haven't been used */
+#define BUILTINS_WATCHER_ID 0
+#define GLOBALS_WATCHER_ID  1
+
+/* Returns 1 if successfully optimized
+ *         0 if the trace is not suitable for optimization (yet)
+ *        -1 if there was an error. */
+static int
+remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
+               int buffer_size, _PyBloomFilter *dependencies)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    PyObject *builtins = frame->f_builtins;
+    if (builtins != interp->builtins) {
+        return 1;
+    }
+    PyObject *globals = frame->f_globals;
+    assert(PyFunction_Check(((PyFunctionObject *)frame->f_funcobj)));
+    assert(((PyFunctionObject *)frame->f_funcobj)->func_builtins == builtins);
+    assert(((PyFunctionObject *)frame->f_funcobj)->func_globals == globals);
+    /* In order to treat globals as constants, we need to
+     * know that the globals dict is the one we expected, and
+     * that it hasn't changed
+     * In order to treat builtins as constants,  we need to
+     * know that the builtins dict is the one we expected, and
+     * that it hasn't changed and that the global dictionary's
+     * keys have not changed */
+
+    /* These values represent stacks of booleans (one bool per bit).
+     * Pushing a frame shifts left, popping a frame shifts right. */
+    uint32_t builtins_checked = 0;
+    uint32_t builtins_watched = 0;
+    uint32_t globals_checked = 0;
+    uint32_t globals_watched = 0;
+    if (interp->dict_state.watchers[1] == NULL) {
+        interp->dict_state.watchers[1] = globals_watcher_callback;
+    }
+    for (int pc = 0; pc < buffer_size; pc++) {
+        _PyUOpInstruction *inst = &buffer[pc];
+        int opcode = inst->opcode;
+        switch(opcode) {
+            case _GUARD_BUILTINS_VERSION:
+                if (incorrect_keys(inst, builtins)) {
+                    return 0;
+                }
+                if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) {
+                    continue;
+                }
+                if ((builtins_watched & 1) == 0) {
+                    PyDict_Watch(BUILTINS_WATCHER_ID, builtins);
+                    builtins_watched |= 1;
+                }
+                if (builtins_checked & 1) {
+                    buffer[pc].opcode = NOP;
+                }
+                else {
+                    buffer[pc].opcode = _CHECK_BUILTINS;
+                    buffer[pc].operand = (uintptr_t)builtins;
+                    builtins_checked |= 1;
+                }
+                break;
+            case _GUARD_GLOBALS_VERSION:
+                if (incorrect_keys(inst, globals)) {
+                    return 0;
+                }
+                uint64_t watched_mutations = get_mutations(globals);
+                if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) {
+                    continue;
+                }
+                if ((globals_watched & 1) == 0) {
+                    PyDict_Watch(GLOBALS_WATCHER_ID, globals);
+                    _Py_BloomFilter_Add(dependencies, globals);
+                    globals_watched |= 1;
+                }
+                if (globals_checked & 1) {
+                    buffer[pc].opcode = NOP;
+                }
+                else {
+                    buffer[pc].opcode = _CHECK_GLOBALS;
+                    buffer[pc].operand = (uintptr_t)globals;
+                    globals_checked |= 1;
+                }
+                break;
+            case _LOAD_GLOBAL_BUILTINS:
+                if (globals_checked & builtins_checked & globals_watched & builtins_watched & 1) {
+                    global_to_const(inst, builtins);
+                }
+                break;
+            case _LOAD_GLOBAL_MODULE:
+                if (globals_checked & globals_watched & 1) {
+                    global_to_const(inst, globals);
+                }
+                break;
+            case _PUSH_FRAME:
+            {
+                globals_checked <<= 1;
+                globals_watched <<= 1;
+                builtins_checked <<= 1;
+                builtins_watched <<= 1;
+                PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
+                if (func == NULL) {
+                    return 1;
+                }
+                assert(PyFunction_Check(func));
+                globals = func->func_globals;
+                builtins = func->func_builtins;
+                if (builtins != interp->builtins) {
+                    return 1;
+                }
+                break;
+            }
+            case _POP_FRAME:
+            {
+                globals_checked >>= 1;
+                globals_watched >>= 1;
+                builtins_checked >>= 1;
+                builtins_watched >>= 1;
+                PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
+                assert(PyFunction_Check(func));
+                globals = func->func_globals;
+                builtins = func->func_builtins;
+                break;
+            }
+            case _JUMP_TO_TOP:
+            case _EXIT_TRACE:
+                return 1;
+        }
+    }
+    return 0;
+}
+
+static void
+peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
+{
+    PyCodeObject *co = (PyCodeObject *)frame->f_executable;
     for (int pc = 0; pc < buffer_size; pc++) {
         int opcode = buffer[pc].opcode;
         switch(opcode) {
@@ -36,8 +239,17 @@ peephole_opt(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size)
             }
             case _PUSH_FRAME:
             case _POP_FRAME:
-                co = (PyCodeObject *)buffer[pc].operand;
+            {
+                PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
+                if (func == NULL) {
+                    co = NULL;
+                }
+                else {
+                    assert(PyFunction_Check(func));
+                    co = (PyCodeObject *)func->func_code;
+                }
                 break;
+            }
             case _JUMP_TO_TOP:
             case _EXIT_TRACE:
                 return;
@@ -83,16 +295,20 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
     }
 }
 
-
 int
 _Py_uop_analyze_and_optimize(
-    PyCodeObject *co,
+    _PyInterpreterFrame *frame,
     _PyUOpInstruction *buffer,
     int buffer_size,
-    int curr_stacklen
+    int curr_stacklen,
+    _PyBloomFilter *dependencies
 )
 {
-    peephole_opt(co, buffer, buffer_size);
+    int err = remove_globals(frame, buffer, buffer_size, dependencies);
+    if (err <= 0) {
+        return err;
+    }
+    peephole_opt(frame, buffer, buffer_size);
     remove_unneeded_uops(buffer, buffer_size);
-    return 0;
+    return 1;
 }
index 372f60602375b658127da281f6300de2f4ca795d..0cac7109340129c177c31f3f383fbc5ba2f6ae0b 100644 (file)
@@ -32,6 +32,7 @@
 #include "pycore_typevarobject.h" // _Py_clear_generic_types()
 #include "pycore_unicodeobject.h" // _PyUnicode_InitTypes()
 #include "pycore_weakref.h"       // _PyWeakref_GET_REF()
+#include "cpython/optimizer.h"    // _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS
 #include "pycore_obmalloc.h"      // _PyMem_init_obmalloc()
 
 #include "opcode.h"
@@ -609,7 +610,11 @@ init_interp_create_gil(PyThreadState *tstate, int gil)
 static int
 builtins_dict_watcher(PyDict_WatchEvent event, PyObject *dict, PyObject *key, PyObject *new_value)
 {
-    RARE_EVENT_INC(builtin_dict);
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    if (event != PyDict_EVENT_CLONED && interp->rare_events.builtin_dict < _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) {
+        _Py_Executors_InvalidateAll(interp);
+    }
+    RARE_EVENT_INTERP_INC(interp, builtin_dict);
     return 0;
 }
 
@@ -1287,11 +1292,9 @@ init_interp_main(PyThreadState *tstate)
         }
     }
 
-    if ((interp->rare_events.builtins_dict_watcher_id = PyDict_AddWatcher(&builtins_dict_watcher)) == -1) {
-        return _PyStatus_ERR("failed to add builtin dict watcher");
-    }
 
-    if (PyDict_Watch(interp->rare_events.builtins_dict_watcher_id, interp->builtins) != 0) {
+    interp->dict_state.watchers[0] = &builtins_dict_watcher;
+    if (PyDict_Watch(0, interp->builtins) != 0) {
         return _PyStatus_ERR("failed to set builtin dict watcher");
     }
 
@@ -1622,8 +1625,13 @@ finalize_modules(PyThreadState *tstate)
 {
     PyInterpreterState *interp = tstate->interp;
 
-    // Stop collecting stats on __builtin__ modifications during teardown
-    PyDict_Unwatch(interp->rare_events.builtins_dict_watcher_id, interp->builtins);
+    // Invalidate all executors and turn off tier 2 optimizer
+    _Py_Executors_InvalidateAll(interp);
+    Py_XDECREF(interp->optimizer);
+    interp->optimizer = &_PyOptimizer_Default;
+
+    // Stop watching __builtin__ modifications
+    PyDict_Unwatch(0, interp->builtins);
 
     PyObject *modules = _PyImport_GetModules(interp);
     if (modules == NULL) {