git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-106603: Make uop struct a triple (opcode, oparg, operand) (#106794)
author Guido van Rossum <guido@python.org>
Mon, 17 Jul 2023 19:12:33 +0000 (12:12 -0700)
committer GitHub <noreply@github.com>
Mon, 17 Jul 2023 19:12:33 +0000 (12:12 -0700)
Include/internal/pycore_opcode_metadata.h
Include/internal/pycore_uops.h
Lib/test/test_capi/test_misc.py
Python/bytecodes.c
Python/ceval.c
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/optimizer.c
Tools/cases_generator/generate_cases.py

index 028736e115b3f49463e0c7d695dd2b0a006ea767..c3a0dbb478a7c1710ee878c8c33206f64dd7a8e7 100644 (file)
 #define _SKIP_CACHE 314
 #define _GUARD_GLOBALS_VERSION 315
 #define _GUARD_BUILTINS_VERSION 316
-#define _GUARD_TYPE_VERSION 317
-#define _CHECK_MANAGED_OBJECT_HAS_VALUES 318
-#define IS_NONE 319
-#define _ITER_CHECK_LIST 320
-#define _IS_ITER_EXHAUSTED_LIST 321
-#define _ITER_NEXT_LIST 322
-#define _ITER_CHECK_TUPLE 323
-#define _IS_ITER_EXHAUSTED_TUPLE 324
-#define _ITER_NEXT_TUPLE 325
-#define _ITER_CHECK_RANGE 326
-#define _IS_ITER_EXHAUSTED_RANGE 327
-#define _ITER_NEXT_RANGE 328
-#define _POP_JUMP_IF_FALSE 329
-#define _POP_JUMP_IF_TRUE 330
-#define JUMP_TO_TOP 331
+#define _LOAD_GLOBAL_MODULE 317
+#define _LOAD_GLOBAL_BUILTINS 318
+#define _GUARD_TYPE_VERSION 319
+#define _CHECK_MANAGED_OBJECT_HAS_VALUES 320
+#define _LOAD_ATTR_INSTANCE_VALUE 321
+#define IS_NONE 322
+#define _ITER_CHECK_LIST 323
+#define _IS_ITER_EXHAUSTED_LIST 324
+#define _ITER_NEXT_LIST 325
+#define _ITER_CHECK_TUPLE 326
+#define _IS_ITER_EXHAUSTED_TUPLE 327
+#define _ITER_NEXT_TUPLE 328
+#define _ITER_CHECK_RANGE 329
+#define _IS_ITER_EXHAUSTED_RANGE 330
+#define _ITER_NEXT_RANGE 331
+#define _POP_JUMP_IF_FALSE 332
+#define _POP_JUMP_IF_TRUE 333
+#define JUMP_TO_TOP 334
 
 #ifndef NEED_OPCODE_METADATA
 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
@@ -1245,7 +1248,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
     [BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_DICT, 0, 0 } } },
     [LIST_APPEND] = { .nuops = 1, .uops = { { LIST_APPEND, 0, 0 } } },
     [SET_ADD] = { .nuops = 1, .uops = { { SET_ADD, 0, 0 } } },
-    [STORE_SUBSCR] = { .nuops = 1, .uops = { { STORE_SUBSCR, 1, 0 } } },
+    [STORE_SUBSCR] = { .nuops = 1, .uops = { { STORE_SUBSCR, 0, 0 } } },
     [STORE_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { STORE_SUBSCR_LIST_INT, 0, 0 } } },
     [STORE_SUBSCR_DICT] = { .nuops = 1, .uops = { { STORE_SUBSCR_DICT, 0, 0 } } },
     [DELETE_SUBSCR] = { .nuops = 1, .uops = { { DELETE_SUBSCR, 0, 0 } } },
@@ -1264,6 +1267,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
     [UNPACK_SEQUENCE_TUPLE] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_TUPLE, 0, 0 } } },
     [UNPACK_SEQUENCE_LIST] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_LIST, 0, 0 } } },
     [UNPACK_EX] = { .nuops = 1, .uops = { { UNPACK_EX, 0, 0 } } },
+    [STORE_ATTR] = { .nuops = 1, .uops = { { STORE_ATTR, 0, 0 } } },
     [DELETE_ATTR] = { .nuops = 1, .uops = { { DELETE_ATTR, 0, 0 } } },
     [STORE_GLOBAL] = { .nuops = 1, .uops = { { STORE_GLOBAL, 0, 0 } } },
     [DELETE_GLOBAL] = { .nuops = 1, .uops = { { DELETE_GLOBAL, 0, 0 } } },
@@ -1271,6 +1275,8 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
     [LOAD_NAME] = { .nuops = 2, .uops = { { _LOAD_LOCALS, 0, 0 }, { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } },
     [LOAD_FROM_DICT_OR_GLOBALS] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } },
     [LOAD_GLOBAL] = { .nuops = 1, .uops = { { LOAD_GLOBAL, 0, 0 } } },
+    [LOAD_GLOBAL_MODULE] = { .nuops = 4, .uops = { { _SKIP_CACHE, 0, 0 }, { _GUARD_GLOBALS_VERSION, 1, 1 }, { _SKIP_CACHE, 0, 0 }, { _LOAD_GLOBAL_MODULE, 1, 3 } } },
+    [LOAD_GLOBAL_BUILTIN] = { .nuops = 4, .uops = { { _SKIP_CACHE, 0, 0 }, { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION, 1, 2 }, { _LOAD_GLOBAL_BUILTINS, 1, 3 } } },
     [DELETE_FAST] = { .nuops = 1, .uops = { { DELETE_FAST, 0, 0 } } },
     [DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } },
     [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } },
@@ -1292,6 +1298,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
     [LOAD_SUPER_ATTR_ATTR] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_ATTR, 0, 0 } } },
     [LOAD_SUPER_ATTR_METHOD] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_METHOD, 0, 0 } } },
     [LOAD_ATTR] = { .nuops = 1, .uops = { { LOAD_ATTR, 0, 0 } } },
+    [LOAD_ATTR_INSTANCE_VALUE] = { .nuops = 4, .uops = { { _SKIP_CACHE, 0, 0 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_MANAGED_OBJECT_HAS_VALUES, 0, 0 }, { _LOAD_ATTR_INSTANCE_VALUE, 1, 3 } } },
     [COMPARE_OP] = { .nuops = 1, .uops = { { COMPARE_OP, 0, 0 } } },
     [COMPARE_OP_FLOAT] = { .nuops = 1, .uops = { { COMPARE_OP_FLOAT, 0, 0 } } },
     [COMPARE_OP_INT] = { .nuops = 1, .uops = { { COMPARE_OP_INT, 0, 0 } } },
@@ -1348,8 +1355,11 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = {
     [_SKIP_CACHE] = "_SKIP_CACHE",
     [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION",
     [_GUARD_BUILTINS_VERSION] = "_GUARD_BUILTINS_VERSION",
+    [_LOAD_GLOBAL_MODULE] = "_LOAD_GLOBAL_MODULE",
+    [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS",
     [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION",
     [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES",
+    [_LOAD_ATTR_INSTANCE_VALUE] = "_LOAD_ATTR_INSTANCE_VALUE",
     [IS_NONE] = "IS_NONE",
     [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST",
     [_IS_ITER_EXHAUSTED_LIST] = "_IS_ITER_EXHAUSTED_LIST",
index 5ed275fb8576794eb0ff4e391b09947b358d9344..edb141cc79f752ba279d6537ff4bd123a41090dd 100644 (file)
@@ -11,8 +11,9 @@ extern "C" {
 #define _Py_UOP_MAX_TRACE_LENGTH 32
 
 typedef struct {
-    int opcode;
-    uint64_t operand;  // Sometimes oparg, sometimes a cache entry
+    uint32_t opcode;
+    uint32_t oparg;
+    uint64_t operand;  // A cache entry
 } _PyUOpInstruction;
 
 typedef struct {
index c0dcff825758ad7a0778465c600e7a0090646514..4e519fa73c50cc569ef4660f23b287c4f77aaf5d 100644 (file)
@@ -2448,7 +2448,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("SAVE_IP", uops)
         self.assertIn("LOAD_FAST", uops)
 
@@ -2493,7 +2493,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(many_vars)
         self.assertIsNotNone(ex)
-        self.assertIn(("LOAD_FAST", 259), list(ex))
+        self.assertIn(("LOAD_FAST", 259, 0), list(ex))
 
     def test_unspecialized_unpack(self):
         # An example of an unspecialized opcode
@@ -2514,7 +2514,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("UNPACK_SEQUENCE", uops)
 
     def test_pop_jump_if_false(self):
@@ -2529,7 +2529,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("_POP_JUMP_IF_FALSE", uops)
 
     def test_pop_jump_if_none(self):
@@ -2544,7 +2544,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("_POP_JUMP_IF_TRUE", uops)
 
     def test_pop_jump_if_not_none(self):
@@ -2559,7 +2559,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("_POP_JUMP_IF_FALSE", uops)
 
     def test_pop_jump_if_true(self):
@@ -2574,7 +2574,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("_POP_JUMP_IF_TRUE", uops)
 
     def test_jump_backward(self):
@@ -2589,7 +2589,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("JUMP_TO_TOP", uops)
 
     def test_jump_forward(self):
@@ -2609,7 +2609,7 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         # Since there is no JUMP_FORWARD instruction,
         # look for indirect evidence: the += operator
         self.assertIn("_BINARY_OP_ADD_INT", uops)
@@ -2630,7 +2630,7 @@ class TestUops(unittest.TestCase):
         self.assertIsNotNone(ex)
         # for i, (opname, oparg) in enumerate(ex):
         #     print(f"{i:4d}: {opname:<20s} {oparg:3d}")
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("_IS_ITER_EXHAUSTED_RANGE", uops)
         # Verification that the jump goes past END_FOR
         # is done by manual inspection of the output
@@ -2652,7 +2652,7 @@ class TestUops(unittest.TestCase):
         self.assertIsNotNone(ex)
         # for i, (opname, oparg) in enumerate(ex):
         #     print(f"{i:4d}: {opname:<20s} {oparg:3d}")
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("_IS_ITER_EXHAUSTED_LIST", uops)
         # Verification that the jump goes past END_FOR
         # is done by manual inspection of the output
@@ -2674,7 +2674,7 @@ class TestUops(unittest.TestCase):
         self.assertIsNotNone(ex)
         # for i, (opname, oparg) in enumerate(ex):
         #     print(f"{i:4d}: {opname:<20s} {oparg:3d}")
-        uops = {opname for opname, _ in ex}
+        uops = {opname for opname, _, _ in ex}
         self.assertIn("_IS_ITER_EXHAUSTED_TUPLE", uops)
         # Verification that the jump goes past END_FOR
         # is done by manual inspection of the output
index 652372cb23dc5e1b8754e3e10ec767921b75dc52..19fb138ee64cba818f373246dff2dadc5cd6c02c 100644 (file)
@@ -645,18 +645,16 @@ dummy_func(
             STORE_SUBSCR_LIST_INT,
         };
 
-        inst(STORE_SUBSCR, (counter/1, v, container, sub -- )) {
+        inst(STORE_SUBSCR, (unused/1, v, container, sub -- )) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
                 next_instr--;
                 _Py_Specialize_StoreSubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_SUBSCR, deferred);
-            _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
             DECREMENT_ADAPTIVE_COUNTER(cache->counter);
-            #else
-            (void)counter;  // Unused.
             #endif  /* ENABLE_SPECIALIZATION */
             /* container[sub] = v */
             int err = PyObject_SetItem(container, sub, v);
@@ -1198,19 +1196,17 @@ dummy_func(
             STORE_ATTR_WITH_HINT,
         };
 
-        inst(STORE_ATTR, (counter/1, unused/3, v, owner --)) {
+        inst(STORE_ATTR, (unused/1, unused/3, v, owner --)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            _PyAttrCache *cache = (_PyAttrCache *)next_instr;
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
                 next_instr--;
                 _Py_Specialize_StoreAttr(owner, next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_ATTR, deferred);
-            _PyAttrCache *cache = (_PyAttrCache *)next_instr;
             DECREMENT_ADAPTIVE_COUNTER(cache->counter);
-            #else
-            (void)counter;  // Unused.
             #endif  /* ENABLE_SPECIALIZATION */
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
             int err = PyObject_SetAttr(owner, name, v);
index f13ba9883d981440c612a754627c4cc97dd8fe0c..b56ddfb4bd286de17c9433789bf5b66b1eafbbf4 100644 (file)
@@ -2747,17 +2747,18 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
     _Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
     int pc = 0;
     int opcode;
-    uint64_t operand;
     int oparg;
+    uint64_t operand;
 
     for (;;) {
         opcode = self->trace[pc].opcode;
+        oparg = self->trace[pc].oparg;
         operand = self->trace[pc].operand;
-        oparg = (int)operand;
         DPRINTF(3,
-                "%4d: uop %s, operand %" PRIu64 ", stack_level %d\n",
+                "%4d: uop %s, oparg %d, operand %" PRIu64 ", stack_level %d\n",
                 pc,
                 opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
+                oparg,
                 operand,
                 (int)(stack_pointer - _PyFrame_Stackbase(frame)));
         pc++;
index d85e23b5abb8e6b9f0aa80a7b7d56156894c2f19..f492c1fa9d8e3fcd83308392e1df0846601cae28 100644 (file)
             PyObject *sub = stack_pointer[-1];
             PyObject *container = stack_pointer[-2];
             PyObject *v = stack_pointer[-3];
-            uint16_t counter = (uint16_t)operand;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
                 next_instr--;
                 _Py_Specialize_StoreSubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_SUBSCR, deferred);
-            _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
             DECREMENT_ADAPTIVE_COUNTER(cache->counter);
-            #else
-            (void)counter;  // Unused.
             #endif  /* ENABLE_SPECIALIZATION */
             /* container[sub] = v */
             int err = PyObject_SetItem(container, sub, v);
             break;
         }
 
+        case STORE_ATTR: {
+            static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size");
+            PyObject *owner = stack_pointer[-1];
+            PyObject *v = stack_pointer[-2];
+            #if ENABLE_SPECIALIZATION
+            _PyAttrCache *cache = (_PyAttrCache *)next_instr;
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
+                PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
+                next_instr--;
+                _Py_Specialize_StoreAttr(owner, next_instr, name);
+                DISPATCH_SAME_OPARG();
+            }
+            STAT_INC(STORE_ATTR, deferred);
+            DECREMENT_ADAPTIVE_COUNTER(cache->counter);
+            #endif  /* ENABLE_SPECIALIZATION */
+            PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
+            int err = PyObject_SetAttr(owner, name, v);
+            Py_DECREF(v);
+            Py_DECREF(owner);
+            if (err) goto pop_2_error;
+            STACK_SHRINK(2);
+            break;
+        }
+
         case DELETE_ATTR: {
             PyObject *owner = stack_pointer[-1];
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
             break;
         }
 
+        case _LOAD_GLOBAL_MODULE: {
+            PyObject *null = NULL;
+            PyObject *res;
+            uint16_t index = (uint16_t)operand;
+            PyDictObject *dict = (PyDictObject *)GLOBALS();
+            PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys);
+            res = entries[index].me_value;
+            DEOPT_IF(res == NULL, LOAD_GLOBAL);
+            Py_INCREF(res);
+            STAT_INC(LOAD_GLOBAL, hit);
+            null = NULL;
+            STACK_GROW(1);
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            stack_pointer[-1] = res;
+            if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = null; }
+            break;
+        }
+
+        case _LOAD_GLOBAL_BUILTINS: {
+            PyObject *null = NULL;
+            PyObject *res;
+            uint16_t index = (uint16_t)operand;
+            PyDictObject *bdict = (PyDictObject *)BUILTINS();
+            PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys);
+            res = entries[index].me_value;
+            DEOPT_IF(res == NULL, LOAD_GLOBAL);
+            Py_INCREF(res);
+            STAT_INC(LOAD_GLOBAL, hit);
+            null = NULL;
+            STACK_GROW(1);
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            stack_pointer[-1] = res;
+            if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = null; }
+            break;
+        }
+
         case DELETE_FAST: {
             PyObject *v = GETLOCAL(oparg);
             if (v == NULL) goto unbound_local_error;
             break;
         }
 
+        case _LOAD_ATTR_INSTANCE_VALUE: {
+            PyObject *owner = stack_pointer[-1];
+            PyObject *res2 = NULL;
+            PyObject *res;
+            uint16_t index = (uint16_t)operand;
+            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
+            res = _PyDictOrValues_GetValues(dorv)->values[index];
+            DEOPT_IF(res == NULL, LOAD_ATTR);
+            STAT_INC(LOAD_ATTR, hit);
+            Py_INCREF(res);
+            res2 = NULL;
+            Py_DECREF(owner);
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            stack_pointer[-1] = res;
+            if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; }
+            break;
+        }
+
         case COMPARE_OP: {
             static_assert(INLINE_CACHE_ENTRIES_COMPARE_OP == 1, "incorrect cache size");
             PyObject *right = stack_pointer[-1];
index 1fd76715dc3e4ac263c8f0fab003e226ad007ea4..0148078d18bdc33114854627803d6869123edd46 100644 (file)
             PyObject *sub = stack_pointer[-1];
             PyObject *container = stack_pointer[-2];
             PyObject *v = stack_pointer[-3];
-            uint16_t counter = read_u16(&next_instr[0].cache);
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
                 next_instr--;
                 _Py_Specialize_StoreSubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_SUBSCR, deferred);
-            _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
             DECREMENT_ADAPTIVE_COUNTER(cache->counter);
-            #else
-            (void)counter;  // Unused.
             #endif  /* ENABLE_SPECIALIZATION */
             /* container[sub] = v */
             int err = PyObject_SetItem(container, sub, v);
             static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size");
             PyObject *owner = stack_pointer[-1];
             PyObject *v = stack_pointer[-2];
-            uint16_t counter = read_u16(&next_instr[0].cache);
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            _PyAttrCache *cache = (_PyAttrCache *)next_instr;
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
                 next_instr--;
                 _Py_Specialize_StoreAttr(owner, next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_ATTR, deferred);
-            _PyAttrCache *cache = (_PyAttrCache *)next_instr;
             DECREMENT_ADAPTIVE_COUNTER(cache->counter);
-            #else
-            (void)counter;  // Unused.
             #endif  /* ENABLE_SPECIALIZATION */
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
             int err = PyObject_SetAttr(owner, name, v);
index 693ba375971ae76c09167dc978f33911fb2ac3f1..3d385a1506cba3b06aae4485f1acfb5300801059 100644 (file)
@@ -344,13 +344,19 @@ uop_item(_PyUOpExecutorObject *self, Py_ssize_t index)
     if (oname == NULL) {
         return NULL;
     }
+    PyObject *oparg = PyLong_FromUnsignedLong(self->trace[index].oparg);
+    if (oparg == NULL) {
+        Py_DECREF(oname);
+        return NULL;
+    }
     PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
     if (operand == NULL) {
+        Py_DECREF(oparg);
         Py_DECREF(oname);
         return NULL;
     }
-    PyObject *args[2] = { oname, operand };
-    return _PyTuple_FromArraySteal(args, 2);
+    PyObject *args[3] = { oname, oparg, operand };
+    return _PyTuple_FromArraySteal(args, 3);
 }
 
 PySequenceMethods uop_as_sequence = {
@@ -395,29 +401,33 @@ translate_bytecode_to_trace(
 #define DPRINTF(level, ...)
 #endif
 
-#define ADD_TO_TRACE(OPCODE, OPERAND) \
+#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND) \
     DPRINTF(2, \
-            "  ADD_TO_TRACE(%s, %" PRIu64 ")\n", \
+            "  ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \
             uop_name(OPCODE), \
+            (OPARG), \
             (uint64_t)(OPERAND)); \
     assert(trace_length < max_length); \
     assert(reserved > 0); \
     reserved--; \
     trace[trace_length].opcode = (OPCODE); \
+    trace[trace_length].oparg = (OPARG); \
     trace[trace_length].operand = (OPERAND); \
     trace_length++;
 
 #define INSTR_IP(INSTR, CODE) \
-    ((long)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
+    ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
 
-#define ADD_TO_STUB(INDEX, OPCODE, OPERAND) \
-    DPRINTF(2, "    ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \
+#define ADD_TO_STUB(INDEX, OPCODE, OPARG, OPERAND) \
+    DPRINTF(2, "    ADD_TO_STUB(%d, %s, %d, %" PRIu64 ")\n", \
             (INDEX), \
             uop_name(OPCODE), \
+            (OPARG), \
             (uint64_t)(OPERAND)); \
     assert(reserved > 0); \
     reserved--; \
     trace[(INDEX)].opcode = (OPCODE); \
+    trace[(INDEX)].oparg = (OPARG); \
     trace[(INDEX)].operand = (OPERAND);
 
 // Reserve space for n uops
@@ -433,7 +443,7 @@ translate_bytecode_to_trace(
 #define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode))
 
     DPRINTF(4,
-            "Optimizing %s (%s:%d) at byte offset %ld\n",
+            "Optimizing %s (%s:%d) at byte offset %d\n",
             PyUnicode_AsUTF8(code->co_qualname),
             PyUnicode_AsUTF8(code->co_filename),
             code->co_firstlineno,
@@ -441,11 +451,11 @@ translate_bytecode_to_trace(
 
     for (;;) {
         RESERVE_RAW(2, "epilogue");  // Always need space for SAVE_IP and EXIT_TRACE
-        ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code));
+        ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0);
 
-        int opcode = instr->op.code;
-        int oparg = instr->op.arg;
-        int extras = 0;
+        uint32_t opcode = instr->op.code;
+        uint32_t oparg = instr->op.arg;
+        uint32_t extras = 0;
 
         while (opcode == EXTENDED_ARG) {
             instr++;
@@ -467,7 +477,7 @@ translate_bytecode_to_trace(
             case POP_JUMP_IF_NONE:
             {
                 RESERVE(2, 2);
-                ADD_TO_TRACE(IS_NONE, 0);
+                ADD_TO_TRACE(IS_NONE, 0, 0);
                 opcode = POP_JUMP_IF_TRUE;
                 goto pop_jump_if_bool;
             }
@@ -475,7 +485,7 @@ translate_bytecode_to_trace(
             case POP_JUMP_IF_NOT_NONE:
             {
                 RESERVE(2, 2);
-                ADD_TO_TRACE(IS_NONE, 0);
+                ADD_TO_TRACE(IS_NONE, 0, 0);
                 opcode = POP_JUMP_IF_FALSE;
                 goto pop_jump_if_bool;
             }
@@ -489,11 +499,11 @@ pop_jump_if_bool:
                 _Py_CODEUNIT *target_instr =
                     instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
                 max_length -= 2;  // Really the start of the stubs
-                int uopcode = opcode == POP_JUMP_IF_TRUE ?
+                uint32_t uopcode = opcode == POP_JUMP_IF_TRUE ?
                     _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE;
-                ADD_TO_TRACE(uopcode, max_length);
-                ADD_TO_STUB(max_length, SAVE_IP, INSTR_IP(target_instr, code));
-                ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0);
+                ADD_TO_TRACE(uopcode, max_length, 0);
+                ADD_TO_STUB(max_length, SAVE_IP, INSTR_IP(target_instr, code), 0);
+                ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0, 0);
                 break;
             }
 
@@ -501,7 +511,7 @@ pop_jump_if_bool:
             {
                 if (instr + 2 - oparg == initial_instr) {
                     RESERVE(1, 0);
-                    ADD_TO_TRACE(JUMP_TO_TOP, 0);
+                    ADD_TO_TRACE(JUMP_TO_TOP, 0, 0);
                 }
                 else {
                     DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
@@ -546,14 +556,14 @@ pop_jump_if_bool:
                 _Py_CODEUNIT *target_instr =  // +1 at the end skips over END_FOR
                     instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg + 1;
                 max_length -= 3;  // Really the start of the stubs
-                ADD_TO_TRACE(check_op, 0);
-                ADD_TO_TRACE(exhausted_op, 0);
-                ADD_TO_TRACE(_POP_JUMP_IF_TRUE, max_length);
-                ADD_TO_TRACE(next_op, 0);
-
-                ADD_TO_STUB(max_length + 0, POP_TOP, 0);
-                ADD_TO_STUB(max_length + 1, SAVE_IP, INSTR_IP(target_instr, code));
-                ADD_TO_STUB(max_length + 2, EXIT_TRACE, 0);
+                ADD_TO_TRACE(check_op, 0, 0);
+                ADD_TO_TRACE(exhausted_op, 0, 0);
+                ADD_TO_TRACE(_POP_JUMP_IF_TRUE, max_length, 0);
+                ADD_TO_TRACE(next_op, 0, 0);
+
+                ADD_TO_STUB(max_length + 0, POP_TOP, 0, 0);
+                ADD_TO_STUB(max_length + 1, SAVE_IP, INSTR_IP(target_instr, code), 0);
+                ADD_TO_STUB(max_length + 2, EXIT_TRACE, 0, 0);
                 break;
             }
 
@@ -564,19 +574,20 @@ pop_jump_if_bool:
                     // Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
                     int nuops = expansion->nuops;
                     RESERVE(nuops, 0);
+                    uint32_t orig_oparg = oparg;  // For OPARG_TOP/BOTTOM
                     for (int i = 0; i < nuops; i++) {
-                        uint64_t operand;
+                        oparg = orig_oparg;
+                        uint64_t operand = 0;
                         int offset = expansion->uops[i].offset;
                         switch (expansion->uops[i].size) {
                             case OPARG_FULL:
-                                operand = oparg;
                                 if (extras && OPCODE_HAS_JUMP(opcode)) {
                                     if (opcode == JUMP_BACKWARD_NO_INTERRUPT) {
-                                        operand -= extras;
+                                        oparg -= extras;
                                     }
                                     else {
                                         assert(opcode != JUMP_BACKWARD);
-                                        operand += extras;
+                                        oparg += extras;
                                     }
                                 }
                                 break;
@@ -590,10 +601,10 @@ pop_jump_if_bool:
                                 operand = read_u64(&instr[offset].cache);
                                 break;
                             case OPARG_TOP:  // First half of super-instr
-                                operand = oparg >> 4;
+                                oparg = orig_oparg >> 4;
                                 break;
                             case OPARG_BOTTOM:  // Second half of super-instr
-                                operand = oparg & 0xF;
+                                oparg = orig_oparg & 0xF;
                                 break;
                             default:
                                 fprintf(stderr,
@@ -603,7 +614,7 @@ pop_jump_if_bool:
                                         expansion->uops[i].offset);
                                 Py_FatalError("garbled expansion");
                         }
-                        ADD_TO_TRACE(expansion->uops[i].uop, operand);
+                        ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand);
                     }
                     break;
                 }
@@ -621,9 +632,9 @@ pop_jump_if_bool:
 done:
     // Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
     if (trace_length > 3) {
-        ADD_TO_TRACE(EXIT_TRACE, 0);
+        ADD_TO_TRACE(EXIT_TRACE, 0, 0);
         DPRINTF(1,
-                "Created a trace for %s (%s:%d) at byte offset %ld -- length %d\n",
+                "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
                 PyUnicode_AsUTF8(code->co_qualname),
                 PyUnicode_AsUTF8(code->co_filename),
                 code->co_firstlineno,
@@ -644,10 +655,10 @@ done:
                 if (trace[i].opcode == _POP_JUMP_IF_FALSE ||
                     trace[i].opcode == _POP_JUMP_IF_TRUE)
                 {
-                    uint64_t target = trace[i].operand;
-                    if (target >= (uint64_t)max_length) {
+                    int target = trace[i].oparg;
+                    if (target >= max_length) {
                         target += trace_length - max_length;
-                        trace[i].operand = target;
+                        trace[i].oparg = target;
                     }
                 }
             }
@@ -657,7 +668,7 @@ done:
     }
     else {
         DPRINTF(4,
-                "No trace for %s (%s:%d) at byte offset %ld\n",
+                "No trace for %s (%s:%d) at byte offset %d\n",
                 PyUnicode_AsUTF8(code->co_qualname),
                 PyUnicode_AsUTF8(code->co_filename),
                 code->co_firstlineno,
index 112f29a83e4c10b0f6ee88080213bfb658cd0016..037bee107cb13a1e9dbfa54be189a49c9149dd19 100644 (file)
@@ -417,16 +417,9 @@ class Instruction:
         if self.always_exits:
             dprint(f"Skipping {self.name} because it always exits")
             return False
-        if self.instr_flags.HAS_ARG_FLAG:
-            # If the instruction uses oparg, it cannot use any caches
-            if self.active_caches:
-                dprint(f"Skipping {self.name} because it uses oparg and caches")
-                return False
-        else:
-            # If it doesn't use oparg, it can have one cache entry
-            if len(self.active_caches) > 1:
-                dprint(f"Skipping {self.name} because it has >1 cache entries")
-                return False
+        if len(self.active_caches) > 1:
+            # print(f"Skipping {self.name} because it has >1 cache entries")
+            return False
         res = True
         for forbidden in FORBIDDEN_NAMES_IN_UOPS:
             # NOTE: To disallow unspecialized uops, use
@@ -1374,7 +1367,7 @@ class Analyzer:
                 if not part.instr.is_viable_uop():
                     print(f"NOTE: Part {part.instr.name} of {name} is not a viable uop")
                     return
-                if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches:
+                if not part.active_caches:
                     size, offset = OPARG_SIZES["OPARG_FULL"], 0
                 else:
                     # If this assert triggers, is_viable_uops() lied