gh-116968: Reimplement Tier 2 counters (#117144)
author    Guido van Rossum <guido@python.org>    Thu, 4 Apr 2024 15:03:27 +0000 (08:03 -0700)
committer GitHub <noreply@github.com>            Thu, 4 Apr 2024 15:03:27 +0000 (15:03 +0000)
Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
API used for adaptive specialization counters is changed but the behavior is
(supposed to be) identical.

The behavior of the Tier 2 counters is changed:
- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
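
For orientation, a sketch of the counter layout and restart rule described above, mirroring the definitions this commit adds to Include/cpython/code.h and Include/internal/pycore_backoff.h (see the hunks below):

    typedef struct {
        union {
            struct {
                uint16_t backoff : 4;   // restart exponent, capped at 12
                uint16_t value : 12;    // executions remaining until trigger
            };
            uint16_t as_counter;        // raw 16-bit view
        };
    } _Py_BackoffCounter;

    /* On each failed specialization or trace attempt the counter restarts
     * with roughly double the previous wait: backoff is incremented (up to
     * 12) and value becomes 2**backoff - 1, so a JUMP_BACKWARD counter
     * waits 16, then ~32, ~64, ... up to 4095 executions between attempts. */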

19 files changed:
Include/cpython/code.h
Include/cpython/optimizer.h
Include/internal/pycore_backoff.h [new file with mode: 0644]
Include/internal/pycore_code.h
Include/internal/pycore_interp.h
Lib/test/test_capi/test_opt.py
Makefile.pre.in
Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst [new file with mode: 0644]
Modules/_testinternalcapi.c
PCbuild/pythoncore.vcxproj
Python/bytecodes.c
Python/ceval.c
Python/ceval_macros.h
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/instrumentation.c
Python/optimizer.c
Python/specialize.c
Tools/jit/template.c

diff --git a/Include/cpython/code.h b/Include/cpython/code.h
index d5dac1765638f9c62a7183a6ba823fae2b72bae9..b0e226e0e1971a8f0887a93a676de5682c3b38ae 100644 (file)
@@ -24,6 +24,16 @@ typedef struct _Py_GlobalMonitors {
     uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS];
 } _Py_GlobalMonitors;
 
+typedef struct {
+    union {
+        struct {
+            uint16_t backoff : 4;
+            uint16_t value : 12;
+        };
+        uint16_t as_counter;  // For printf("%#x", ...)
+    };
+} _Py_BackoffCounter;
+
 /* Each instruction in a code object is a fixed-width value,
  * currently 2 bytes: 1-byte opcode + 1-byte oparg.  The EXTENDED_ARG
  * opcode allows for larger values but the current limit is 3 uses
@@ -39,6 +49,7 @@ typedef union {
         uint8_t code;
         uint8_t arg;
     } op;
+    _Py_BackoffCounter counter;  // First cache entry of specializable op
 } _Py_CODEUNIT;
 
 
diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index bc960c583782c5ba5ef57a15fc8809a7d4fac074..819251a25bb242e8678b3e5b7a3e2fdb6de190c6 100644 (file)
@@ -89,7 +89,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
 
 typedef struct _exit_data {
     uint32_t target;
-    int16_t temperature;
+    _Py_BackoffCounter temperature;
     const struct _PyExecutorObject *executor;
 } _PyExitData;
 
@@ -115,11 +115,6 @@ typedef int (*optimize_func)(
 struct _PyOptimizerObject {
     PyObject_HEAD
     optimize_func optimize;
-    /* These thresholds are treated as signed so do not exceed INT16_MAX
-     * Use INT16_MAX to indicate that the optimizer should never be called */
-    uint16_t resume_threshold;
-    uint16_t side_threshold;
-    uint16_t backedge_threshold;
     /* Data needed by the optimizer goes here, but is opaque to the VM */
 };
 
@@ -151,14 +146,6 @@ extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_inval
 PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
 PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
 
-#define OPTIMIZER_BITS_IN_COUNTER 4
-/* Minimum of 16 additional executions before retry */
-#define MIN_TIER2_BACKOFF 4
-#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
-#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
-/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
-#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX
-
 #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
 
diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h
new file mode 100644 (file)
index 0000000..5d93c88
--- /dev/null
+++ b/Include/internal/pycore_backoff.h
@@ -0,0 +1,128 @@
+
+#ifndef Py_INTERNAL_BACKOFF_H
+#define Py_INTERNAL_BACKOFF_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/* 16-bit countdown counters using exponential backoff.
+
+   These are used by the adaptive specializer to count down until
+   it is time to specialize an instruction. If specialization fails
+   the counter is reset using exponential backoff.
+
+   Another use is for the Tier 2 optimizer to decide when to create
+   a new Tier 2 trace (executor). Again, exponential backoff is used.
+
+   The 16-bit counter is structured as a 12-bit unsigned 'value'
+   and a 4-bit 'backoff' field. When resetting the counter, the
+   backoff field is incremented (until it reaches a limit) and the
+   value is set to a bit mask representing the value 2**backoff - 1.
+   The maximum backoff is 12 (the number of value bits).
+
+   There is an exceptional value which must not be updated, 0xFFFF.
+*/
+
+#define UNREACHABLE_BACKOFF 0xFFFF
+
+static inline bool
+is_unreachable_backoff_counter(_Py_BackoffCounter counter)
+{
+    return counter.as_counter == UNREACHABLE_BACKOFF;
+}
+
+static inline _Py_BackoffCounter
+make_backoff_counter(uint16_t value, uint16_t backoff)
+{
+    assert(backoff <= 15);
+    assert(value <= 0xFFF);
+    return (_Py_BackoffCounter){.value = value, .backoff = backoff};
+}
+
+static inline _Py_BackoffCounter
+forge_backoff_counter(uint16_t counter)
+{
+    return (_Py_BackoffCounter){.as_counter = counter};
+}
+
+static inline _Py_BackoffCounter
+restart_backoff_counter(_Py_BackoffCounter counter)
+{
+    assert(!is_unreachable_backoff_counter(counter));
+    if (counter.backoff < 12) {
+        return make_backoff_counter((1 << (counter.backoff + 1)) - 1, counter.backoff + 1);
+    }
+    else {
+        return make_backoff_counter((1 << 12) - 1, 12);
+    }
+}
+
+static inline _Py_BackoffCounter
+pause_backoff_counter(_Py_BackoffCounter counter)
+{
+    return make_backoff_counter(counter.value | 1, counter.backoff);
+}
+
+static inline _Py_BackoffCounter
+advance_backoff_counter(_Py_BackoffCounter counter)
+{
+    if (!is_unreachable_backoff_counter(counter)) {
+        return make_backoff_counter((counter.value - 1) & 0xFFF, counter.backoff);
+    }
+    else {
+        return counter;
+    }
+}
+
+static inline bool
+backoff_counter_triggers(_Py_BackoffCounter counter)
+{
+    return counter.value == 0;
+}
+
+/* Initial JUMP_BACKWARD counter.
+ * This determines when we create a trace for a loop.
+ * Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */
+#define JUMP_BACKWARD_INITIAL_VALUE 16
+#define JUMP_BACKWARD_INITIAL_BACKOFF 4
+static inline _Py_BackoffCounter
+initial_jump_backoff_counter(void)
+{
+    return make_backoff_counter(JUMP_BACKWARD_INITIAL_VALUE,
+                                JUMP_BACKWARD_INITIAL_BACKOFF);
+}
+
+/* Initial exit temperature.
+ * Must be larger than ADAPTIVE_COOLDOWN_VALUE,
+ * otherwise when a side exit warms up we may construct
+ * a new trace before the Tier 1 code has properly re-specialized.
+ * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */
+#define COLD_EXIT_INITIAL_VALUE 64
+#define COLD_EXIT_INITIAL_BACKOFF 6
+
+static inline _Py_BackoffCounter
+initial_temperature_backoff_counter(void)
+{
+    return make_backoff_counter(COLD_EXIT_INITIAL_VALUE,
+                                COLD_EXIT_INITIAL_BACKOFF);
+}
+
+/* Unreachable backoff counter. */
+static inline _Py_BackoffCounter
+initial_unreachable_backoff_counter(void)
+{
+    return forge_backoff_counter(UNREACHABLE_BACKOFF);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_BACKOFF_H */
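
Taken together, the helpers above are driven in a simple tick pattern. A sketch (not part of the patch; the function name is illustrative) of how a JUMP_BACKWARD counter is exercised, assuming this header is in scope:

    static void
    tick_jump_backward(_Py_BackoffCounter *c)
    {
        if (backoff_counter_triggers(*c)) {
            /* attempt to create a Tier 2 trace here; on failure,
             * back off so the next attempt waits ~2x as long */
            *c = restart_backoff_counter(*c);
        }
        else {
            *c = advance_backoff_counter(*c);  /* one more execution elapsed */
        }
    }
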
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 6c90c9e284103c691177f506724d03fcdc4c13ff..688051bbff7aac39298f8642fd91c97e36398cd4 100644 (file)
@@ -31,7 +31,7 @@ extern "C" {
 #define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t module_keys_version;
     uint16_t builtin_keys_version;
     uint16_t index;
@@ -40,44 +40,44 @@ typedef struct {
 #define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyBinaryOpCache;
 
 #define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyUnpackSequenceCache;
 
 #define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
     CACHE_ENTRIES(_PyUnpackSequenceCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyCompareOpCache;
 
 #define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyBinarySubscrCache;
 
 #define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PySuperAttrCache;
 
 #define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t version[2];
     uint16_t index;
 } _PyAttrCache;
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t type_version[2];
     union {
         uint16_t keys_version[2];
@@ -93,39 +93,39 @@ typedef struct {
 #define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t func_version[2];
 } _PyCallCache;
 
 #define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyStoreSubscrCache;
 
 #define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyForIterCache;
 
 #define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PySendCache;
 
 #define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t version[2];
 } _PyToBoolCache;
 
 #define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyContainsOpCache;
 
 #define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache)
@@ -451,18 +451,14 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 
 /** Counters
  * The first 16-bit value in each inline cache is a counter.
- * When counting misses, the counter is treated as a simple unsigned value.
  *
  * When counting executions until the next specialization attempt,
  * exponential backoff is used to reduce the number of specialization failures.
- * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
- * On a specialization failure, the backoff exponent is incremented and the
- * counter set to (2**backoff - 1).
- * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
+ * See pycore_backoff.h for more details.
+ * On a specialization failure, the backoff counter is restarted.
  */
 
-/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
-#define ADAPTIVE_BACKOFF_BITS 4
+#include "pycore_backoff.h"
 
 // A value of 1 means that we attempt to specialize the *second* time each
 // instruction is executed. Executing twice is a much better indicator of
@@ -480,36 +476,30 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 #define ADAPTIVE_COOLDOWN_VALUE 52
 #define ADAPTIVE_COOLDOWN_BACKOFF 0
 
-#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
+// Can't assert this in pycore_backoff.h because of header order dependencies
+static_assert(COLD_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE,
+    "Cold exit value should be larger than adaptive cooldown value");
 
-
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_bits(uint16_t value, uint16_t backoff) {
-    return ((value << ADAPTIVE_BACKOFF_BITS)
-            | (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
+    return make_backoff_counter(value, backoff);
 }
 
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_warmup(void) {
     return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
                                  ADAPTIVE_WARMUP_BACKOFF);
 }
 
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_cooldown(void) {
     return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
                                  ADAPTIVE_COOLDOWN_BACKOFF);
 }
 
-static inline uint16_t
-adaptive_counter_backoff(uint16_t counter) {
-    uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
-    backoff++;
-    if (backoff > MAX_BACKOFF_VALUE) {
-        backoff = MAX_BACKOFF_VALUE;
-    }
-    uint16_t value = (uint16_t)(1 << backoff) - 1;
-    return adaptive_counter_bits(value, backoff);
+static inline _Py_BackoffCounter
+adaptive_counter_backoff(_Py_BackoffCounter counter) {
+    return restart_backoff_counter(counter);
 }
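
A sketch of one Tier 1 specialization round using these wrappers, assuming pycore_backoff.h is in scope; the function name is illustrative, and the warmup constants (value 1, backoff 1) are taken from the surrounding context:

    static void
    one_specialization_round(_Py_BackoffCounter *counter)
    {
        *counter = adaptive_counter_warmup();             /* value 1, backoff 1 */
        while (!backoff_counter_triggers(*counter)) {
            *counter = advance_backoff_counter(*counter); /* count down */
        }
        /* a _Py_Specialize_*() call runs here; if it fails: */
        *counter = adaptive_counter_backoff(*counter);    /* next wait: 2**2 - 1 = 3 */
    }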
 
 
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index b8d0fdcce11ba8a388677b1a51a0ca00aaf18464..b5cea863ff35dc0fd72bbabb81a0e36df29c09c7 100644 (file)
@@ -239,12 +239,6 @@ struct _is {
     _PyOptimizerObject *optimizer;
     _PyExecutorObject *executor_list_head;
 
-    /* These two values are shifted and offset to speed up check in JUMP_BACKWARD */
-    uint32_t optimizer_resume_threshold;
-    uint32_t optimizer_backedge_threshold;
-
-    uint16_t optimizer_side_threshold;
-
     _rare_events rare_events;
     PyDict_WatchCallback builtins_dict_watcher;
 
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index ceb49c3c7129cb243d3a6707023907dddc862847..7ca0f6927fe4a16eb2f62d99c328096f9e92dad0 100644 (file)
@@ -10,6 +10,7 @@ import _testinternalcapi
 
 from test.support import script_helper, requires_specialization
 
+from _testinternalcapi import TIER2_THRESHOLD
 
 @contextlib.contextmanager
 def temporary_optimizer(opt):
@@ -69,7 +70,8 @@ class TestOptimizerAPI(unittest.TestCase):
                 self.assertEqual(opt.get_count(), 0)
                 with clear_executors(loop):
                     loop()
-                self.assertEqual(opt.get_count(), 1000)
+                # Subtract the iterations that run before the optimizer kicks in
+                self.assertEqual(opt.get_count(), 1000 - TIER2_THRESHOLD)
 
     def test_long_loop(self):
         "Check that we aren't confused by EXTENDED_ARG"
@@ -81,7 +83,7 @@ class TestOptimizerAPI(unittest.TestCase):
                 pass
 
             def long_loop():
-                for _ in range(10):
+                for _ in range(20):
                     nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
                     nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
                     nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
@@ -96,7 +98,7 @@ class TestOptimizerAPI(unittest.TestCase):
         with temporary_optimizer(opt):
             self.assertEqual(opt.get_count(), 0)
             long_loop()
-            self.assertEqual(opt.get_count(), 10)
+            self.assertEqual(opt.get_count(), 20 - TIER2_THRESHOLD)  # Need iterations to warm up
 
     def test_code_restore_for_ENTER_EXECUTOR(self):
         def testfunc(x):
@@ -932,10 +934,10 @@ class TestUopsOptimization(unittest.TestCase):
         exec(src, ns, ns)
         testfunc = ns['testfunc']
         ns['_test_global'] = 0
-        _, ex = self._run_with_optimizer(testfunc, 16)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
         self.assertIsNone(ex)
         ns['_test_global'] = 1
-        _, ex = self._run_with_optimizer(testfunc, 16)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
         self.assertIsNotNone(ex)
         uops = get_opnames(ex)
         self.assertNotIn("_GUARD_BOTH_INT", uops)
@@ -946,10 +948,10 @@ class TestUopsOptimization(unittest.TestCase):
         exec(src, ns, ns)
         testfunc = ns['testfunc']
         ns['_test_global'] = 0
-        _, ex = self._run_with_optimizer(testfunc, 16)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
         self.assertIsNone(ex)
         ns['_test_global'] = 3.14
-        _, ex = self._run_with_optimizer(testfunc, 16)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
         self.assertIsNone(ex)
 
     def test_combine_stack_space_checks_sequential(self):
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 2a22a1e95a39a2d58822693305b2b0d4c765113a..84058acdcc35fce315c51474f620384a63225390 100644 (file)
@@ -1124,6 +1124,7 @@ PYTHON_HEADERS= \
                $(srcdir)/Include/internal/pycore_ast.h \
                $(srcdir)/Include/internal/pycore_ast_state.h \
                $(srcdir)/Include/internal/pycore_atexit.h \
+               $(srcdir)/Include/internal/pycore_backoff.h \
                $(srcdir)/Include/internal/pycore_bitutils.h \
                $(srcdir)/Include/internal/pycore_blocks_output_buffer.h \
                $(srcdir)/Include/internal/pycore_brc.h \
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst
new file mode 100644 (file)
index 0000000..dc5beee
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst
@@ -0,0 +1,11 @@
+Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
+shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
+API used for adaptive specialization counters is changed but the behavior is
+(supposed to be) identical.
+
+The behavior of the Tier 2 counters is changed:
+
+* There are no longer dynamic thresholds (we never varied these).
+* All counters now use the same exponential backoff.
+* The counter for ``JUMP_BACKWARD`` starts counting down from 16.
+* The ``temperature`` in side exits starts counting down from 64.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 6b5d99f6ffac1fad88bc66ee6536f13ed4265e2e..758e88e288bac6df31b94e96a4d178b92e5df147 100644 (file)
@@ -10,6 +10,7 @@
 #undef NDEBUG
 
 #include "Python.h"
+#include "pycore_backoff.h"       // JUMP_BACKWARD_INITIAL_VALUE
 #include "pycore_bitutils.h"      // _Py_bswap32()
 #include "pycore_bytesobject.h"   // _PyBytes_Find()
 #include "pycore_ceval.h"         // _PyEval_AddPendingCall()
@@ -1819,6 +1820,11 @@ module_exec(PyObject *module)
         return 1;
     }
 
+    if (PyModule_Add(module, "TIER2_THRESHOLD",
+                        PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE)) < 0) {
+        return 1;
+    }
+
     return 0;
 }
 
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 657ffd1aa4c676f83034c213046a1b60ff4f676b..827c9f074de90910952ff23b4659c6547510d3cc 100644 (file)
     <ClInclude Include="..\Include\internal\pycore_ast.h" />
     <ClInclude Include="..\Include\internal\pycore_ast_state.h" />
     <ClInclude Include="..\Include\internal\pycore_atexit.h" />
+    <ClInclude Include="..\Include\internal\pycore_backoff.h" />
     <ClInclude Include="..\Include\internal\pycore_bitutils.h" />
     <ClInclude Include="..\Include\internal\pycore_brc.h" />
     <ClInclude Include="..\Include\internal\pycore_bytes_methods.h" />
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index fa53c969fe361eb9f7f5e04e52122e9883f6fc2f..8af48d9a0129b66c6ecff613202567a9c4da145b 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "Python.h"
 #include "pycore_abstract.h"      // _PyIndex_Check()
+#include "pycore_backoff.h"
 #include "pycore_cell.h"          // PyCell_GetRef()
 #include "pycore_code.h"
 #include "pycore_emscripten_signal.h"  // _Py_CHECK_EMSCRIPTEN_SIGNALS
@@ -326,13 +327,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ToBool(value, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(TO_BOOL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -551,13 +552,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_BinarySubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(BINARY_SUBSCR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -698,13 +699,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_STORE_SUBSCR, (counter/1, container, sub -- container, sub)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_StoreSubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_SUBSCR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -982,13 +983,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_SEND, (counter/1, receiver, unused -- receiver, unused)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_Send(receiver, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(SEND, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -1211,13 +1212,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(UNPACK_SEQUENCE, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
             (void)seq;
             (void)counter;
@@ -1280,14 +1281,14 @@ dummy_func(
 
         specializing op(_SPECIALIZE_STORE_ATTR, (counter/1, owner -- owner)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
                 next_instr = this_instr;
                 _Py_Specialize_StoreAttr(owner, next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -1398,14 +1399,14 @@ dummy_func(
 
         specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
                 next_instr = this_instr;
                 _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_GLOBAL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -1711,7 +1712,7 @@ dummy_func(
         inst(INSTRUMENTED_LOAD_SUPER_ATTR, (unused/1, unused, unused, unused -- unused, unused if (oparg & 1))) {
             // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
             // don't want to specialize instrumented instructions
-            INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
             GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
         }
 
@@ -1723,13 +1724,13 @@ dummy_func(
         specializing op(_SPECIALIZE_LOAD_SUPER_ATTR, (counter/1, global_super, class, unused -- global_super, class, unused)) {
             #if ENABLE_SPECIALIZATION
             int load_method = oparg & 1;
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_SUPER_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -1836,14 +1837,14 @@ dummy_func(
 
         specializing op(_SPECIALIZE_LOAD_ATTR, (counter/1, owner -- owner)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
                 next_instr = this_instr;
                 _Py_Specialize_LoadAttr(owner, next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -2157,13 +2158,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_COMPARE_OP, (counter/1, left, right -- left, right)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_CompareOp(left, right, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(COMPARE_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -2254,13 +2255,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_CONTAINS_OP, (counter/1, left, right -- left, right)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ContainsOp(right, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(CONTAINS_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -2340,16 +2341,8 @@ dummy_func(
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
             #if ENABLE_SPECIALIZATION
-            uint16_t counter = this_instr[1].cache;
-            this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
-            /* We are using unsigned values, but we really want signed values, so
-             * do the 2s complement adjustment manually */
-            uint32_t offset_counter = counter ^ (1 << 15);
-            uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
-            assert((threshold & OPTIMIZER_BITS_MASK) == 0);
-            // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
-            // Double-check that the opcode isn't instrumented or something:
-            if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
+            _Py_BackoffCounter counter = this_instr[1].counter;
+            if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
                 _Py_CODEUNIT *start = this_instr;
                 /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
                 while (oparg > 255) {
@@ -2365,17 +2358,12 @@ dummy_func(
                     GOTO_TIER_TWO(executor);
                 }
                 else {
-                    int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
-                    backoff++;
-                    if (backoff < MIN_TIER2_BACKOFF) {
-                        backoff = MIN_TIER2_BACKOFF;
-                    }
-                    else if (backoff > MAX_TIER2_BACKOFF) {
-                        backoff = MAX_TIER2_BACKOFF;
-                    }
-                    this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
+                    this_instr[1].counter = restart_backoff_counter(counter);
                 }
             }
+            else {
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
+            }
             #endif  /* ENABLE_SPECIALIZATION */
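
The net effect of the rewritten JUMP_BACKWARD logic is easiest to see numerically. A standalone sketch (plain C, no CPython headers) that reproduces the restart arithmetic of restart_backoff_counter():

    #include <stdio.h>

    int main(void)
    {
        /* Start as in initial_jump_backoff_counter(): value 16, backoff 4.
         * Each failed trace attempt caps backoff at 12 and sets
         * value = (1 << backoff) - 1. */
        unsigned value = 16, backoff = 4;
        for (int attempt = 0; attempt < 10; attempt++) {
            printf("attempt %d after %u executions\n", attempt, value);
            if (backoff < 12) {
                backoff++;
            }
            value = (1u << backoff) - 1;  /* 31, 63, ..., 4095, 4095, ... */
        }
        return 0;
    }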
         }
 
@@ -2535,13 +2523,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ForIter(iter, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(FOR_ITER, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -3001,7 +2989,7 @@ dummy_func(
                     tstate, PY_MONITORING_EVENT_CALL,
                     frame, this_instr, function, arg);
             ERROR_IF(err, error);
-            INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
             GO_TO_INSTRUCTION(CALL);
         }
 
@@ -3030,13 +3018,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_CALL, (counter/1, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(CALL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
         }
 
@@ -3933,13 +3921,13 @@ dummy_func(
 
         specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(BINARY_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif  /* ENABLE_SPECIALIZATION */
             assert(NB_ADD <= oparg);
             assert(oparg <= NB_INPLACE_XOR);
@@ -3965,7 +3953,7 @@ dummy_func(
             ERROR_IF(next_opcode < 0, error);
             next_instr = this_instr;
             if (_PyOpcode_Caches[next_opcode]) {
-                INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
             }
             assert(next_opcode > 0 && next_opcode < 256);
             opcode = next_opcode;
@@ -4157,21 +4145,22 @@ dummy_func(
         tier2 op(_COLD_EXIT, (--)) {
             _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
             _PyExitData *exit = &previous->exits[oparg];
-            exit->temperature++;
             PyCodeObject *code = _PyFrame_GetCode(frame);
             _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
-            if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
+            _Py_BackoffCounter temperature = exit->temperature;
+            if (!backoff_counter_triggers(temperature)) {
+                exit->temperature = advance_backoff_counter(temperature);
                 GOTO_TIER_ONE(target);
             }
             _PyExecutorObject *executor;
             if (target->op.code == ENTER_EXECUTOR) {
                 executor = code->co_executors->executors[target->op.arg];
                 Py_INCREF(executor);
-            } else {
+            }
+            else {
                 int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
                 if (optimized <= 0) {
-                    int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
-                    exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
+                    exit->temperature = restart_backoff_counter(temperature);
                     if (optimized < 0) {
                         Py_DECREF(previous);
                         tstate->previous_executor = Py_None;
@@ -4181,7 +4170,7 @@ dummy_func(
                 }
             }
             /* We need two references. One to store in exit->executor and
-             * one to keep the executor alive when executing. */
+            * one to keep the executor alive when executing. */
             Py_INCREF(executor);
             exit->executor = executor;
             GOTO_TIER_TWO(executor);
diff --git a/Python/ceval.c b/Python/ceval.c
index f3b73165e9f28b89e18de9f05ef526f506933416..57ae08ee3cf85a1305afb3674bd7c08333d6a46a 100644 (file)
@@ -4,6 +4,7 @@
 
 #include "Python.h"
 #include "pycore_abstract.h"      // _PyIndex_Check()
+#include "pycore_backoff.h"
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
 #include "pycore_cell.h"          // PyCell_GetRef()
 #include "pycore_ceval.h"
@@ -822,7 +823,7 @@ resume_frame:
             _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1);
             /* Prevent the underlying instruction from specializing
              * and overwriting the instrumentation. */
-            INCREMENT_ADAPTIVE_COUNTER(cache->counter);
+            PAUSE_ADAPTIVE_COUNTER(cache->counter);
         }
         opcode = original_opcode;
         DISPATCH_GOTO();
@@ -1099,7 +1100,7 @@ exit_to_trace:
         printf("SIDE EXIT: [UOp ");
         _PyUOpPrint(&next_uop[-1]);
         printf(", exit %u, temp %d, target %d -> %s]\n",
-               exit_index, exit->temperature, exit->target,
+               exit_index, exit->temperature.as_counter, exit->target,
                _PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]);
     }
 #endif
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index 1194c11f8ba6070b949e82a55d66d8bf057fbd7a..224cd1da7d4a0e922a4cc26ecfa745bd99a0d71b 100644 (file)
@@ -262,7 +262,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
         STAT_INC(opcode, miss);                                  \
         STAT_INC((INSTNAME), miss);                              \
         /* The counter is always the first cache entry: */       \
-        if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) {       \
+        if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) {       \
             STAT_INC((INSTNAME), deopt);                         \
         }                                                        \
     } while (0)
@@ -290,29 +290,28 @@ GETITEM(PyObject *v, Py_ssize_t i) {
         dtrace_function_entry(frame); \
     }
 
-#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
-    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)
-
-#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
-    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))
+/* This takes a uint16_t instead of a _Py_BackoffCounter,
+ * because it is used directly on the cache entry in generated code,
+ * which is always an integral type. */
+#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
+    backoff_counter_triggers(forge_backoff_counter((COUNTER)))
 
 #ifdef Py_GIL_DISABLED
-#define DECREMENT_ADAPTIVE_COUNTER(COUNTER)                             \
-    do {                                                                \
-        /* gh-115999 tracks progress on addressing this. */             \
+#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
+    do { \
+        /* gh-115999 tracks progress on addressing this. */ \
         static_assert(0, "The specializing interpreter is not yet thread-safe"); \
     } while (0);
 #else
-#define DECREMENT_ADAPTIVE_COUNTER(COUNTER)           \
-    do {                                              \
-        assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
-        (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS);    \
+#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
+    do { \
+        (COUNTER) = advance_backoff_counter((COUNTER)); \
     } while (0);
 #endif
 
-#define INCREMENT_ADAPTIVE_COUNTER(COUNTER)          \
-    do {                                             \
-        (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS);   \
+#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
+    do { \
+        (COUNTER) = pause_backoff_counter((COUNTER)); \
     } while (0);
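
Why pausing works: PAUSE ORs the low bit into the value, so when the _SPECIALIZE_* op next tests the counter its value is at least 1 and the trigger (value == 0) cannot fire; the ADVANCE that follows at most undoes the pause. A sketch, assuming pycore_backoff.h and <assert.h> (the helper name is illustrative):

    static void
    pause_then_advance(_Py_BackoffCounter *c)
    {
        *c = pause_backoff_counter(*c);         /* e.g. value 2 -> 3 */
        assert(!backoff_counter_triggers(*c));  /* value >= 1: cannot trigger */
        *c = advance_backoff_counter(*c);       /* 3 -> 2: no net progress */
    }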
 
 #define UNBOUNDLOCAL_ERROR_MSG \
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 98476798fbbbdf88b8407c0c511978b20b0563b9..8c3d41b64b49a5003ea8d7ec8e0c8caaf8405db7 100644 (file)
             oparg = CURRENT_OPARG();
             _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
             _PyExitData *exit = &previous->exits[oparg];
-            exit->temperature++;
             PyCodeObject *code = _PyFrame_GetCode(frame);
             _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
-            if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
+            _Py_BackoffCounter temperature = exit->temperature;
+            if (!backoff_counter_triggers(temperature)) {
+                exit->temperature = advance_backoff_counter(temperature);
                 GOTO_TIER_ONE(target);
             }
             _PyExecutorObject *executor;
             if (target->op.code == ENTER_EXECUTOR) {
                 executor = code->co_executors->executors[target->op.arg];
                 Py_INCREF(executor);
-            } else {
+            }
+            else {
                 int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
                 if (optimized <= 0) {
-                    int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
-                    exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
+                    exit->temperature = restart_backoff_counter(temperature);
                     if (optimized < 0) {
                         Py_DECREF(previous);
                         tstate->previous_executor = Py_None;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 6ee794a05b51d46d84b09fedcb39f2e2afde28ae..0116acd5ae302f80303591e7aed9f5fea726676e 100644 (file)
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(BINARY_OP, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
                 assert(NB_ADD <= oparg);
                 assert(oparg <= NB_INPLACE_XOR);
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_BinarySubscr(container, sub, next_instr);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(BINARY_SUBSCR, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             // _BINARY_SUBSCR
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(CALL, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             /* Skip 2 cache entries */
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_CompareOp(left, right, next_instr, oparg);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(COMPARE_OP, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             // _COMPARE_OP
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_ContainsOp(right, next_instr);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(CONTAINS_OP, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             // _CONTAINS_OP
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_ForIter(iter, next_instr, oparg);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(FOR_ITER, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             // _FOR_ITER
                 tstate, PY_MONITORING_EVENT_CALL,
                 frame, this_instr, function, arg);
             if (err) goto error;
-            INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
             GO_TO_INSTRUCTION(CALL);
         }
 
             if (next_opcode < 0) goto error;
             next_instr = this_instr;
             if (_PyOpcode_Caches[next_opcode]) {
-                INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
             }
             assert(next_opcode > 0 && next_opcode < 256);
             opcode = next_opcode;
             /* Skip 1 cache entry */
             // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
             // don't want to specialize instrumented instructions
-            INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
             GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
         }
 
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
             #if ENABLE_SPECIALIZATION
-            uint16_t counter = this_instr[1].cache;
-            this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
-            /* We are using unsigned values, but we really want signed values, so
-             * do the 2s complement adjustment manually */
-            uint32_t offset_counter = counter ^ (1 << 15);
-            uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
-            assert((threshold & OPTIMIZER_BITS_MASK) == 0);
-            // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
-            // Double-check that the opcode isn't instrumented or something:
-            if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
+            _Py_BackoffCounter counter = this_instr[1].counter;
+            if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
                 _Py_CODEUNIT *start = this_instr;
                 /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
                 while (oparg > 255) {
                     GOTO_TIER_TWO(executor);
                 }
                 else {
-                    int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
-                    backoff++;
-                    if (backoff < MIN_TIER2_BACKOFF) {
-                        backoff = MIN_TIER2_BACKOFF;
-                    }
-                    else if (backoff > MAX_TIER2_BACKOFF) {
-                        backoff = MAX_TIER2_BACKOFF;
-                    }
-                    this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
+                    this_instr[1].counter = restart_backoff_counter(counter);
                 }
             }
+            else {
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
+            }
             #endif  /* ENABLE_SPECIALIZATION */
             DISPATCH();
         }
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
                     next_instr = this_instr;
                     _Py_Specialize_LoadAttr(owner, next_instr, name);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(LOAD_ATTR, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             /* Skip 8 cache entries */
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
                     next_instr = this_instr;
                     _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(LOAD_GLOBAL, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             /* Skip 1 cache entry */
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
                 int load_method = oparg & 1;
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(LOAD_SUPER_ATTR, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             // _LOAD_SUPER_ATTR
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_Send(receiver, next_instr);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(SEND, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             // _SEND
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
                     next_instr = this_instr;
                     _Py_Specialize_StoreAttr(owner, next_instr, name);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(STORE_ATTR, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             /* Skip 3 cache entries */
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_StoreSubscr(container, sub, next_instr);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(STORE_SUBSCR, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             // _STORE_SUBSCR
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_ToBool(value, next_instr);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(TO_BOOL, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
             }
             /* Skip 2 cache entries */
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
                 #if ENABLE_SPECIALIZATION
-                if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+                if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     next_instr = this_instr;
                     _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
                     DISPATCH_SAME_OPARG();
                 }
                 STAT_INC(UNPACK_SEQUENCE, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
                 #endif  /* ENABLE_SPECIALIZATION */
                 (void)seq;
                 (void)counter;
diff --git a/Python/instrumentation.c b/Python/instrumentation.c
index 018cd662b1561afdac730c078bccda47ee58ba29..0f60290865000cca9241a376121dd124b9c7ca69 100644 (file)
@@ -590,7 +590,7 @@ de_instrument(PyCodeObject *code, int i, int event)
     CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented);
     *opcode_ptr = deinstrumented;
     if (_PyOpcode_Caches[deinstrumented]) {
-        instr[1].cache = adaptive_counter_warmup();
+        instr[1].counter = adaptive_counter_warmup();
     }
 }
 
@@ -611,7 +611,7 @@ de_instrument_line(PyCodeObject *code, int i)
     CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
     instr->op.code = original_opcode;
     if (_PyOpcode_Caches[original_opcode]) {
-        instr[1].cache = adaptive_counter_warmup();
+        instr[1].counter = adaptive_counter_warmup();
     }
     assert(instr->op.code != INSTRUMENTED_LINE);
 }
@@ -634,7 +634,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i)
     CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
     *opcode_ptr = original_opcode;
     if (_PyOpcode_Caches[original_opcode]) {
-        instr[1].cache = adaptive_counter_warmup();
+        instr[1].counter = adaptive_counter_warmup();
     }
     assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION);
     assert(instr->op.code != INSTRUMENTED_INSTRUCTION);
@@ -667,7 +667,7 @@ instrument(PyCodeObject *code, int i)
         assert(instrumented);
         *opcode_ptr = instrumented;
         if (_PyOpcode_Caches[deopt]) {
-            instr[1].cache = adaptive_counter_warmup();
+            instr[1].counter = adaptive_counter_warmup();
         }
     }
 }
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 38ab6d3cf61c722dbd6e8a586d0bd765677849b5..5c69d9d5de92eb804d75aee6ef716806284ae90e 100644 (file)
@@ -1,6 +1,7 @@
 #include "Python.h"
 #include "opcode.h"
 #include "pycore_interp.h"
+#include "pycore_backoff.h"
 #include "pycore_bitutils.h"        // _Py_popcount32()
 #include "pycore_object.h"          // _PyObject_GC_UNTRACK()
 #include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
@@ -110,9 +111,7 @@ never_optimize(
     _PyExecutorObject **exec,
     int Py_UNUSED(stack_entries))
 {
-    /* Although it should be benign for this to be called,
-     * it shouldn't happen, so fail in debug builds. */
-    assert(0 && "never optimize should never be called");
+    // This may be called if the optimizer is reset
     return 0;
 }
 
@@ -127,25 +126,12 @@ PyTypeObject _PyDefaultOptimizer_Type = {
 static _PyOptimizerObject _PyOptimizer_Default = {
     PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
     .optimize = never_optimize,
-    .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
-    .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
-    .side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
 };
 
-static uint32_t
-shift_and_offset_threshold(uint32_t threshold)
-{
-    return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
-}
-
 _PyOptimizerObject *
 PyUnstable_GetOptimizer(void)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    assert(interp->optimizer_backedge_threshold ==
-           shift_and_offset_threshold(interp->optimizer->backedge_threshold));
-    assert(interp->optimizer_resume_threshold ==
-           shift_and_offset_threshold(interp->optimizer->resume_threshold));
     if (interp->optimizer == &_PyOptimizer_Default) {
         return NULL;
     }
@@ -190,13 +176,6 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
     }
     Py_INCREF(optimizer);
     interp->optimizer = optimizer;
-    interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
-    interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
-    interp->optimizer_side_threshold = optimizer->side_threshold;
-    if (optimizer == &_PyOptimizer_Default) {
-        assert(interp->optimizer_backedge_threshold > (1 << 16));
-        assert(interp->optimizer_resume_threshold > (1 << 16));
-    }
     return old;
 }
 
@@ -1109,7 +1088,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
     assert(exit_count < COLD_EXIT_COUNT);
     for (int i = 0; i < exit_count; i++) {
         executor->exits[i].executor = &COLD_EXITS[i];
-        executor->exits[i].temperature = 0;
+        executor->exits[i].temperature = initial_temperature_backoff_counter();
     }
     int next_exit = exit_count-1;
     _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
@@ -1291,11 +1270,6 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
         return NULL;
     }
     opt->optimize = uop_optimize;
-    opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
-    // Need a few iterations to settle specializations,
-    // and to ammortize the cost of optimization.
-    opt->side_threshold = 16;
-    opt->backedge_threshold = 16;
     return (PyObject *)opt;
 }
 
@@ -1385,9 +1359,6 @@ PyUnstable_Optimizer_NewCounter(void)
         return NULL;
     }
     opt->base.optimize = counter_optimize;
-    opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
-    opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
-    opt->base.backedge_threshold = 0;
     opt->count = 0;
     return (PyObject *)opt;
 }
@@ -1554,7 +1525,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor)
     for (uint32_t i = 0; i < executor->exit_count; i++) {
         Py_DECREF(executor->exits[i].executor);
         executor->exits[i].executor = &COLD_EXITS[i];
-        executor->exits[i].temperature = INT16_MIN;
+        executor->exits[i].temperature = initial_unreachable_backoff_counter();
     }
     _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
     assert(instruction->op.code == ENTER_EXECUTOR);
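
The "unreachable" counter used here neither triggers nor advances, so a cleared exit stays cold forever. A sketch, assuming pycore_backoff.h (the function name is illustrative):

    static void
    check_unreachable_counter(void)
    {
        _Py_BackoffCounter dead = initial_unreachable_backoff_counter();
        assert(!backoff_counter_triggers(dead));    /* value bits are 0xFFF */
        dead = advance_backoff_counter(dead);       /* no-op on unreachable */
        assert(is_unreachable_backoff_counter(dead));
    }
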
diff --git a/Python/specialize.c b/Python/specialize.c
index f1e32d05af7707412a7380836cf32f57dbe9b67a..0b4b199a23e297feb7cfa2bf4dc4a56de47901c2 100644 (file)
@@ -419,22 +419,20 @@ _PyCode_Quicken(PyCodeObject *code)
         int caches = _PyOpcode_Caches[opcode];
         if (caches) {
             // The initial value depends on the opcode
-            int initial_value;
             switch (opcode) {
                 case JUMP_BACKWARD:
-                    initial_value = 0;
+                    instructions[i + 1].counter = initial_jump_backoff_counter();
                     break;
                 case POP_JUMP_IF_FALSE:
                 case POP_JUMP_IF_TRUE:
                 case POP_JUMP_IF_NONE:
                 case POP_JUMP_IF_NOT_NONE:
-                    initial_value = 0x5555;  // Alternating 0, 1 bits
+                    instructions[i + 1].cache = 0x5555;  // Alternating 0, 1 bits
                     break;
                 default:
-                    initial_value = adaptive_counter_warmup();
+                    instructions[i + 1].counter = adaptive_counter_warmup();
                     break;
             }
-            instructions[i + 1].cache = initial_value;
             i += caches;
         }
     }
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
index 54160084cda46062026756c85f26027bdac9dd00..351bc2f3dd48de9650819a879cf11fdd0c2cc59a 100644 (file)
@@ -1,5 +1,6 @@
 #include "Python.h"
 
+#include "pycore_backoff.h"
 #include "pycore_call.h"
 #include "pycore_ceval.h"
 #include "pycore_cell.h"