GH-126795: Increase the JIT threshold from 16 to 4096 (GH-126816)

author Brandt Bucher <brandtbucher@microsoft.com>

Mon, 18 Nov 2024 19:11:23 +0000 (11:11 -0800)

committer GitHub <noreply@github.com>

Mon, 18 Nov 2024 19:11:23 +0000 (11:11 -0800)
author Brandt Bucher <brandtbucher@microsoft.com>
Mon, 18 Nov 2024 19:11:23 +0000 (11:11 -0800)
committer GitHub <noreply@github.com>
Mon, 18 Nov 2024 19:11:23 +0000 (11:11 -0800)
diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h

index 20436a68b696774ea35680df823c61130a89502a..3e02728522828e7fa166ca71a55d155490206f02 100644 (file)
--- a/Include/internal/pycore_backoff.h
+++ b/Include/internal/pycore_backoff.h
@@ -102,10 +102,9 @@ backoff_counter_triggers(_Py_BackoffCounter counter)
  }
  
  /* Initial JUMP_BACKWARD counter.
- * This determines when we create a trace for a loop.
-* Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */
-#define JUMP_BACKWARD_INITIAL_VALUE 15
-#define JUMP_BACKWARD_INITIAL_BACKOFF 4
+ * This determines when we create a trace for a loop. */
+#define JUMP_BACKWARD_INITIAL_VALUE 4095
+#define JUMP_BACKWARD_INITIAL_BACKOFF 12
  static inline _Py_BackoffCounter
  initial_jump_backoff_counter(void)
  {
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py

index 9726353bcd6a6d6a1f85ff96d7547f637517f267..edd83872e573e4e8b6d3ae7c27bab1a8b6611a4a 100644 (file)
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -62,9 +62,9 @@ class TestOptimizerAPI(unittest.TestCase):
      def test_counter_optimizer(self):
          # Generate a new function at each call
          ns = {}
-        exec(textwrap.dedent("""
+        exec(textwrap.dedent(f"""
              def loop():
-                for _ in range(1000):
+                for _ in range({TIER2_THRESHOLD + 1000}):
                      pass
          """), ns, ns)
          loop = ns['loop']
@@ -75,20 +75,19 @@ class TestOptimizerAPI(unittest.TestCase):
                  self.assertEqual(opt.get_count(), 0)
                  with clear_executors(loop):
                      loop()
-                # Subtract because optimizer doesn't kick in sooner
-                self.assertEqual(opt.get_count(), 1000 - TIER2_THRESHOLD)
+                self.assertEqual(opt.get_count(), 1001)
  
      def test_long_loop(self):
          "Check that we aren't confused by EXTENDED_ARG"
  
          # Generate a new function at each call
          ns = {}
-        exec(textwrap.dedent("""
+        exec(textwrap.dedent(f"""
              def nop():
                  pass
  
              def long_loop():
-                for _ in range(20):
+                for _ in range({TIER2_THRESHOLD + 20}):
                      nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
                      nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
                      nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
@@ -103,7 +102,7 @@ class TestOptimizerAPI(unittest.TestCase):
          with temporary_optimizer(opt):
              self.assertEqual(opt.get_count(), 0)
              long_loop()
-            self.assertEqual(opt.get_count(), 20 - TIER2_THRESHOLD)  # Need iterations to warm up
+            self.assertEqual(opt.get_count(), 21)  # Need iterations to warm up
  
      def test_code_restore_for_ENTER_EXECUTOR(self):
          def testfunc(x):
@@ -159,7 +158,7 @@ class TestExecutorInvalidation(unittest.TestCase):
          func_src = "\n".join(
              f"""
              def f{n}():
-                for _ in range(1000):
+                for _ in range({TIER2_THRESHOLD}):
                      pass
              """ for n in range(5)
          )
@@ -190,9 +189,9 @@ class TestExecutorInvalidation(unittest.TestCase):
      def test_uop_optimizer_invalidation(self):
          # Generate a new function at each call
          ns = {}
-        exec(textwrap.dedent("""
+        exec(textwrap.dedent(f"""
              def f():
-                for i in range(1000):
+                for i in range({TIER2_THRESHOLD}):
                      pass
          """), ns, ns)
          f = ns['f']
@@ -207,7 +206,7 @@ class TestExecutorInvalidation(unittest.TestCase):
  
      def test_sys__clear_internal_caches(self):
          def f():
-            for _ in range(1000):
+            for _ in range(TIER2_THRESHOLD):
                  pass
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
@@ -236,7 +235,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(1000)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -247,7 +246,7 @@ class TestUops(unittest.TestCase):
      def test_extended_arg(self):
          "Check EXTENDED_ARG handling in superblock creation"
          ns = {}
-        exec(textwrap.dedent("""
+        exec(textwrap.dedent(f"""
              def many_vars():
                  # 260 vars, so z9 should have index 259
                  a0 = a1 = a2 = a3 = a4 = a5 = a6 = a7 = a8 = a9 = 42
@@ -275,7 +274,7 @@ class TestUops(unittest.TestCase):
                  w0 = w1 = w2 = w3 = w4 = w5 = w6 = w7 = w8 = w9 = 42
                  x0 = x1 = x2 = x3 = x4 = x5 = x6 = x7 = x8 = x9 = 42
                  y0 = y1 = y2 = y3 = y4 = y5 = y6 = y7 = y8 = y9 = 42
-                z0 = z1 = z2 = z3 = z4 = z5 = z6 = z7 = z8 = z9 = 42
+                z0 = z1 = z2 = z3 = z4 = z5 = z6 = z7 = z8 = z9 = {TIER2_THRESHOLD}
                  while z9 > 0:
                      z9 = z9 - 1
                      +z9
@@ -308,7 +307,7 @@ class TestUops(unittest.TestCase):
          opt = _testinternalcapi.new_uop_optimizer()
  
          with temporary_optimizer(opt):
-            testfunc(20)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -323,7 +322,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(20)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -338,7 +337,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(range(20))
+            testfunc(range(TIER2_THRESHOLD))
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -355,7 +354,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(range(20))
+            testfunc(range(TIER2_THRESHOLD))
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -371,7 +370,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(20)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -386,7 +385,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(20)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -406,7 +405,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(20)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -424,8 +423,8 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            total = testfunc(20)
-            self.assertEqual(total, 190)
+            total = testfunc(TIER2_THRESHOLD)
+            self.assertEqual(total, sum(range(TIER2_THRESHOLD)))
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -445,9 +444,9 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            a = list(range(20))
+            a = list(range(TIER2_THRESHOLD))
              total = testfunc(a)
-            self.assertEqual(total, 190)
+            self.assertEqual(total, sum(a))
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -467,9 +466,9 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            a = tuple(range(20))
+            a = tuple(range(TIER2_THRESHOLD))
              total = testfunc(a)
-            self.assertEqual(total, 190)
+            self.assertEqual(total, sum(a))
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -503,7 +502,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(20)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -521,7 +520,7 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(20)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -549,9 +548,9 @@ class TestUops(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            x = testfunc(10, 10)
+            x = testfunc(TIER2_THRESHOLD, TIER2_THRESHOLD)
  
-        self.assertEqual(x, sum(range(10)) * 10010)
+        self.assertEqual(x, sum(range(TIER2_THRESHOLD)) * TIER2_THRESHOLD * 1001)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
@@ -572,15 +571,13 @@ class TestUops(unittest.TestCase):
                      bits += 1
                  if i&0x10:
                      bits += 1
-                if i&0x20:
-                    bits += 1
              return bits
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            x = testfunc(20)
+            x = testfunc(TIER2_THRESHOLD * 2)
  
-        self.assertEqual(x, 40)
+        self.assertEqual(x, TIER2_THRESHOLD * 5)
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
          ops = list(iter_opnames(ex))
@@ -615,9 +612,9 @@ class TestUopsOptimization(unittest.TestCase):
                  num += 1
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertIsNotNone(ex)
-        self.assertEqual(res, 63)
+        self.assertEqual(res, (TIER2_THRESHOLD - 1) * 2 + 1)
          binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"]
          guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"]
          self.assertGreaterEqual(len(binop_count), 3)
@@ -637,11 +634,11 @@ class TestUopsOptimization(unittest.TestCase):
          opt = _testinternalcapi.new_uop_optimizer()
          res = None
          with temporary_optimizer(opt):
-            res = testfunc(32)
+            res = testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
-        self.assertEqual(res, 124)
+        self.assertEqual(res, (TIER2_THRESHOLD - 1) * 4)
          binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"]
          guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"]
          self.assertGreaterEqual(len(binop_count), 3)
@@ -661,11 +658,11 @@ class TestUopsOptimization(unittest.TestCase):
          opt = _testinternalcapi.new_uop_optimizer()
          res = None
          with temporary_optimizer(opt):
-            res = testfunc(32)
+            res = testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
-        self.assertEqual(res, 124)
+        self.assertEqual(res, (TIER2_THRESHOLD - 1) * 4)
          binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"]
          guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"]
          self.assertGreaterEqual(len(binop_count), 3)
@@ -682,7 +679,7 @@ class TestUopsOptimization(unittest.TestCase):
                  num += 1
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 64)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertIsNotNone(ex)
          binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"]
          self.assertGreaterEqual(len(binop_count), 3)
@@ -694,7 +691,7 @@ class TestUopsOptimization(unittest.TestCase):
              for i in range(n):
                  dummy(i)
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
          self.assertIn("_PUSH_FRAME", uops)
@@ -708,8 +705,8 @@ class TestUopsOptimization(unittest.TestCase):
                  x = i + i
              return x
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, 62)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, (TIER2_THRESHOLD - 1) * 2)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
          self.assertNotIn("_GUARD_BOTH_INT", uops)
@@ -726,7 +723,7 @@ class TestUopsOptimization(unittest.TestCase):
                  res = x + z + a + b
              return res
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertEqual(res, 4)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
@@ -739,8 +736,8 @@ class TestUopsOptimization(unittest.TestCase):
              for _ in range(n):
                  return [i for i in range(n)]
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, list(range(32)))
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, list(range(TIER2_THRESHOLD)))
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
          self.assertNotIn("_BINARY_OP_ADD_INT", uops)
@@ -797,7 +794,7 @@ class TestUopsOptimization(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          _testinternalcapi.set_optimizer(opt)
-        testfunc(64)
+        testfunc(_testinternalcapi.TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          assert ex is not None
@@ -817,8 +814,8 @@ class TestUopsOptimization(unittest.TestCase):
                  a = a + 0.25
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertAlmostEqual(res, 33.0)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertAlmostEqual(res, TIER2_THRESHOLD + 1)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
          guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"]
@@ -837,8 +834,8 @@ class TestUopsOptimization(unittest.TestCase):
                  a = a - 0.25
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertAlmostEqual(res, -31.0)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertAlmostEqual(res, -TIER2_THRESHOLD + 1)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
          guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"]
@@ -857,7 +854,7 @@ class TestUopsOptimization(unittest.TestCase):
                  a = a * 1.0
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertAlmostEqual(res, 1.0)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
@@ -877,7 +874,7 @@ class TestUopsOptimization(unittest.TestCase):
                  a + a
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertEqual(res, "")
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
@@ -895,7 +892,7 @@ class TestUopsOptimization(unittest.TestCase):
                  x = a == a
              return x
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertTrue(res)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
@@ -913,7 +910,7 @@ class TestUopsOptimization(unittest.TestCase):
                  x = a == a
              return x
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertTrue(res)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
@@ -931,7 +928,7 @@ class TestUopsOptimization(unittest.TestCase):
                      x = 1
              return x
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertEqual(res, 1)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
@@ -951,7 +948,7 @@ class TestUopsOptimization(unittest.TestCase):
                      x = 1
              return x
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertEqual(res, 1)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
@@ -971,7 +968,7 @@ class TestUopsOptimization(unittest.TestCase):
                  x = a == a
              return x
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
          self.assertTrue(res)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
@@ -989,10 +986,10 @@ class TestUopsOptimization(unittest.TestCase):
          exec(src, ns, ns)
          testfunc = ns['testfunc']
          ns['_test_global'] = 0
-        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1)
          self.assertIsNone(ex)
          ns['_test_global'] = 1
-        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1)
          self.assertIsNotNone(ex)
          uops = get_opnames(ex)
          self.assertNotIn("_GUARD_BOTH_INT", uops)
@@ -1003,10 +1000,10 @@ class TestUopsOptimization(unittest.TestCase):
          exec(src, ns, ns)
          testfunc = ns['testfunc']
          ns['_test_global'] = 0
-        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1)
          self.assertIsNone(ex)
          ns['_test_global'] = 3.14
-        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1)
          self.assertIsNone(ex)
  
      def test_combine_stack_space_checks_sequential(self):
@@ -1023,8 +1020,8 @@ class TestUopsOptimization(unittest.TestCase):
                  a += b + c + d
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, 832)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * 26)
          self.assertIsNotNone(ex)
  
          uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
@@ -1050,8 +1047,8 @@ class TestUopsOptimization(unittest.TestCase):
                  a += b + c
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, 224)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * 7)
          self.assertIsNotNone(ex)
  
          uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
@@ -1085,8 +1082,8 @@ class TestUopsOptimization(unittest.TestCase):
                  a += b + c + d + e
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, 800)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * 25)
          self.assertIsNotNone(ex)
  
          uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
@@ -1121,8 +1118,8 @@ class TestUopsOptimization(unittest.TestCase):
                  a += b + c + d + e
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, 800)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * 25)
          self.assertIsNotNone(ex)
  
          uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
@@ -1157,16 +1154,16 @@ class TestUopsOptimization(unittest.TestCase):
              z = dummy0(y)
              return dummy4(z)
          def testfunc(n):
-            a = 0;
-            for _ in range(32):
+            a = 0
+            for _ in range(n):
                  b = dummy5(1)
                  c = dummy0(1)
                  d = dummy6(1)
                  a += b + c + d
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, 96)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * 3)
          self.assertIsNotNone(ex)
  
          uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
@@ -1225,8 +1222,8 @@ class TestUopsOptimization(unittest.TestCase):
                  b += dummy15(7)
              return b
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, 32 * (repetitions + 9))
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * (repetitions + 9))
          self.assertIsNotNone(ex)
  
          uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
@@ -1259,8 +1256,13 @@ class TestUopsOptimization(unittest.TestCase):
                  a += dummy15(n)
              return a
  
-        res, ex = self._run_with_optimizer(testfunc, 32)
-        self.assertEqual(res, 42 * 32)
+        recursion_limit = sys.getrecursionlimit()
+        try:
+            sys.setrecursionlimit(TIER2_THRESHOLD + recursion_limit)
+            res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        finally:
+            sys.setrecursionlimit(recursion_limit)
+        self.assertEqual(res, TIER2_THRESHOLD * 42)
          self.assertIsNotNone(ex)
  
          uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
@@ -1307,8 +1309,8 @@ class TestUopsOptimization(unittest.TestCase):
              for i in range(n):
                  gen()
              return i
-        res, ex = self._run_with_optimizer(testfunc, 20)
-        self.assertEqual(res, 19)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD - 1)
          self.assertIsNotNone(ex)
          self.assertIn("_RETURN_GENERATOR", get_opnames(ex))
  
@@ -1322,8 +1324,8 @@ class TestUopsOptimization(unittest.TestCase):
              for i in g:
                  s += i
              return s
-        res, ex = self._run_with_optimizer(testfunc, 20)
-        self.assertEqual(res, 190)
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, sum(range(TIER2_THRESHOLD)))
          self.assertIsNotNone(ex)
          self.assertIn("_FOR_ITER_GEN_FRAME", get_opnames(ex))
  
@@ -1342,7 +1344,7 @@ class TestUopsOptimization(unittest.TestCase):
      def test_guard_type_version_removed(self):
          def thing(a):
              x = 0
-            for _ in range(100):
+            for _ in range(TIER2_THRESHOLD):
                  x += a.attr
                  x += a.attr
              return x
@@ -1353,7 +1355,7 @@ class TestUopsOptimization(unittest.TestCase):
          res, ex = self._run_with_optimizer(thing, Foo())
          opnames = list(iter_opnames(ex))
          self.assertIsNotNone(ex)
-        self.assertEqual(res, 200)
+        self.assertEqual(res, TIER2_THRESHOLD * 2)
          guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
          self.assertEqual(guard_type_version_count, 1)
  
@@ -1367,7 +1369,7 @@ class TestUopsOptimization(unittest.TestCase):
  
          def thing(a):
              x = 0
-            for _ in range(100):
+            for _ in range(TIER2_THRESHOLD):
                  x += a.attr
                  fn()
                  x += a.attr
@@ -1379,7 +1381,7 @@ class TestUopsOptimization(unittest.TestCase):
          res, ex = self._run_with_optimizer(thing, Foo())
          opnames = list(iter_opnames(ex))
          self.assertIsNotNone(ex)
-        self.assertEqual(res, 200)
+        self.assertEqual(res, TIER2_THRESHOLD * 2)
          guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
          self.assertEqual(guard_type_version_count, 1)
  
@@ -1390,13 +1392,13 @@ class TestUopsOptimization(unittest.TestCase):
  
          def thing(a):
              x = 0
-            for i in range(100):
+            for i in range(TIER2_THRESHOLD + 100):
                  x += a.attr
-                # for the first 90 iterations we set the attribute on this dummy function which shouldn't
+                # for the first (TIER2_THRESHOLD + 90) iterations we set the attribute on this dummy function which shouldn't
                  # trigger the type watcher
-                # then after 90  it should trigger it and stop optimizing
+                # then for the next 10 iterations it should trigger it and stop optimizing
                  # Note that the code needs to be in this weird form so it's optimized inline without any control flow
-                setattr((Foo, Bar)[i < 90], "attr", 2)
+                setattr((Foo, Bar)[i < TIER2_THRESHOLD + 90], "attr", 2)
                  x += a.attr
              return x
  
@@ -1410,7 +1412,7 @@ class TestUopsOptimization(unittest.TestCase):
          opnames = list(iter_opnames(ex))
  
          self.assertIsNotNone(ex)
-        self.assertEqual(res, 219)
+        self.assertEqual(res, (TIER2_THRESHOLD * 2) + 219)
          guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
          self.assertEqual(guard_type_version_count, 2)
  
@@ -1449,7 +1451,7 @@ class TestUopsOptimization(unittest.TestCase):
  
          def thing(a):
              x = 0
-            for i in range(100):
+            for i in range(TIER2_THRESHOLD):
                  x += a.attr
                  x += a.attr
              return x
@@ -1458,7 +1460,7 @@ class TestUopsOptimization(unittest.TestCase):
              attr = 1
  
          res, ex = self._run_with_optimizer(thing, Foo())
-        self.assertEqual(res, 200)
+        self.assertEqual(res, TIER2_THRESHOLD * 2)
          self.assertIsNotNone(ex)
          self.assertEqual(list(iter_opnames(ex)).count("_GUARD_TYPE_VERSION"), 1)
          self.assertTrue(ex.is_valid())
@@ -1494,7 +1496,7 @@ class TestUopsOptimization(unittest.TestCase):
  
          opt = _testinternalcapi.new_uop_optimizer()
          with temporary_optimizer(opt):
-            testfunc(20)
+            testfunc(TIER2_THRESHOLD)
  
          ex = get_first_executor(testfunc)
          self.assertIsNotNone(ex)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-13-17-18-13.gh-issue-126795._JBX9e.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-13-17-18-13.gh-issue-126795._JBX9e.rst

new file mode 100644 (file)

index 0000000..1679953
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-13-17-18-13.gh-issue-126795._JBX9e.rst
@@ -0,0 +1,2 @@
+Increase the threshold for JIT code warmup. Depending on platform and workload,
+this can result in performance gains of 1-9% and memory savings of 3-5%.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c

index 35cb215418e0cdd08f812ad0219f786338a28735..5e21d96876e1d55ff04d7d618cc2d96f72e72455 100644 (file)
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2229,7 +2229,7 @@ module_exec(PyObject *module)
      }
  
      if (PyModule_Add(module, "TIER2_THRESHOLD",
-                        PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE)) < 0) {
+                        PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE + 1)) < 0) {
          return 1;
      }
author	Brandt Bucher <brandtbucher@microsoft.com>
	Mon, 18 Nov 2024 19:11:23 +0000 (11:11 -0800)
committer	GitHub <noreply@github.com>
	Mon, 18 Nov 2024 19:11:23 +0000 (11:11 -0800)
Include/internal/pycore_backoff.h		patch \| blob \| blame \| history
Lib/test/test_capi/test_opt.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core_and_Builtins/2024-11-13-17-18-13.gh-issue-126795._JBX9e.rst	[new file with mode: 0644]	patch \| blob
Modules/_testinternalcapi.c		patch \| blob \| blame \| history