]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-106581: Fix two bugs in the code generator's copy optimization (#108380)
authorGuido van Rossum <guido@python.org>
Thu, 24 Aug 2023 19:10:51 +0000 (12:10 -0700)
committerGitHub <noreply@github.com>
Thu, 24 Aug 2023 19:10:51 +0000 (19:10 +0000)
I was comparing the last preceding poke with the *last* peek,
rather than the *first* peek.

Unfortunately this bug obscured another bug:
When the last preceding poke is UNUSED, the first peek disappears,
leaving the variable unassigned. This is how I fixed it:

- Rename CopyEffect to CopyItem.
- Change CopyItem to contain StackItems instead of StackEffects.
- Update those StackItems when adjusting the manager higher or lower.
- Assert that those StackItems' offsets are equivalent.
- Other clever things.

---------

Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com>
Python/generated_cases.c.h
Tools/cases_generator/stacking.py

index f6322df566c6500d1cdd2aabcadfff59002aeb68..1724d11231727ff61857bf979237ceb3146c1b5d 100644 (file)
                 DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
             }
             // _CHECK_STACK_SPACE
-            callable = stack_pointer[-2 - oparg];
             {
                 PyFunctionObject *func = (PyFunctionObject *)callable;
                 PyCodeObject *code = (PyCodeObject *)func->func_code;
             }
             // _INIT_CALL_PY_EXACT_ARGS
             args = stack_pointer - oparg;
-            self_or_null = stack_pointer[-1 - oparg];
-            callable = stack_pointer[-2 - oparg];
             {
                 int argcount = oparg;
                 if (self_or_null != NULL) {
index 1e117f11825938f1195d2050d98a9350598fe815..2d006ba8e5934a5720e3b00f63dc911d315d9344 100644 (file)
@@ -83,6 +83,27 @@ class StackOffset:
         terms = self.as_terms()
         return make_index(terms)
 
+    def equivalent_to(self, other: "StackOffset") -> bool:
+        if self.deep == other.deep and self.high == other.high:
+            return True
+        deep = list(self.deep)
+        for x in other.deep:
+            try:
+                deep.remove(x)
+            except ValueError:
+                return False
+        if deep:
+            return False
+        high = list(self.high)
+        for x in other.high:
+            try:
+                high.remove(x)
+            except ValueError:
+                return False
+        if high:
+            return False
+        return True
+
 
 def make_index(terms: list[tuple[str, str]]) -> str:
     # Produce an index expression from the terms honoring PEP 8,
@@ -131,9 +152,9 @@ class StackItem:
 
 
 @dataclasses.dataclass
-class CopyEffect:
-    src: StackEffect
-    dst: StackEffect
+class CopyItem:
+    src: StackItem
+    dst: StackItem
 
 
 class EffectManager:
@@ -143,7 +164,7 @@ class EffectManager:
     active_caches: list[ActiveCacheEffect]
     peeks: list[StackItem]
     pokes: list[StackItem]
-    copies: list[CopyEffect]  # See merge()
+    copies: list[CopyItem]  # See merge()
     # Track offsets from stack pointer
     min_offset: StackOffset
     final_offset: StackOffset
@@ -179,16 +200,18 @@ class EffectManager:
             while (
                 pred.pokes
                 and self.peeks
-                and pred.pokes[-1].effect == self.peeks[-1].effect
+                and pred.pokes[-1].effect == self.peeks[0].effect
             ):
-                src = pred.pokes.pop(-1).effect
-                dst = self.peeks.pop(0).effect
-                pred.final_offset.deeper(src)
-                if dst.name != UNUSED:
-                    destinations.add(dst.name)
-                    if dst.name != src.name:
-                        sources.add(src.name)
-                    self.copies.append(CopyEffect(src, dst))
+                src = pred.pokes.pop(-1)
+                dst = self.peeks.pop(0)
+                assert src.offset.equivalent_to(dst.offset), (src, dst)
+                pred.final_offset.deeper(src.effect)
+                if dst.effect.name != src.effect.name:
+                    if dst.effect.name != UNUSED:
+                        destinations.add(dst.effect.name)
+                    if src.effect.name != UNUSED:
+                        sources.add(src.effect.name)
+                self.copies.append(CopyItem(src, dst))
             # TODO: Turn this into an error (pass an Analyzer instance?)
             assert sources & destinations == set(), (
                 pred.instr.name,
@@ -202,11 +225,27 @@ class EffectManager:
             else:
                 pred = None  # Break
 
+        # Fix up patterns of copies through UNUSED,
+        # e.g. cp(a, UNUSED) + cp(UNUSED, b) -> cp(a, b).
+        if any(copy.src.effect.name == UNUSED for copy in self.copies):
+            pred = self.pred
+            while pred is not None:
+                for copy in self.copies:
+                    if copy.src.effect.name == UNUSED:
+                        for pred_copy in pred.copies:
+                            if pred_copy.dst == copy.src:
+                                copy.src = pred_copy.src
+                                break
+                pred = pred.pred
+
     def adjust_deeper(self, eff: StackEffect) -> None:
         for peek in self.peeks:
             peek.offset.deeper(eff)
         for poke in self.pokes:
             poke.offset.deeper(eff)
+        for copy in self.copies:
+            copy.src.offset.deeper(eff)
+            copy.dst.offset.deeper(eff)
         self.min_offset.deeper(eff)
         self.final_offset.deeper(eff)
 
@@ -215,6 +254,9 @@ class EffectManager:
             peek.offset.higher(eff)
         for poke in self.pokes:
             poke.offset.higher(eff)
+        for copy in self.copies:
+            copy.src.offset.higher(eff)
+            copy.dst.offset.higher(eff)
         self.min_offset.higher(eff)
         self.final_offset.higher(eff)
 
@@ -248,8 +290,8 @@ class EffectManager:
                     vars[eff.name] = eff
 
         for copy in self.copies:
-            add(copy.src)
-            add(copy.dst)
+            add(copy.src.effect)
+            add(copy.dst.effect)
         for peek in self.peeks:
             add(peek.effect)
         for poke in self.pokes:
@@ -365,8 +407,11 @@ def write_components(
             out.emit(f"// {mgr.instr.name}")
 
         for copy in mgr.copies:
-            if copy.src.name != copy.dst.name:
-                out.assign(copy.dst, copy.src)
+            copy_src_effect = copy.src.effect
+            if copy_src_effect.name != copy.dst.effect.name:
+                if copy_src_effect.name == UNUSED:
+                    copy_src_effect = copy.src.as_stack_effect()
+                out.assign(copy.dst.effect, copy_src_effect)
         for peek in mgr.peeks:
             out.assign(
                 peek.effect,