]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-95150: Use position and exception tables for code hashing and equality (GH-95509)
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Mon, 1 Aug 2022 18:33:49 +0000 (11:33 -0700)
committerGitHub <noreply@github.com>
Mon, 1 Aug 2022 18:33:49 +0000 (11:33 -0700)
(cherry picked from commit c7e5bbaee88a71dc6e633e3cd451ed1798436382)

Co-authored-by: Brandt Bucher <brandtbucher@gmail.com>
Lib/test/test_code.py
Lib/test/test_compile.py
Lib/test/test_syntax.py
Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst [new file with mode: 0644]
Objects/codeobject.c

index fd68f6dee7915a583433469aaeab9bbfd8f0ff4a..2386cf6b59f396a06e5a922a47a29cd78b7f9a06 100644 (file)
@@ -428,6 +428,27 @@ class CodeTest(unittest.TestCase):
             self.assertIsNone(line)
             self.assertEqual(end_line, new_code.co_firstlineno + 1)
 
+    def test_code_equality(self):
+        def f():
+            try:
+                a()
+            except:
+                b()
+            else:
+                c()
+            finally:
+                d()
+        code_a = f.__code__
+        code_b = code_a.replace(co_linetable=b"")
+        code_c = code_a.replace(co_exceptiontable=b"")
+        code_d = code_b.replace(co_exceptiontable=b"")
+        self.assertNotEqual(code_a, code_b)
+        self.assertNotEqual(code_a, code_c)
+        self.assertNotEqual(code_a, code_d)
+        self.assertNotEqual(code_b, code_c)
+        self.assertNotEqual(code_b, code_d)
+        self.assertNotEqual(code_c, code_d)
+
 
 def isinterned(s):
     return s is sys.intern(('_' + s + '_')[1:-1])
index edde54aaca00c4047aed1db59dff950b676cb1fa..b4e65c656642912719f4eaf85f98d21ac0538966 100644 (file)
@@ -613,7 +613,7 @@ if 1:
             exec(code, ns)
             f1 = ns['f1']
             f2 = ns['f2']
-            self.assertIs(f1.__code__, f2.__code__)
+            self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
             self.check_constant(f1, const)
             self.assertEqual(repr(f1()), repr(const))
 
@@ -626,7 +626,7 @@ if 1:
         # Note: "lambda: ..." emits "LOAD_CONST Ellipsis",
         # whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis"
         f1, f2 = lambda: ..., lambda: ...
-        self.assertIs(f1.__code__, f2.__code__)
+        self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
         self.check_constant(f1, Ellipsis)
         self.assertEqual(repr(f1()), repr(Ellipsis))
 
@@ -641,7 +641,7 @@ if 1:
         # {0} is converted to a constant frozenset({0}) by the peephole
         # optimizer
         f1, f2 = lambda x: x in {0}, lambda x: x in {0}
-        self.assertIs(f1.__code__, f2.__code__)
+        self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
         self.check_constant(f1, frozenset({0}))
         self.assertTrue(f1(0))
 
@@ -1264,6 +1264,27 @@ f(
             self.assertIsNotNone(end_column)
             self.assertLessEqual((line, column), (end_line, end_column))
 
+    @support.cpython_only
+    def test_column_offset_deduplication(self):
+        # GH-95150: Code with different column offsets shouldn't be merged!
+        for source in [
+            "lambda: a",
+            "(a for b in c)",
+            "[a for b in c]",
+            "{a for b in c}",
+            "{a: b for c in d}",
+        ]:
+            with self.subTest(source):
+                code = compile(f"{source}, {source}", "<test>", "eval")
+                self.assertEqual(len(code.co_consts), 2)
+                self.assertIsInstance(code.co_consts[0], types.CodeType)
+                self.assertIsInstance(code.co_consts[1], types.CodeType)
+                self.assertNotEqual(code.co_consts[0], code.co_consts[1])
+                self.assertNotEqual(
+                    list(code.co_consts[0].co_positions()),
+                    list(code.co_consts[1].co_positions()),
+                )
+
 
 class TestExpressionStackSize(unittest.TestCase):
     # These tests check that the computed stack size for a code object
index b22a96b20298dd2e1731438d4563322ea40b5933..ae1066924b3cf513bd83060d5a044c8dd4c0b45d 100644 (file)
@@ -2012,7 +2012,8 @@ def fib(n):
     a, b = 0, 1
 """
         try:
-            self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
+            compile(s1, '<string>', 'exec')
+            compile(s2, '<string>', 'exec')
         except SyntaxError:
             self.fail("Indented statement over multiple lines is valid")
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst
new file mode 100644 (file)
index 0000000..c3db471
--- /dev/null
@@ -0,0 +1,3 @@
+Update code object hashing and equality to consider all debugging and
+exception handling tables. This fixes an issue where certain non-identical
+code objects could be "deduplicated" during compilation.
index 40ac19d543b1ecb219b1c7726abd9bb37ffc2861..970aa6116bfab8a43b6fdbf28509083e2dee3a58 100644 (file)
@@ -1693,6 +1693,15 @@ code_richcompare(PyObject *self, PyObject *other, int op)
     eq = PyObject_RichCompareBool(co->co_localsplusnames,
                                   cp->co_localsplusnames, Py_EQ);
     if (eq <= 0) goto unequal;
+    eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ);
+    if (eq <= 0) {
+        goto unequal;
+    }
+    eq = PyObject_RichCompareBool(co->co_exceptiontable,
+                                  cp->co_exceptiontable, Py_EQ);
+    if (eq <= 0) {
+        goto unequal;
+    }
 
     if (op == Py_EQ)
         res = Py_True;
@@ -1725,7 +1734,15 @@ code_hash(PyCodeObject *co)
     if (h2 == -1) return -1;
     h3 = PyObject_Hash(co->co_localsplusnames);
     if (h3 == -1) return -1;
-    h = h0 ^ h1 ^ h2 ^ h3 ^
+    Py_hash_t h4 = PyObject_Hash(co->co_linetable);
+    if (h4 == -1) {
+        return -1;
+    }
+    Py_hash_t h5 = PyObject_Hash(co->co_exceptiontable);
+    if (h5 == -1) {
+        return -1;
+    }
+    h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^
         co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^
         co->co_flags;
     if (h == -1) h = -2;