]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-47084: Clear Unicode cached representations on finalization (GH-32032)
author: Jeremy Kloth <jeremy.kloth@gmail.com>
Tue, 22 Mar 2022 12:53:51 +0000 (06:53 -0600)
committer: GitHub <noreply@github.com>
Tue, 22 Mar 2022 12:53:51 +0000 (13:53 +0100)
Include/internal/pycore_unicodeobject.h
Lib/__hello__.py
Lib/test/test_embed.py
Objects/unicodeobject.c
Tools/scripts/deepfreeze.py

index 4394ce939b5673353ebaa80c4a9e30e3fad1ed25..c7f06051a622fca94487eb47ced91bb0a3fa2ffb 100644 (file)
@@ -18,6 +18,7 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
 extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
 extern void _PyUnicode_Fini(PyInterpreterState *);
 extern void _PyUnicode_FiniTypes(PyInterpreterState *);
+extern void _PyStaticUnicode_Dealloc(PyObject *);
 
 
 /* other API */
index d37bd2766ac1c6c50660afff724338ab696afbbd..c09d6a4f52332e958259618fdd68d8aeb7391b21 100644 (file)
@@ -1,5 +1,14 @@
 initialized = True
 
+class TestFrozenUtf8_1:  # docstring is one non-ASCII Latin-1 character (U+00B6); NOTE(review): presumably here so the frozen module carries such a str constant - confirm
+    """\u00b6"""
+
+class TestFrozenUtf8_2:  # docstring is one character above Latin-1 but inside the BMP (U+03C0)
+    """\u03c0"""
+
+class TestFrozenUtf8_4:  # docstring is one character outside the BMP (U+1F600)
+    """\U0001f600"""
+
 def main():
     print("Hello world!")
 
index 80b9674c1c2582f092a7ca8c453e4e741e2f2843..f0c88de68e89eb16d6586f2a1a00bef27fc007c3 100644 (file)
@@ -1645,24 +1645,29 @@ class MiscTests(EmbeddingTestsMixin, unittest.TestCase):
                          '-X showrefcount requires a Python debug build')
     def test_no_memleak(self):
         # bpo-1635741: Python must release all memory at exit
-        cmd = [sys.executable, "-I", "-X", "showrefcount", "-c", "pass"]
-        proc = subprocess.run(cmd,
-                              stdout=subprocess.PIPE,
-                              stderr=subprocess.STDOUT,
-                              text=True)
-        self.assertEqual(proc.returncode, 0)
-        out = proc.stdout.rstrip()
-        match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
-        if not match:
-            self.fail(f"unexpected output: {out!a}")
-        refs = int(match.group(1))
-        blocks = int(match.group(2))
-        self.assertEqual(refs, 0, out)
-        if not MS_WINDOWS:
-            self.assertEqual(blocks, 0, out)
-        else:
-            # bpo-46857: on Windows, Python still leaks 1 memory block at exit
-            self.assertIn(blocks, (0, 1), out)
+        tests = (  # (value for -X frozen_modules=..., statement passed to -c)
+            ('off', 'pass'),
+            ('on', 'pass'),
+            ('off', 'import __hello__'),
+            ('on', 'import __hello__'),
+        )
+        for flag, stmt in tests:  # one child interpreter is spawned per combination
+            xopt = f"frozen_modules={flag}"
+            cmd = [sys.executable, "-I", "-X", "showrefcount", "-X", xopt, "-c", stmt]
+            proc = subprocess.run(cmd,
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.STDOUT,  # stderr is merged into stdout, which is what gets parsed below
+                                  text=True)
+            self.assertEqual(proc.returncode, 0)  # NOTE(review): this and the parse below run outside subTest, so a failure here ends the loop without a case label
+            out = proc.stdout.rstrip()
+            match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)  # expects output starting with "[N refs, M blocks]"
+            if not match:
+                self.fail(f"unexpected output: {out!a}")
+            refs = int(match.group(1))
+            blocks = int(match.group(2))
+            with self.subTest(frozen_modules=flag, stmt=stmt):  # tags a failing assertion with its combination
+                self.assertEqual(refs, 0, out)
+                self.assertEqual(blocks, 0, out)  # zero blocks is now required on every platform; the removed code tolerated 1 on Windows
 
 
 class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase):
index 5dfe6e1e93f9f7396f14f22779eaec02f1a97fa4..ce3ebce1ff72d0df64a7bc53875a29650f98c7f8 100644 (file)
@@ -16057,6 +16057,35 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
 }
 
 
+static void unicode_static_dealloc(PyObject *op)  /* frees the cached wstr / utf8 buffers attached to op and resets the fields; op itself is never freed */
+{
+    PyASCIIObject* ascii = (PyASCIIObject*)op;
+
+    assert(ascii->state.compact);  /* only compact objects are expected (checked in assert-enabled builds) */
+
+    if (ascii->state.ascii) {  /* ASCII object: only wstr is examined */
+        if (ascii->wstr) {
+            PyObject_Free(ascii->wstr);
+            ascii->wstr = NULL;  /* reset so a later call finds nothing left to free */
+        }
+    }
+    else {
+        PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op;
+        void* data = (void*)(compact + 1);  /* address immediately after the compact header */
+        if (ascii->wstr && ascii->wstr != data) {  /* no free when wstr equals data; presumably wstr then aliases the inline character buffer - confirm */
+            PyObject_Free(ascii->wstr);
+            ascii->wstr = NULL;
+            compact->wstr_length = 0;
+        }
+        if (compact->utf8) {  /* release the cached utf8 buffer and clear its recorded length */
+            PyObject_Free(compact->utf8);
+            compact->utf8 = NULL;
+            compact->utf8_length = 0;
+        }
+    }
+}
+
+
 void
 _PyUnicode_Fini(PyInterpreterState *interp)
 {
@@ -16070,6 +16099,21 @@ _PyUnicode_Fini(PyInterpreterState *interp)
     _PyUnicode_FiniEncodings(&state->fs_codec);
 
     unicode_clear_identifiers(state);
+
+    // Clear the single character singletons
+    for (int i = 0; i < 128; i++) {
+        unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]);
+    }
+    for (int i = 0; i < 128; i++) {
+        unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]);
+    }
+}
+
+
+void
+_PyStaticUnicode_Dealloc(PyObject *op)  /* non-static wrapper: forwards op unchanged to unicode_static_dealloc() */
+{
+    unicode_static_dealloc(op);
 }
 
 
index d208258dbc54c48a466f83c393a3837a891910ac..1831c15784af7715e0f9a73b0336a14bb5e32d32 100644 (file)
@@ -185,6 +185,7 @@ class Printer:
                 else:
                     self.write("PyCompactUnicodeObject _compact;")
                 self.write(f"{datatype} _data[{len(s)+1}];")
+        self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
         with self.block(f"{name} =", ";"):
             if ascii:
                 with self.block("._ascii =", ","):