git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-145202: Fix crash in unicodedata's GraphemeBreakIterator and Segment (GH-145216)
author Serhiy Storchaka <storchaka@gmail.com>
Thu, 26 Feb 2026 09:30:08 +0000 (11:30 +0200)
committer GitHub <noreply@github.com>
Thu, 26 Feb 2026 09:30:08 +0000 (11:30 +0200)
Remove the tp_clear slots and make Segment members read-only.

Also add tests for reference loops involving GraphemeBreakIterator
and Segment.

Lib/test/test_unicodedata.py
Modules/unicodedata.c

index 30a26751d3ac54e997ee2eef2d71bac9a24c70b6..8ecb0df2f8e5ddce5999efd44d984a5287653f97 100644 (file)
@@ -12,7 +12,9 @@ from http.client import HTTPException
 import sys
 import unicodedata
 import unittest
+import weakref
 from test.support import (
+    gc_collect,
     open_urlresource,
     requires_resource,
     script_helper,
@@ -1338,6 +1340,28 @@ class GraphemeBreakTest(unittest.TestCase):
                     self.assertEqual([x.start for x in result], breaks[i:-1], comment)
                     self.assertEqual([x.end for x in result], breaks[i+1:], comment)
 
+    def test_reference_loops(self):
+        # Test that reference loops involving GraphemeBreakIterator or
+        # Segment can be broken by the garbage collector.
+        class S(str):
+            pass
+
+        s = S('abc')
+        s.ref = unicodedata.iter_graphemes(s)
+        wr = weakref.ref(s)
+        del s
+        self.assertIsNotNone(wr())
+        gc_collect()
+        self.assertIsNone(wr())
+
+        s = S('abc')
+        s.ref = next(unicodedata.iter_graphemes(s))
+        wr = weakref.ref(s)
+        del s
+        self.assertIsNotNone(wr())
+        gc_collect()
+        self.assertIsNone(wr())
+
 
 if __name__ == "__main__":
     unittest.main()
index 401f64e74169440f9504186ec32b96de93d053e9..2c67c23d98ed81a5bd9633fd1f07e546f187b5e0 100644 (file)
@@ -1925,13 +1925,6 @@ Segment_traverse(PyObject *self, visitproc visit, void *arg)
     return 0;
 }
 
-static int
-Segment_clear(PyObject *self)
-{
-    Py_CLEAR(((SegmentObject *)self)->string);
-    return 0;
-}
-
 static PyObject *
 Segment_str(PyObject *self)
 {
@@ -1947,9 +1940,9 @@ Segment_repr(PyObject *self)
 }
 
 static PyMemberDef Segment_members[] = {
-    {"start", Py_T_PYSSIZET, offsetof(SegmentObject, start), 0,
+    {"start", Py_T_PYSSIZET, offsetof(SegmentObject, start), Py_READONLY,
         PyDoc_STR("grapheme start")},
-    {"end", Py_T_PYSSIZET, offsetof(SegmentObject, end), 0,
+    {"end", Py_T_PYSSIZET, offsetof(SegmentObject, end), Py_READONLY,
         PyDoc_STR("grapheme end")},
     {NULL}  /* Sentinel */
 };
@@ -1957,7 +1950,6 @@ static PyMemberDef Segment_members[] = {
 static PyType_Slot Segment_slots[] = {
     {Py_tp_dealloc, Segment_dealloc},
     {Py_tp_traverse, Segment_traverse},
-    {Py_tp_clear, Segment_clear},
     {Py_tp_str, Segment_str},
     {Py_tp_repr, Segment_repr},
     {Py_tp_members, Segment_members},
@@ -2001,13 +1993,6 @@ GBI_traverse(PyObject *self, visitproc visit, void *arg)
     return 0;
 }
 
-static int
-GBI_clear(PyObject *self)
-{
-    Py_CLEAR(((GraphemeBreakIterator *)self)->iter.str);
-    return 0;
-}
-
 static PyObject *
 GBI_iternext(PyObject *self)
 {
@@ -2038,7 +2023,6 @@ static PyType_Slot GraphemeBreakIterator_slots[] = {
     {Py_tp_iter, PyObject_SelfIter},
     {Py_tp_iternext, GBI_iternext},
     {Py_tp_traverse, GBI_traverse},
-    {Py_tp_clear, GBI_clear},
     {0, 0},
 };