gh-93575: Use correct way to calculate PyUnicode struct sizes (GH-93602)

author Christian Heimes <christian@python.org>

Wed, 8 Jun 2022 18:18:08 +0000 (20:18 +0200)

committer GitHub <noreply@github.com>

Wed, 8 Jun 2022 18:18:08 +0000 (20:18 +0200)
author Christian Heimes <christian@python.org>
Wed, 8 Jun 2022 18:18:08 +0000 (20:18 +0200)
committer GitHub <noreply@github.com>
Wed, 8 Jun 2022 18:18:08 +0000 (20:18 +0200)
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py

index 94a09ff549331a2e8158c17d290c687da70996ca..1dc10d8b0a39ac3df4b7b91016565a2d073a2c63 100644 (file)
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -1539,6 +1539,7 @@ class SizeofTest(unittest.TestCase):
          samples = ['1'*100, '\xff'*50,
                     '\u0100'*40, '\uffff'*100,
                     '\U00010000'*30, '\U0010ffff'*100]
+        # also update field definitions in test_unicode.test_raiseMemError
          asciifields = "nnb"
          compactfields = asciifields + "nP"
          unicodefields = compactfields + "P"
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 64abc0c761b3c8c85e339bbae21f741b084e4031..9765ed97a60a446730da076c725f8596908e69fb 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2370,15 +2370,19 @@ class UnicodeTest(string_tests.CommonTest,
          self.assertIs(s.expandtabs(), s)
  
      def test_raiseMemError(self):
-        null_byte = 1
-        ascii_struct_size = sys.getsizeof("a") - len("a") - null_byte
-        compact_struct_size = sys.getsizeof("\xff") - len("\xff") - null_byte
+        asciifields = "nnb"
+        compactfields = asciifields + "nP"
+        ascii_struct_size = support.calcobjsize(asciifields)
+        compact_struct_size = support.calcobjsize(compactfields)
  
          for char in ('a', '\xe9', '\u20ac', '\U0010ffff'):
              code = ord(char)
-            if code < 0x100:
+            if code < 0x80:
                  char_size = 1  # sizeof(Py_UCS1)
                  struct_size = ascii_struct_size
+            elif code < 0x100:
+                char_size = 1  # sizeof(Py_UCS1)
+                struct_size = compact_struct_size
              elif code < 0x10000:
                  char_size = 2  # sizeof(Py_UCS2)
                  struct_size = compact_struct_size
@@ -2390,7 +2394,16 @@ class UnicodeTest(string_tests.CommonTest,
              # be allocatable, given enough memory.
              maxlen = ((sys.maxsize - struct_size) // char_size)
              alloc = lambda: char * maxlen
-            with self.subTest(char=char):
+            with self.subTest(
+                char=char,
+                struct_size=struct_size,
+                char_size=char_size
+            ):
+                # self-check
+                self.assertEqual(
+                    sys.getsizeof(char * 42),
+                    struct_size + (char_size * (42 + 1))
+                )
                  self.assertRaises(MemoryError, alloc)
                  self.assertRaises(MemoryError, alloc)
  
diff --git a/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst b/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst

new file mode 100644 (file)

index 0000000..98d1532
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst
@@ -0,0 +1,4 @@
+Fix issue with test_unicode test_raiseMemError. The test case now uses
+``test.support.calcobjsize`` to calculate the size of PyUnicode structs.
+:func:`sys.getsizeof` may return a different size when the string has a
+cached UTF-8 representation.
author	Christian Heimes <christian@python.org>
	Wed, 8 Jun 2022 18:18:08 +0000 (20:18 +0200)
committer	GitHub <noreply@github.com>
	Wed, 8 Jun 2022 18:18:08 +0000 (20:18 +0200)
Lib/test/test_sys.py		patch \| blob \| blame \| history
Lib/test/test_unicode.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst	[new file with mode: 0644]	patch \| blob