]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-126004: Remove redundant safeguards for codecs handlers tests (#127680)
authorBénédikt Tran <10796600+picnixz@users.noreply.github.com>
Thu, 23 Jan 2025 15:06:16 +0000 (16:06 +0100)
committerGitHub <noreply@github.com>
Thu, 23 Jan 2025 15:06:16 +0000 (16:06 +0100)
We remove the safeguards that were added in `Lib/test/test_capi/test_codecs.py`
since they are now redundant (see 32e07fd377f81cbeb8c108fc791a3e7d631319b6
for additional context).

Indeed, the codecs handlers now correctly handle the `start` and `end` positions
of `UnicodeError` objects and thus should not crash.

Lib/test/test_capi/test_codecs.py

index e368f70c196abf15d9c0e1e373a78b617de21d70..a0355c7a388c57e6e794324fbacc8ddedefc18a6 100644 (file)
@@ -839,38 +839,33 @@ class CAPICodecErrors(unittest.TestCase):
 
     def test_codec_replace_errors_handler(self):
         handler = _testcapi.codec_replace_errors
-        self.do_test_codec_errors_handler(handler, self.all_unicode_errors,
-                                          safe=True)
+        self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
 
     def test_codec_xmlcharrefreplace_errors_handler(self):
         handler = _testcapi.codec_xmlcharrefreplace_errors
-        self.do_test_codec_errors_handler(handler, self.unicode_encode_errors,
-                                          safe=True)
+        self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
 
     def test_codec_backslashreplace_errors_handler(self):
         handler = _testcapi.codec_backslashreplace_errors
-        self.do_test_codec_errors_handler(handler, self.all_unicode_errors,
-                                          safe=True)
+        self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
 
     def test_codec_namereplace_errors_handler(self):
         handler = _testlimitedcapi.codec_namereplace_errors
         self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
 
-    def do_test_codec_errors_handler(self, handler, exceptions, *, safe=False):
-        at_least_one = False
+    def do_test_codec_errors_handler(self, handler, exceptions):
+        self.assertNotEqual(len(exceptions), 0)
         for exc in exceptions:
-            # See https://github.com/python/cpython/issues/123378 and related
-            # discussion and issues for details.
-            if not safe and self._exception_may_crash(exc):
-                continue
-
-            at_least_one = True
             with self.subTest(handler=handler, exc=exc):
                 # test that the handler does not crash
-                self.assertIsInstance(handler(exc), tuple)
-
-        if exceptions:
-            self.assertTrue(at_least_one, "all exceptions are crashing")
+                res = handler(exc)
+                self.assertIsInstance(res, tuple)
+                self.assertEqual(len(res), 2)
+                replacement, continue_from = res
+                self.assertIsInstance(replacement, str)
+                self.assertIsInstance(continue_from, int)
+                self.assertGreaterEqual(continue_from, 0)
+                self.assertLessEqual(continue_from, len(exc.object))
 
         for bad_exc in (
             self.bad_unicode_errors
@@ -879,30 +874,6 @@ class CAPICodecErrors(unittest.TestCase):
             with self.subTest('bad type', handler=handler, exc=bad_exc):
                 self.assertRaises(TypeError, handler, bad_exc)
 
-    @classmethod
-    def _exception_may_crash(cls, exc):
-        """Indicate whether a Unicode exception might currently crash
-        the interpreter when used by a built-in codecs error handler.
-
-        Until gh-123378 is fixed, we skip the tests for these exceptions.
-
-        This should only be used by "do_test_codec_errors_handler".
-        """
-        message, start, end = exc.object, exc.start, exc.end
-        match exc:
-            case UnicodeEncodeError():
-                return end < start or (end - start) >= len(message)
-            case UnicodeDecodeError():
-                # The case "end - start >= len(message)" does not crash.
-                return end < start
-            case UnicodeTranslateError():
-                # Test "end <= start" because PyCodec_ReplaceErrors checks
-                # the Unicode kind of a 0-length string which by convention
-                # is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
-                # the handler currently expects.
-                return end <= start or (end - start) >= len(message)
-        return False
-
 
 if __name__ == "__main__":
     unittest.main()