gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (#137415)

author Victor Stinner <vstinner@python.org>

Wed, 6 Aug 2025 12:35:27 +0000 (14:35 +0200)

committer GitHub <noreply@github.com>

Wed, 6 Aug 2025 12:35:27 +0000 (14:35 +0200)
author Victor Stinner <vstinner@python.org>
Wed, 6 Aug 2025 12:35:27 +0000 (14:35 +0200)
committer GitHub <noreply@github.com>
Wed, 6 Aug 2025 12:35:27 +0000 (14:35 +0200)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py

index d8666f7290e72e8213d6b041e0b523f24ab1f9a8..fd7769e8c275d3d6ed700846e0d2ce6ecf8b04d6 100644 (file)
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3293,7 +3293,7 @@ class CodePageTest(unittest.TestCase):
              codecs.code_page_encode, 932, '\xff')
          self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
              codecs.code_page_decode, 932, b'\x81\x00', 'strict', True)
-        self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8',
+        self.assertRaisesRegex(UnicodeDecodeError, 'cp65001',
              codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)
  
      def check_decode(self, cp, tests):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst

new file mode 100644 (file)

index 0000000..f875d4c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst
@@ -0,0 +1,3 @@
+Fix the name of the Python encoding in Unicode errors of the code page codec:
+use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8", which are not
+valid Python codec names. Patch by Victor Stinner.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 8df7a48284dccdd98c5c737ffeab1e62574f5945..425e4681f0a4dc3e4e2c0048395aae13a6b4caf4 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7684,10 +7684,6 @@ code_page_name(UINT code_page, PyObject **obj)
      *obj = NULL;
      if (code_page == CP_ACP)
          return "mbcs";
-    if (code_page == CP_UTF7)
-        return "CP_UTF7";
-    if (code_page == CP_UTF8)
-        return "CP_UTF8";
  
      *obj = PyBytes_FromFormat("cp%u", code_page);
      if (*obj == NULL)
diff --git a/Python/codecs.c b/Python/codecs.c

index caf8d9d5f3c1887b5c9bbf2c0516dc0b6fee894b..4e9aecfe75c2c9f94731e8d718ac5323138ccc0e 100644 (file)
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -1204,7 +1204,7 @@ get_standard_encoding_impl(const char *encoding, int *bytelength)
              }
          }
      }
-    else if (strcmp(encoding, "CP_UTF8") == 0) {
+    else if (strcmp(encoding, "cp65001") == 0) {
          *bytelength = 3;
          return ENC_UTF8;
      }
author	Victor Stinner <vstinner@python.org>
	Wed, 6 Aug 2025 12:35:27 +0000 (14:35 +0200)
committer	GitHub <noreply@github.com>
	Wed, 6 Aug 2025 12:35:27 +0000 (14:35 +0200)
Lib/test/test_codecs.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst	[new file with mode: 0644]	patch \| blob
Objects/unicodeobject.c		patch \| blob \| blame \| history
Python/codecs.c		patch \| blob \| blame \| history