git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (GH-137415) (#137461)
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Wed, 6 Aug 2025 12:59:11 +0000 (14:59 +0200)
committer: GitHub <noreply@github.com>
Wed, 6 Aug 2025 12:59:11 +0000 (12:59 +0000)
gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (GH-137415)

Fix name of the Python encoding in Unicode errors of the code page
codec: use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8"
which are not valid Python codec names.
(cherry picked from commit ce1b747ff68754635b7b12870dfc527184ee3b39)

Co-authored-by: Victor Stinner <vstinner@python.org>
Lib/test/test_codecs.py
Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst [new file with mode: 0644]
Objects/unicodeobject.c
Python/codecs.c

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index e98570ba19f029ad6e8c35e673ba1c2ee62e0ec9..2d20efe74f2ea6f986f53e54124cede94ff19a2a 100644 (file)
@@ -3284,7 +3284,7 @@ class CodePageTest(unittest.TestCase):
             codecs.code_page_encode, 932, '\xff')
         self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
             codecs.code_page_decode, 932, b'\x81\x00', 'strict', True)
-        self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8',
+        self.assertRaisesRegex(UnicodeDecodeError, 'cp65001',
             codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)
 
     def check_decode(self, cp, tests):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst
new file mode 100644 (file)
index 0000000..f875d4c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst
@@ -0,0 +1,3 @@
+Fix name of the Python encoding in Unicode errors of the code page codec:
+use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8" which are not
+valid Python code names. Patch by Victor Stinner.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a996d8f501ac2dfd118bba66fdd86de76081a437..395430e398d50d5fd31ed53f15e0779faaf6120d 100644 (file)
@@ -7290,10 +7290,6 @@ code_page_name(UINT code_page, PyObject **obj)
     *obj = NULL;
     if (code_page == CP_ACP)
         return "mbcs";
-    if (code_page == CP_UTF7)
-        return "CP_UTF7";
-    if (code_page == CP_UTF8)
-        return "CP_UTF8";
 
     *obj = PyBytes_FromFormat("cp%u", code_page);
     if (*obj == NULL)
diff --git a/Python/codecs.c b/Python/codecs.c
index 9c0a3fad314cb5cd9ff32cce1f2286e1f5f64663..d2e3f25157144a872752a9324271debe54cd6b15 100644 (file)
@@ -1087,7 +1087,7 @@ get_standard_encoding(const char *encoding, int *bytelength)
             }
         }
     }
-    else if (strcmp(encoding, "CP_UTF8") == 0) {
+    else if (strcmp(encoding, "cp65001") == 0) {
         *bytelength = 3;
         return ENC_UTF8;
     }