git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.10] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds
author: Łukasz Langa <lukasz@langa.pl>
Mon, 6 Nov 2023 14:46:20 +0000 (15:46 +0100)
committer: GitHub <noreply@github.com>
Mon, 6 Nov 2023 14:46:20 +0000 (15:46 +0100)
(cherry picked from commit c8faa3568afd255708096f6aa8df0afa80cf7697)

Co-authored-by: Masayuki Moriyama <masayuki.moriyama@miraclelinux.com>
Lib/test/test_codecencodings_iso2022.py
Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst [new file with mode: 0644]
Modules/cjkcodecs/_codecs_iso2022.c

diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py
index 00ea1c39dd6fb66f36c1032e9aea3af9a9b47059..027dbecc6134df75a59af35022878c0c1faa6a8f 100644 (file)
@@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase):
         (b'ab\x1BNdef', 'replace', 'abdef'),
     )
 
+class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase):
+    encoding = 'iso2022_jp_3'
+    tstring = multibytecodec_support.load_teststring('iso2022_jp')
+    codectests = COMMON_CODEC_TESTS + (
+        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
+        (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402'      ),
+        (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B'  ),
+        (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
+        (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02'      ),
+        (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2'  ),
+        ('\u3402',       'strict', b'\x1B$(O\x2E\x23\x1B(B'),
+        ('\U0002000B',   'strict', b'\x1B$(O\x2E\x22\x1B(B'),
+        ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'),
+        ('\u4E02',       'strict', b'\x1B$(P\x21\x22\x1B(B'),
+        ('\U0002A6B2',   'strict', b'\x1B$(P\x7E\x76\x1B(B'),
+        (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'),
+        ('ab\u4FF1def', 'replace', b'ab?def'),
+    )
+    xmlcharnametest = (
+        '\xAB\u211C\xBB = \u2329\u1234\u232A',
+        b'\x1B$(O\x29\x28\x1B(B&real;\x1B$(O\x29\x32\x1B(B = &lang;&#4660;&rang;'
+    )
+
+class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase):
+    encoding = 'iso2022_jp_2004'
+    tstring = multibytecodec_support.load_teststring('iso2022_jp')
+    codectests = COMMON_CODEC_TESTS + (
+        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
+        (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402'      ),
+        (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B'  ),
+        (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
+        (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02'      ),
+        (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2'  ),
+        ('\u3402',       'strict', b'\x1B$(Q\x2E\x23\x1B(B'),
+        ('\U0002000B',   'strict', b'\x1B$(Q\x2E\x22\x1B(B'),
+        ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'),
+        ('\u4E02',       'strict', b'\x1B$(P\x21\x22\x1B(B'),
+        ('\U0002A6B2',   'strict', b'\x1B$(P\x7E\x76\x1B(B'),
+        (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'),
+        ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'),
+    )
+    xmlcharnametest = (
+        '\xAB\u211C\xBB = \u2329\u1234\u232A',
+        b'\x1B$(Q\x29\x28\x1B(B&real;\x1B$(Q\x29\x32\x1B(B = &lang;&#4660;&rang;'
+    )
+
 class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
     encoding = 'iso2022_kr'
     tstring = multibytecodec_support.load_teststring('iso2022_kr')
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst
new file mode 100644 (file)
index 0000000..268a3d3
--- /dev/null
@@ -0,0 +1 @@
+Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 7394cf67e0e7dd561f4c7c99d101ce5768fcec52..6d906ecdd396c298dbf009049698e3afd0eec0fc 100644 (file)
@@ -181,8 +181,9 @@ ENCODER(iso2022)
 
         encoded = MAP_UNMAPPABLE;
         for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
+            Py_UCS4 buf[2] = {c, 0};
             Py_ssize_t length = 1;
-            encoded = dsg->encoder(&c, &length);
+            encoded = dsg->encoder(buf, &length);
             if (encoded == MAP_MULTIPLE_AVAIL) {
                 /* this implementation won't work for pair
                  * of non-bmp characters. */
@@ -191,9 +192,11 @@ ENCODER(iso2022)
                         return MBERR_TOOFEW;
                     length = -1;
                 }
-                else
+                else {
+                    buf[1] = INCHAR2;
                     length = 2;
-                encoded = dsg->encoder(&c, &length);
+                }
+                encoded = dsg->encoder(buf, &length);
                 if (encoded != MAP_UNMAPPABLE) {
                     insize = length;
                     break;