git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-102388: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds...
author Masayuki Moriyama <masayuki.moriyama@miraclelinux.com>
Mon, 6 Nov 2023 10:59:22 +0000 (19:59 +0900)
committer GitHub <noreply@github.com>
Mon, 6 Nov 2023 10:59:22 +0000 (19:59 +0900)
Lib/test/test_codecencodings_iso2022.py
Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst [new file with mode: 0644]
Modules/cjkcodecs/_codecs_iso2022.c

diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py
index 00ea1c39dd6fb66f36c1032e9aea3af9a9b47059..027dbecc6134df75a59af35022878c0c1faa6a8f 100644 (file)
@@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase):
         (b'ab\x1BNdef', 'replace', 'abdef'),
     )
 
+class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase):
+    encoding = 'iso2022_jp_3'
+    tstring = multibytecodec_support.load_teststring('iso2022_jp')
+    codectests = COMMON_CODEC_TESTS + (
+        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
+        (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402'      ),
+        (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B'  ),
+        (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
+        (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02'      ),
+        (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2'  ),
+        ('\u3402',       'strict', b'\x1B$(O\x2E\x23\x1B(B'),
+        ('\U0002000B',   'strict', b'\x1B$(O\x2E\x22\x1B(B'),
+        ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'),
+        ('\u4E02',       'strict', b'\x1B$(P\x21\x22\x1B(B'),
+        ('\U0002A6B2',   'strict', b'\x1B$(P\x7E\x76\x1B(B'),
+        (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'),
+        ('ab\u4FF1def', 'replace', b'ab?def'),
+    )
+    xmlcharnametest = (
+        '\xAB\u211C\xBB = \u2329\u1234\u232A',
+        b'\x1B$(O\x29\x28\x1B(B&real;\x1B$(O\x29\x32\x1B(B = &lang;&#4660;&rang;'
+    )
+
+class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase):
+    encoding = 'iso2022_jp_2004'
+    tstring = multibytecodec_support.load_teststring('iso2022_jp')
+    codectests = COMMON_CODEC_TESTS + (
+        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
+        (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402'      ),
+        (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B'  ),
+        (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
+        (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02'      ),
+        (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2'  ),
+        ('\u3402',       'strict', b'\x1B$(Q\x2E\x23\x1B(B'),
+        ('\U0002000B',   'strict', b'\x1B$(Q\x2E\x22\x1B(B'),
+        ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'),
+        ('\u4E02',       'strict', b'\x1B$(P\x21\x22\x1B(B'),
+        ('\U0002A6B2',   'strict', b'\x1B$(P\x7E\x76\x1B(B'),
+        (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'),
+        ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'),
+    )
+    xmlcharnametest = (
+        '\xAB\u211C\xBB = \u2329\u1234\u232A',
+        b'\x1B$(Q\x29\x28\x1B(B&real;\x1B$(Q\x29\x32\x1B(B = &lang;&#4660;&rang;'
+    )
+
 class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
     encoding = 'iso2022_kr'
     tstring = multibytecodec_support.load_teststring('iso2022_kr')
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst
new file mode 100644 (file)
index 0000000..268a3d3
--- /dev/null
@@ -0,0 +1 @@
+Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 86bb73b982a5519cbebada08e9c589647c09c986..e8835ad0909633df124da6d85f777cdb24c27346 100644 (file)
@@ -207,8 +207,9 @@ ENCODER(iso2022)
 
         encoded = MAP_UNMAPPABLE;
         for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
+            Py_UCS4 buf[2] = {c, 0};
             Py_ssize_t length = 1;
-            encoded = dsg->encoder(codec, &c, &length);
+            encoded = dsg->encoder(codec, buf, &length);
             if (encoded == MAP_MULTIPLE_AVAIL) {
                 /* this implementation won't work for pair
                  * of non-bmp characters. */
@@ -217,9 +218,11 @@ ENCODER(iso2022)
                         return MBERR_TOOFEW;
                     length = -1;
                 }
-                else
+                else {
+                    buf[1] = INCHAR2;
                     length = 2;
-                encoded = dsg->encoder(codec, &c, &length);
+                }
+                encoded = dsg->encoder(codec, buf, &length);
                 if (encoded != MAP_UNMAPPABLE) {
                     insize = length;
                     break;