git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-101828: Fix `jisx0213` codecs removing null characters (gh-139340) (gh...
author Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Tue, 14 Oct 2025 14:48:29 +0000 (15:48 +0100)
committer GitHub <noreply@github.com>
Tue, 14 Oct 2025 14:48:29 +0000 (14:48 +0000)
* [3.13] gh-101828: Fix `jisx0213` codecs removing null characters (gh-139340)
(cherry picked from commit 87eadce3e0309d80a95e85d70a00028b5dca9907)

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
* Accidentally removed line

Lib/test/multibytecodec_support.py
Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst [new file with mode: 0644]
Modules/cjkcodecs/_codecs_iso2022.c
Modules/cjkcodecs/_codecs_jp.c

index dbf0cc428e3ff63fce94d32d2ad14a0e9a81e589..6b4c57d0b4bad7a0066987818f81f8a448f20259 100644 (file)
@@ -282,6 +282,23 @@ class TestBase:
         with self.assertRaises(AttributeError):
             del e.errors
 
+    def test_null_terminator(self):
+        # See gh-101828: NUL characters must survive encoding with self.encoding.
+        text = "フルーツ"
+        try:
+            text.encode(self.encoding)
+        except UnicodeEncodeError:
+            text = "Python is cool"  # fallback if the codec can't encode the sample
+        encode_w_null = (text + "\0").encode(self.encoding)
+        encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)
+        self.assertTrue(encode_w_null.endswith(b'\x00'))
+        self.assertEqual(encode_w_null, encode_plus_null)
+
+        encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding)
+        encode_plus_null_2 = encode_plus_null + encode_plus_null
+        self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
+        self.assertEqual(encode_w_null_2, encode_plus_null_2)
+
 
 class TestBase_Mapping(unittest.TestCase):
     pass_enctest = []
diff --git a/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
new file mode 100644 (file)
index 0000000..1d10018
--- /dev/null
@@ -0,0 +1,3 @@
+Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and
+``'euc_jis_2004'`` codecs truncating null chars
+as they were treated as part of multi-character sequences.
index e8835ad0909633df124da6d85f777cdb24c27346..bdbaca2c42189bff0707f0259c066f0c625deb15 100644 (file)
@@ -802,10 +802,13 @@ jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
         return coded;
 
     case 2: /* second character of unicode pair */
-        coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
-                                jisx0213_pair_encmap, JISX0213_ENCPAIRS);
-        if (coded != DBCINV)
-            return coded;
+        if (data[1] != 0) { /* Don't consume null char as part of pair */
+            coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
+                                    jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+            if (coded != DBCINV) {
+                return coded;
+            }
+        }
         /* fall through */
 
     case -1: /* flush unterminated */
index f7127487aa5f5944e84564daed24d399ddee213e..cd77888d5514b82bc6169a9d964803e4bc88bda3 100644 (file)
@@ -192,8 +192,11 @@ ENCODER(euc_jis_2004)
                                 JISX0213_ENCPAIRS);
                             if (code == DBCINV)
                                 return 1;
-                        } else
+                        }
+                        else if (c2 != 0) {
+                            /* Don't consume null char as part of pair */
                             insize = 2;
+                        }
                     }
                 }
             }
@@ -611,8 +614,10 @@ ENCODER(shift_jis_2004)
                             if (code == DBCINV)
                                 return 1;
                             }
-                            else
+                            else if (ch2 != 0) {
+                                /* Don't consume null char as part of pair */
                                 insize = 2;
+                            }
                         }
                     }
                 }