with self.assertRaises(AttributeError):
del e.errors
+ def test_null_terminator(self):
+     """Check that NUL characters survive encoding with ``self.encoding``.
+
+     Encoding text that contains NUL characters must give the same bytes
+     as encoding the text and the NUL separately and concatenating them.
+     """
+     # see gh-101828
+     # Japanese (katakana) sample text.  Keep this literal as real UTF-8
+     # text: a mis-decoded (mojibake) copy no longer feeds Japanese input
+     # to the codec.
+     # NOTE(review): the trailing character is presumably one that
+     # JIS X 0213 can pair with a following code point -- confirm against
+     # the codec's pair-encoding table.
+     text = "フルーツ"
+     try:
+         text.encode(self.encoding)
+     except UnicodeEncodeError:
+         # The codec cannot represent the sample; fall back to ASCII text.
+         text = "Python is cool"
+     encode_w_null = (text + "\0").encode(self.encoding)
+     encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)
+     self.assertTrue(encode_w_null.endswith(b'\x00'))
+     self.assertEqual(encode_w_null, encode_plus_null)
+
+     # Same check with a NUL in the middle of the input as well as at the end.
+     encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding)
+     encode_plus_null_2 = encode_plus_null + encode_plus_null
+     self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
+     self.assertEqual(encode_w_null_2, encode_plus_null_2)
+
class TestBase_Mapping(unittest.TestCase):
pass_enctest = []
--- /dev/null
+Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and
+``'euc_jis_2004'`` codecs dropping null characters from their output
+because they were treated as part of multi-character sequences.
return coded;
case 2: /* second character of unicode pair */
- coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
- if (coded != DBCINV)
- return coded;
+ if (data[1] != 0) { /* Don't consume null char as part of pair */
+ coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ if (coded != DBCINV) {
+ return coded;
+ }
+ }
/* fall through */
case -1: /* flush unterminated */