git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-150560: Fix crash in XML parser on invalid XML with multi-byte encoding (GH-150568)
author Serhiy Storchaka <storchaka@gmail.com>
Fri, 29 May 2026 21:23:32 +0000 (00:23 +0300)
committer GitHub <noreply@github.com>
Fri, 29 May 2026 21:23:32 +0000 (00:23 +0300)
Lib/test/test_pyexpat.py
Lib/test/test_xml_etree.py
Modules/pyexpat.c

index 3f2c5f7021018de6870a6495e79da6ccf1786293..060a509c1bd1c7ac83fccf3979edad94409780de 100644 (file)
@@ -426,6 +426,16 @@ class ParseTest(unittest.TestCase):
         with self.assertRaises(LookupError):
             parser.Parse(data, True)
 
+    @support.subTests('sample,exception', [
+        (b'<x> \xa1</x>', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1</x', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1', expat.ExpatError),
+    ])
+    def test_multibyte_encoding_errors(self, sample, exception):
+        parser = expat.ParserCreate()
+        data = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
+        with self.assertRaises(exception):
+            parser.Parse(data, True)
 
 class NamespaceSeparatorTest(unittest.TestCase):
     def test_legal(self):
index 89aff568a1b4ef92119b24adc999ddc981c8ac43..acec4ec2ca257c41704abc7594ce02a131df0427 100644 (file)
@@ -1064,6 +1064,17 @@ class ElementTreeTest(unittest.TestCase):
         self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
         self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
 
+    @support.subTests('sample,exception', [
+        (b'<x> \xa1</x>', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1</x', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1', None), # ET.ParseError
+    ])
+    def test_multibyte_encoding_errors(self, sample, exception):
+        exception = exception or ET.ParseError
+        data = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
+        with self.assertRaises(exception):
+            ET.XML(data)
+
     def test_methods(self):
         # Test serialization methods.
 
index aef6ebad9ce578eb9f6727517aca74659eacd468..53d42ad50e37b96cedc91e1668a2673b944a59fe 100644 (file)
@@ -1473,6 +1473,9 @@ pyexpat_encoding_create(const char *name, PyObject *mapping)
 static int
 pyexpat_encoding_convert(void *data, const char *s)
 {
+    if (PyErr_Occurred()) {
+        return -1;
+    }
     pyexpat_encoding_info *info = (pyexpat_encoding_info *)data;
     int i = (unsigned char)s[0];
     assert(info->map[i] < -1);