From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Mon, 15 Jun 2026 13:52:47 +0000 (+0200) Subject: [3.13] gh-148441: Avoid integer overflow in Expat's CharacterDataHandler (GH-148904... X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=502534ed0b85223cafb9617b4c0bbf592aee62d1;p=thirdparty%2FPython%2Fcpython.git [3.13] gh-148441: Avoid integer overflow in Expat's CharacterDataHandler (GH-148904) (#149637) * gh-148441: Avoid integer overflow in Expat's CharacterDataHandler (GH-148904) (cherry picked from commit bc1be4f6174086b4a46e3fe656552f5bb4e6e7b2) Co-authored-by: ByteFlow Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * Apply suggestion from @picnixz Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --------- Co-authored-by: ByteFlow Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Serhiy Storchaka Co-authored-by: Victor Stinner --- diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index ebd8c955dc24..c1ce87609386 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -783,6 +783,20 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml2, True) self.assertEqual(self.n, 4) + @support.requires_resource('cpu') + @support.requires_resource('walltime') + @support.bigmemtest(size=2**31, memuse=4, dry_run=False) + def test_large_character_data_does_not_crash(self, size): + # See https://github.com/python/cpython/issues/148441 + parser = expat.ParserCreate() + parser.buffer_text = True + parser.buffer_size = 2**31 - 1 # INT_MAX + N = 2049 * (1 << 20) - 3 # Character data greater than INT_MAX + self.assertGreater(N, parser.buffer_size) + parser.CharacterDataHandler = lambda text: None + xml_data = b"" + b"A" * N + b"" + self.assertEqual(parser.Parse(xml_data, True), 1) + class ElementDeclHandlerTest(unittest.TestCase): def test_trigger_leak(self): # Unfixed, this test would leak the memory of 
the so-called diff --git a/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst new file mode 100644 index 000000000000..762815270e4d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst @@ -0,0 +1,4 @@ +:mod:`xml.parsers.expat`: prevent a crash in +:meth:`~xml.parsers.expat.xmlparser.CharacterDataHandler` +when the character data size exceeds the parser's +:attr:`buffer size <xml.parsers.expat.xmlparser.buffer_size>`. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 915d01e8bd01..871b05d0953d 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -371,7 +371,7 @@ my_CharacterDataHandler(void *userData, const XML_Char *data, int len) if (self->buffer == NULL) call_character_handler(self, data, len); else { - if ((self->buffer_used + len) > self->buffer_size) { + if (len > (self->buffer_size - self->buffer_used)) { if (flush_character_buffer(self) < 0) return; /* handler might have changed; drop the rest on the floor