From: Tom Lane Date: Mon, 28 Jul 2025 20:50:42 +0000 (-0400) Subject: Avoid regression in the size of XML input that we will accept. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=589d6e6408b4f2fd4e7ac6941cd3bc2c16893b47;p=thirdparty%2Fpostgresql.git Avoid regression in the size of XML input that we will accept. This mostly reverts commit 6082b3d5d, "Use xmlParseInNodeContext not xmlParseBalancedChunkMemory". It turns out that xmlParseInNodeContext will reject text chunks exceeding 10MB, while (in most libxml2 versions) xmlParseBalancedChunkMemory will not. The bleeding-edge libxml2 bug that we needed to work around a year ago is presumably no longer a factor, and the argument that xmlParseBalancedChunkMemory is semi-deprecated is not enough to justify a functionality regression. Hence, go back to doing it the old way. Reported-by: Michael Paquier Author: Michael Paquier Co-authored-by: Erik Wienhold Reviewed-by: Tom Lane Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz Backpatch-through: 13 --- diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 30a6f54cd61..8d7f084e7b5 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -1529,6 +1529,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, PgXmlErrorContext *xmlerrcxt; volatile xmlParserCtxtPtr ctxt = NULL; volatile xmlDocPtr doc = NULL; + volatile int save_keep_blanks = -1; len = VARSIZE_ANY_EXHDR(data); /* will be useful later */ string = xml_text2xmlChar(data); @@ -1545,7 +1546,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, PG_TRY(); { bool parse_as_document = false; - int options; int res_code; size_t count = 0; xmlChar *version = NULL; @@ -1571,25 +1571,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, parse_as_document = true; } - /* - * Select parse options. - * - * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR) - * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by - * internal DTD are applied'. As for external DTDs, we try to support - * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really - * happen because xmlPgEntityLoader prevents it. - */ - options = XML_PARSE_NOENT | XML_PARSE_DTDATTR - | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS); - if (parse_as_document) { + int options; + + /* set up parser context used by xmlCtxtReadDoc */ ctxt = xmlNewParserCtxt(); if (ctxt == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); + /* + * Select parse options. + * + * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR) + * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined + * by internal DTD are applied'. As for external DTDs, we try to + * support them too (see SQL/XML:2008 GR 10.16.7.e), but that + * doesn't really happen because xmlPgEntityLoader prevents it. + */ + options = XML_PARSE_NOENT | XML_PARSE_DTDATTR + | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS); + doc = xmlCtxtReadDoc(ctxt, utf8string, NULL, /* no URL */ "UTF-8", @@ -1608,36 +1611,27 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, } else { - xmlNodePtr root; - - /* set up document with empty root node to be the context node */ + /* set up document that xmlParseBalancedChunkMemory will add to */ doc = xmlNewDoc(version); Assert(doc->encoding == NULL); doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); doc->standalone = standalone; - root = xmlNewNode(NULL, (const xmlChar *) "content-root"); - if (root == NULL || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate xml node"); - /* This attaches root to doc, so we need not free it separately. */ - xmlDocSetRootElement(doc, root); + /* set parse options --- have to do this the ugly way */ + save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0); /* allow empty content */ if (*(utf8string + count)) { xmlNodePtr node_list = NULL; - xmlParserErrors res; - res = xmlParseInNodeContext(root, - (char *) utf8string + count, - strlen((char *) utf8string + count), - options, - &node_list); + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, + utf8string + count, + &node_list); xmlFreeNodeList(node_list); - if (res != XML_ERR_OK || xmlerrcxt->err_occurred) + if (res_code != 0 || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); } @@ -1645,6 +1639,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, } PG_CATCH(); { + if (save_keep_blanks != -1) + xmlKeepBlanksDefault(save_keep_blanks); if (doc != NULL) xmlFreeDoc(doc); if (ctxt != NULL) @@ -1656,6 +1652,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, } PG_END_TRY(); + if (save_keep_blanks != -1) + xmlKeepBlanksDefault(save_keep_blanks); + if (ctxt != NULL) xmlFreeParserCtxt(ctxt);