]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Avoid regression in the size of XML input that we will accept.
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Jul 2025 20:50:41 +0000 (16:50 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Jul 2025 20:50:41 +0000 (16:50 -0400)
This mostly reverts commit 6082b3d5d, "Use xmlParseInNodeContext
not xmlParseBalancedChunkMemory".  It turns out that
xmlParseInNodeContext will reject text chunks exceeding 10MB, while
(in most libxml2 versions) xmlParseBalancedChunkMemory will not.
The bleeding-edge libxml2 bug that we needed to work around a year
ago is presumably no longer a factor, and the argument that
xmlParseBalancedChunkMemory is semi-deprecated is not enough to
justify a functionality regression.  Hence, go back to doing it
the old way.

Reported-by: Michael Paquier <michael@paquier.xyz>
Author: Michael Paquier <michael@paquier.xyz>
Co-authored-by: Erik Wienhold <ewie@ewie.name>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz
Backpatch-through: 13

src/backend/utils/adt/xml.c

index 056d811594909252cc54b1fc07078a825956cb23..28af16fe93eea226a9ab42b3bd03463fe28a0db7 100644 (file)
@@ -1730,7 +1730,7 @@ xml_doctype_in_content(const xmlChar *str)
  * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
  *
  * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
- * of parsed nodes from the xmlParseInNodeContext call will be returned
+ * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
  * to *parsed_nodes.  (It is caller's responsibility to free that.)
  *
  * Errors normally result in ereport(ERROR), but if escontext is an
@@ -1756,6 +1756,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
        PgXmlErrorContext *xmlerrcxt;
        volatile xmlParserCtxtPtr ctxt = NULL;
        volatile xmlDocPtr doc = NULL;
+       volatile int save_keep_blanks = -1;
 
        /*
         * This step looks annoyingly redundant, but we must do it to have a
@@ -1783,7 +1784,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
        PG_TRY();
        {
                bool            parse_as_document = false;
-               int                     options;
                int                     res_code;
                size_t          count = 0;
                xmlChar    *version = NULL;
@@ -1814,18 +1814,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                                parse_as_document = true;
                }
 
-               /*
-                * Select parse options.
-                *
-                * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
-                * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
-                * internal DTD are applied'.  As for external DTDs, we try to support
-                * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
-                * happen because xmlPgEntityLoader prevents it.
-                */
-               options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
-                       | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
-
                /* initialize output parameters */
                if (parsed_xmloptiontype != NULL)
                        *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
@@ -1835,11 +1823,26 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 
                if (parse_as_document)
                {
+                       int                     options;
+
+                       /* set up parser context used by xmlCtxtReadDoc */
                        ctxt = xmlNewParserCtxt();
                        if (ctxt == NULL || xmlerrcxt->err_occurred)
                                xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                        "could not allocate parser context");
 
+                       /*
+                        * Select parse options.
+                        *
+                        * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
+                        * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
+                        * by internal DTD are applied'.  As for external DTDs, we try to
+                        * support them too (see SQL/XML:2008 GR 10.16.7.e), but that
+                        * doesn't really happen because xmlPgEntityLoader prevents it.
+                        */
+                       options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
+                               | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
+
                        doc = xmlCtxtReadDoc(ctxt, utf8string,
                                                                 NULL,  /* no URL */
                                                                 "UTF-8",
@@ -1861,10 +1864,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                }
                else
                {
-                       xmlNodePtr      root;
-                       xmlNodePtr      oldroot PG_USED_FOR_ASSERTS_ONLY;
-
-                       /* set up document with empty root node to be the context node */
+                       /* set up document that xmlParseBalancedChunkMemory will add to */
                        doc = xmlNewDoc(version);
                        if (doc == NULL || xmlerrcxt->err_occurred)
                                xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
@@ -1877,36 +1877,23 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                                                        "could not allocate XML document");
                        doc->standalone = standalone;
 
-                       root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-                       if (root == NULL || xmlerrcxt->err_occurred)
-                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-                                                       "could not allocate xml node");
-
-                       /*
-                        * This attaches root to doc, so we need not free it separately;
-                        * and there can't yet be any old root to free.
-                        */
-                       oldroot = xmlDocSetRootElement(doc, root);
-                       Assert(oldroot == NULL);
+                       /* set parse options --- have to do this the ugly way */
+                       save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
 
                        /* allow empty content */
                        if (*(utf8string + count))
                        {
                                xmlNodePtr      node_list = NULL;
-                               xmlParserErrors res;
-
-                               res = xmlParseInNodeContext(root,
-                                                                                       (char *) utf8string + count,
-                                                                                       strlen((char *) utf8string + count),
-                                                                                       options,
-                                                                                       &node_list);
 
-                               if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
+                               res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
+                                                                                                          utf8string + count,
+                                                                                                          &node_list);
+                               if (res_code != 0 || xmlerrcxt->err_occurred)
                                {
-                                       xmlFreeNodeList(node_list);
                                        xml_errsave(escontext, xmlerrcxt,
                                                                ERRCODE_INVALID_XML_CONTENT,
                                                                "invalid XML content");
+                                       xmlFreeNodeList(node_list);
                                        goto fail;
                                }
 
@@ -1922,6 +1909,8 @@ fail:
        }
        PG_CATCH();
        {
+               if (save_keep_blanks != -1)
+                       xmlKeepBlanksDefault(save_keep_blanks);
                if (doc != NULL)
                        xmlFreeDoc(doc);
                if (ctxt != NULL)
@@ -1933,6 +1922,9 @@ fail:
        }
        PG_END_TRY();
 
+       if (save_keep_blanks != -1)
+               xmlKeepBlanksDefault(save_keep_blanks);
+
        if (ctxt != NULL)
                xmlFreeParserCtxt(ctxt);