git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Avoid regression in the size of XML input that we will accept.
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Jul 2025 20:50:42 +0000 (16:50 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Jul 2025 20:50:42 +0000 (16:50 -0400)
This mostly reverts commit 6082b3d5d, "Use xmlParseInNodeContext
not xmlParseBalancedChunkMemory".  It turns out that
xmlParseInNodeContext will reject text chunks exceeding 10MB, while
(in most libxml2 versions) xmlParseBalancedChunkMemory will not.
The bleeding-edge libxml2 bug that we needed to work around a year
ago is presumably no longer a factor, and the argument that
xmlParseBalancedChunkMemory is semi-deprecated is not enough to
justify a functionality regression.  Hence, go back to doing it
the old way.

Reported-by: Michael Paquier <michael@paquier.xyz>
Author: Michael Paquier <michael@paquier.xyz>
Co-authored-by: Erik Wienhold <ewie@ewie.name>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz
Backpatch-through: 13

src/backend/utils/adt/xml.c

index 630b1b84e1c5ba5f4ad8e8370bf98fe8473d156a..86b6425267f187367197d093a9d95c761d917c1d 100644 (file)
@@ -1529,6 +1529,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
        PgXmlErrorContext *xmlerrcxt;
        volatile xmlParserCtxtPtr ctxt = NULL;
        volatile xmlDocPtr doc = NULL;
+       volatile int save_keep_blanks = -1;
 
        len = VARSIZE_ANY_EXHDR(data);  /* will be useful later */
        string = xml_text2xmlChar(data);
@@ -1545,7 +1546,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
        PG_TRY();
        {
                bool            parse_as_document = false;
-               int                     options;
                int                     res_code;
                size_t          count = 0;
                xmlChar    *version = NULL;
@@ -1571,25 +1571,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
                                parse_as_document = true;
                }
 
-               /*
-                * Select parse options.
-                *
-                * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
-                * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
-                * internal DTD are applied'.  As for external DTDs, we try to support
-                * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
-                * happen because xmlPgEntityLoader prevents it.
-                */
-               options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
-                       | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
-
                if (parse_as_document)
                {
+                       int                     options;
+
+                       /* set up parser context used by xmlCtxtReadDoc */
                        ctxt = xmlNewParserCtxt();
                        if (ctxt == NULL || xmlerrcxt->err_occurred)
                                xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                        "could not allocate parser context");
 
+                       /*
+                        * Select parse options.
+                        *
+                        * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
+                        * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
+                        * by internal DTD are applied'.  As for external DTDs, we try to
+                        * support them too (see SQL/XML:2008 GR 10.16.7.e), but that
+                        * doesn't really happen because xmlPgEntityLoader prevents it.
+                        */
+                       options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
+                               | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
+
                        doc = xmlCtxtReadDoc(ctxt, utf8string,
                                                                 NULL,  /* no URL */
                                                                 "UTF-8",
@@ -1608,36 +1611,27 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
                }
                else
                {
-                       xmlNodePtr      root;
-
-                       /* set up document with empty root node to be the context node */
+                       /* set up document that xmlParseBalancedChunkMemory will add to */
                        doc = xmlNewDoc(version);
                        Assert(doc->encoding == NULL);
                        doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
                        doc->standalone = standalone;
 
-                       root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-                       if (root == NULL || xmlerrcxt->err_occurred)
-                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-                                                       "could not allocate xml node");
-                       /* This attaches root to doc, so we need not free it separately. */
-                       xmlDocSetRootElement(doc, root);
+                       /* set parse options --- have to do this the ugly way */
+                       save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
 
                        /* allow empty content */
                        if (*(utf8string + count))
                        {
                                xmlNodePtr      node_list = NULL;
-                               xmlParserErrors res;
 
-                               res = xmlParseInNodeContext(root,
-                                                                                       (char *) utf8string + count,
-                                                                                       strlen((char *) utf8string + count),
-                                                                                       options,
-                                                                                       &node_list);
+                               res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
+                                                                                                          utf8string + count,
+                                                                                                          &node_list);
 
                                xmlFreeNodeList(node_list);
 
-                               if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
+                               if (res_code != 0 || xmlerrcxt->err_occurred)
                                        xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
                                                                "invalid XML content");
                        }
@@ -1645,6 +1639,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
        }
        PG_CATCH();
        {
+               if (save_keep_blanks != -1)
+                       xmlKeepBlanksDefault(save_keep_blanks);
                if (doc != NULL)
                        xmlFreeDoc(doc);
                if (ctxt != NULL)
@@ -1656,6 +1652,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
        }
        PG_END_TRY();
 
+       if (save_keep_blanks != -1)
+               xmlKeepBlanksDefault(save_keep_blanks);
+
        if (ctxt != NULL)
                xmlFreeParserCtxt(ctxt);