git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Make our back branches compatible with libxml2 2.13.x.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 11 Jul 2024 00:15:52 +0000 (20:15 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 11 Jul 2024 00:15:52 +0000 (20:15 -0400)
This back-patches HEAD commits 066e8ac6e, 6082b3d5d, e7192486d,
and 896cd266f into supported branches.  Changes:

* Use xmlAddChildList not xmlAddChild in XMLSERIALIZE
(affects v16 and up only).  This was a flat-out coding mistake
that we got away with due to lax checking in previous versions
of xmlAddChild.

* Use xmlParseInNodeContext not xmlParseBalancedChunkMemory.
This is to dodge a bug in xmlParseBalancedChunkMemory in libxml2
releases 2.13.0-2.13.2.  While that bug is now fixed upstream and
will probably never be seen in any production-oriented distro, it is
currently a problem on some more-bleeding-edge-friendly platforms.

* Suppress "chunk is not well balanced" errors from libxml2,
unless it is the only error.  This eliminates an error-reporting
discrepancy between 2.13 and older releases.  This error is
almost always redundant with previous errors, if not flat-out
inappropriate, which is why 2.13 changed the behavior and why
nobody's likely to miss it.

Erik Wienhold and Tom Lane, per report from Frank Streitzig.

Discussion: https://postgr.es/m/trinity-b0161630-d230-4598-9ebc-7a23acdb37cb-1720186432160@3c-app-gmx-bap25
Discussion: https://postgr.es/m/trinity-361ba18b-541a-4fe7-bc63-655ae3a7d599-1720259822452@3c-app-gmx-bs01

src/backend/utils/adt/xml.c
src/test/regress/expected/xml.out
src/test/regress/expected/xml_2.out

index 3e4ca874d81ade71d5034040d2a1a950c9648321..447e72b21eb4454dc52f043f03a3ede4393d0b8e 100644 (file)
@@ -757,7 +757,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 
                        /* This attaches root to doc, so we need not free it separately. */
                        xmlDocSetRootElement(doc, root);
-                       xmlAddChild(root, content_nodes);
+                       xmlAddChildList(root, content_nodes);
 
                        /*
                         * We use this node to insert newlines in the dump.  Note: in at
@@ -1696,9 +1696,9 @@ xml_doctype_in_content(const xmlChar *str)
  * XmlOptionType actually used to parse the input (typically the same as
  * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
  *
- * If parsed_nodes isn't NULL and the input is not an XML document, the list
- * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
- * to *parsed_nodes.
+ * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
+ * of parsed nodes from the xmlParseInNodeContext call will be returned
+ * to *parsed_nodes.  (It is caller's responsibility to free that.)
  *
  * Errors normally result in ereport(ERROR), but if escontext is an
  * ErrorSaveContext, then "safe" errors are reported there instead, and the
@@ -1750,6 +1750,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
        PG_TRY();
        {
                bool            parse_as_document = false;
+               int                     options;
                int                     res_code;
                size_t          count = 0;
                xmlChar    *version = NULL;
@@ -1758,11 +1759,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                /* Any errors here are reported as hard ereport's */
                xmlInitParser();
 
-               ctxt = xmlNewParserCtxt();
-               if (ctxt == NULL || xmlerrcxt->err_occurred)
-                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-                                               "could not allocate parser context");
-
                /* Decide whether to parse as document or content */
                if (xmloption_arg == XMLOPTION_DOCUMENT)
                        parse_as_document = true;
@@ -1785,6 +1781,18 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                                parse_as_document = true;
                }
 
+               /*
+                * Select parse options.
+                *
+                * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
+                * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
+                * internal DTD are applied'.  As for external DTDs, we try to support
+                * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
+                * happen because xmlPgEntityLoader prevents it.
+                */
+               options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
+                       | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
+
                /* initialize output parameters */
                if (parsed_xmloptiontype != NULL)
                        *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
@@ -1794,18 +1802,16 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 
                if (parse_as_document)
                {
-                       /*
-                        * Note, that here we try to apply DTD defaults
-                        * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
-                        * 'Default values defined by internal DTD are applied'. As for
-                        * external DTDs, we try to support them too, (see SQL/XML:2008 GR
-                        * 10.16.7.e)
-                        */
+                       ctxt = xmlNewParserCtxt();
+                       if (ctxt == NULL || xmlerrcxt->err_occurred)
+                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+                                                       "could not allocate parser context");
+
                        doc = xmlCtxtReadDoc(ctxt, utf8string,
-                                                                NULL,
+                                                                NULL,  /* no URL */
                                                                 "UTF-8",
-                                                                XML_PARSE_NOENT | XML_PARSE_DTDATTR
-                                                                | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
+                                                                options);
+
                        if (doc == NULL || xmlerrcxt->err_occurred)
                        {
                                /* Use original option to decide which error code to report */
@@ -1822,6 +1828,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                }
                else
                {
+                       xmlNodePtr      root;
+
+                       /* set up document with empty root node to be the context node */
                        doc = xmlNewDoc(version);
                        if (doc == NULL || xmlerrcxt->err_occurred)
                                xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
@@ -1834,19 +1843,38 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                                                        "could not allocate XML document");
                        doc->standalone = standalone;
 
+                       root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+                       if (root == NULL || xmlerrcxt->err_occurred)
+                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+                                                       "could not allocate xml node");
+                       /* This attaches root to doc, so we need not free it separately. */
+                       xmlDocSetRootElement(doc, root);
+
                        /* allow empty content */
                        if (*(utf8string + count))
                        {
-                               res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
-                                                                                                          utf8string + count,
-                                                                                                          parsed_nodes);
-                               if (res_code != 0 || xmlerrcxt->err_occurred)
+                               xmlNodePtr      node_list = NULL;
+                               xmlParserErrors res;
+
+                               res = xmlParseInNodeContext(root,
+                                                                                       (char *) utf8string + count,
+                                                                                       strlen((char *) utf8string + count),
+                                                                                       options,
+                                                                                       &node_list);
+
+                               if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
                                {
+                                       xmlFreeNodeList(node_list);
                                        xml_errsave(escontext, xmlerrcxt,
                                                                ERRCODE_INVALID_XML_CONTENT,
                                                                "invalid XML content");
                                        goto fail;
                                }
+
+                               if (parsed_nodes != NULL)
+                                       *parsed_nodes = node_list;
+                               else
+                                       xmlFreeNodeList(node_list);
                        }
                }
 
@@ -1866,7 +1894,8 @@ fail:
        }
        PG_END_TRY();
 
-       xmlFreeParserCtxt(ctxt);
+       if (ctxt != NULL)
+               xmlFreeParserCtxt(ctxt);
 
        pg_xml_done(xmlerrcxt, false);
 
@@ -2085,6 +2114,19 @@ xml_errorHandler(void *data, PgXmlErrorPtr error)
        switch (domain)
        {
                case XML_FROM_PARSER:
+
+                       /*
+                        * XML_ERR_NOT_WELL_BALANCED is typically reported after some
+                        * other, more on-point error.  Furthermore, libxml2 2.13 reports
+                        * it under a completely different set of rules than prior
+                        * versions.  To avoid cross-version behavioral differences,
+                        * suppress it so long as we already logged some error.
+                        */
+                       if (error->code == XML_ERR_NOT_WELL_BALANCED &&
+                               xmlerrcxt->err_occurred)
+                               return;
+                       /* fall through */
+
                case XML_FROM_NONE:
                case XML_FROM_MEMORY:
                case XML_FROM_IO:
index 6500cff885d10a78dad632c6807c7e2939d89a0a..93a79cda8fd4e7ad99fc3df31b1f70a8053ea9a2 100644 (file)
@@ -254,17 +254,11 @@ ERROR:  invalid XML content
 DETAIL:  line 1: xmlParseEntityRef: no name
 <invalidentity>&</invalidentity>
                 ^
-line 1: chunk is not well balanced
-<invalidentity>&</invalidentity>
-                                ^
 SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
 ERROR:  invalid XML content
 DETAIL:  line 1: Entity 'idontexist' not defined
 <undefinedentity>&idontexist;</undefinedentity>
                              ^
-line 1: chunk is not well balanced
-<undefinedentity>&idontexist;</undefinedentity>
-                                               ^
 SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
          xmlparse          
 ---------------------------
@@ -283,9 +277,6 @@ DETAIL:  line 1: Entity 'idontexist' not defined
 <twoerrors>&idontexist;</unbalanced>
                        ^
 line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
-<twoerrors>&idontexist;</unbalanced>
-                                    ^
-line 1: chunk is not well balanced
 <twoerrors>&idontexist;</unbalanced>
                                     ^
 SELECT xmlparse(content '<nosuchprefix:tag/>');
index e1d165c6c936005cbc0f724d9a175d604068b549..f956322c69384d6f67fb61b89333c733f5c71349 100644 (file)
@@ -250,13 +250,11 @@ ERROR:  invalid XML content
 DETAIL:  line 1: xmlParseEntityRef: no name
 <invalidentity>&</invalidentity>
                 ^
-line 1: chunk is not well balanced
 SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
 ERROR:  invalid XML content
 DETAIL:  line 1: Entity 'idontexist' not defined
 <undefinedentity>&idontexist;</undefinedentity>
                              ^
-line 1: chunk is not well balanced
 SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
          xmlparse          
 ---------------------------
@@ -275,7 +273,6 @@ DETAIL:  line 1: Entity 'idontexist' not defined
 <twoerrors>&idontexist;</unbalanced>
                        ^
 line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
-line 1: chunk is not well balanced
 SELECT xmlparse(content '<nosuchprefix:tag/>');
       xmlparse       
 ---------------------