git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Make our back branches compatible with libxml2 2.13.x.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 11 Jul 2024 00:15:52 +0000 (20:15 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 11 Jul 2024 00:15:52 +0000 (20:15 -0400)
This back-patches HEAD commits 066e8ac6e, 6082b3d5d, e7192486d,
and 896cd266f into supported branches.  Changes:

* Use xmlAddChildList not xmlAddChild in XMLSERIALIZE
(affects v16 and up only).  This was a flat-out coding mistake
that we got away with due to lax checking in previous versions
of xmlAddChild.

* Use xmlParseInNodeContext not xmlParseBalancedChunkMemory.
This is to dodge a bug in xmlParseBalancedChunkMemory in libxml2
releases 2.13.0-2.13.2.  While that bug is now fixed upstream and
will probably never be seen in any production-oriented distro, it is
currently a problem on some more-bleeding-edge-friendly platforms.

* Suppress "chunk is not well balanced" errors from libxml2,
unless it is the only error.  This eliminates an error-reporting
discrepancy between 2.13 and older releases.  This error is
almost always redundant with previous errors, if not flat-out
inappropriate, which is why 2.13 changed the behavior and why
nobody's likely to miss it.

Erik Wienhold and Tom Lane, per report from Frank Streitzig.

Discussion: https://postgr.es/m/trinity-b0161630-d230-4598-9ebc-7a23acdb37cb-1720186432160@3c-app-gmx-bap25
Discussion: https://postgr.es/m/trinity-361ba18b-541a-4fe7-bc63-655ae3a7d599-1720259822452@3c-app-gmx-bs01

src/backend/utils/adt/xml.c
src/test/regress/expected/xml.out
src/test/regress/expected/xml_2.out

index aae069258659eed6d6b6566025626b47757f2c58..d42ce713e6160ea1c384ecd949c3df9f3cc04d73 100644 (file)
@@ -1544,6 +1544,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
        PG_TRY();
        {
                bool            parse_as_document = false;
+               int                     options;
                int                     res_code;
                size_t          count = 0;
                xmlChar    *version = NULL;
@@ -1551,11 +1552,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
 
                xmlInitParser();
 
-               ctxt = xmlNewParserCtxt();
-               if (ctxt == NULL || xmlerrcxt->err_occurred)
-                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-                                               "could not allocate parser context");
-
                /* Decide whether to parse as document or content */
                if (xmloption_arg == XMLOPTION_DOCUMENT)
                        parse_as_document = true;
@@ -1574,20 +1570,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
                                parse_as_document = true;
                }
 
+               /*
+                * Select parse options.
+                *
+                * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
+                * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
+                * internal DTD are applied'.  As for external DTDs, we try to support
+                * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
+                * happen because xmlPgEntityLoader prevents it.
+                */
+               options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
+                       | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
+
                if (parse_as_document)
                {
-                       /*
-                        * Note, that here we try to apply DTD defaults
-                        * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
-                        * 'Default values defined by internal DTD are applied'. As for
-                        * external DTDs, we try to support them too, (see SQL/XML:2008 GR
-                        * 10.16.7.e)
-                        */
+                       ctxt = xmlNewParserCtxt();
+                       if (ctxt == NULL || xmlerrcxt->err_occurred)
+                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+                                                       "could not allocate parser context");
+
                        doc = xmlCtxtReadDoc(ctxt, utf8string,
-                                                                NULL,
+                                                                NULL,  /* no URL */
                                                                 "UTF-8",
-                                                                XML_PARSE_NOENT | XML_PARSE_DTDATTR
-                                                                | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
+                                                                options);
+
                        if (doc == NULL || xmlerrcxt->err_occurred)
                        {
                                /* Use original option to decide which error code to throw */
@@ -1601,17 +1607,36 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
                }
                else
                {
+                       xmlNodePtr      root;
+
+                       /* set up document with empty root node to be the context node */
                        doc = xmlNewDoc(version);
                        Assert(doc->encoding == NULL);
                        doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
                        doc->standalone = standalone;
 
+                       root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+                       if (root == NULL || xmlerrcxt->err_occurred)
+                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+                                                       "could not allocate xml node");
+                       /* This attaches root to doc, so we need not free it separately. */
+                       xmlDocSetRootElement(doc, root);
+
                        /* allow empty content */
                        if (*(utf8string + count))
                        {
-                               res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
-                                                                                                          utf8string + count, NULL);
-                               if (res_code != 0 || xmlerrcxt->err_occurred)
+                               xmlNodePtr      node_list = NULL;
+                               xmlParserErrors res;
+
+                               res = xmlParseInNodeContext(root,
+                                                                                       (char *) utf8string + count,
+                                                                                       strlen((char *) utf8string + count),
+                                                                                       options,
+                                                                                       &node_list);
+
+                               xmlFreeNodeList(node_list);
+
+                               if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
                                        xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
                                                                "invalid XML content");
                        }
@@ -1630,7 +1655,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
        }
        PG_END_TRY();
 
-       xmlFreeParserCtxt(ctxt);
+       if (ctxt != NULL)
+               xmlFreeParserCtxt(ctxt);
 
        pg_xml_done(xmlerrcxt, false);
 
@@ -1811,6 +1837,19 @@ xml_errorHandler(void *data, PgXmlErrorPtr error)
        switch (domain)
        {
                case XML_FROM_PARSER:
+
+                       /*
+                        * XML_ERR_NOT_WELL_BALANCED is typically reported after some
+                        * other, more on-point error.  Furthermore, libxml2 2.13 reports
+                        * it under a completely different set of rules than prior
+                        * versions.  To avoid cross-version behavioral differences,
+                        * suppress it so long as we already logged some error.
+                        */
+                       if (error->code == XML_ERR_NOT_WELL_BALANCED &&
+                               xmlerrcxt->err_occurred)
+                               return;
+                       /* fall through */
+
                case XML_FROM_NONE:
                case XML_FROM_MEMORY:
                case XML_FROM_IO:
index 55ac49be261c3b6080ffc45b08b0b5f54e38a1f5..5b6ae62c044744b499afe6ab8bbb03f648cc30dc 100644 (file)
@@ -223,17 +223,11 @@ ERROR:  invalid XML content
 DETAIL:  line 1: xmlParseEntityRef: no name
 <invalidentity>&</invalidentity>
                 ^
-line 1: chunk is not well balanced
-<invalidentity>&</invalidentity>
-                                ^
 SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
 ERROR:  invalid XML content
 DETAIL:  line 1: Entity 'idontexist' not defined
 <undefinedentity>&idontexist;</undefinedentity>
                              ^
-line 1: chunk is not well balanced
-<undefinedentity>&idontexist;</undefinedentity>
-                                               ^
 SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
          xmlparse          
 ---------------------------
@@ -252,9 +246,6 @@ DETAIL:  line 1: Entity 'idontexist' not defined
 <twoerrors>&idontexist;</unbalanced>
                        ^
 line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
-<twoerrors>&idontexist;</unbalanced>
-                                    ^
-line 1: chunk is not well balanced
 <twoerrors>&idontexist;</unbalanced>
                                     ^
 SELECT xmlparse(content '<nosuchprefix:tag/>');
index 493c6186e161318ffc9697ddf75967ef71be0694..6638458ba2ddc01fb8cbe1cce092dd9784a54150 100644 (file)
@@ -219,13 +219,11 @@ ERROR:  invalid XML content
 DETAIL:  line 1: xmlParseEntityRef: no name
 <invalidentity>&</invalidentity>
                 ^
-line 1: chunk is not well balanced
 SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
 ERROR:  invalid XML content
 DETAIL:  line 1: Entity 'idontexist' not defined
 <undefinedentity>&idontexist;</undefinedentity>
                              ^
-line 1: chunk is not well balanced
 SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
          xmlparse          
 ---------------------------
@@ -244,7 +242,6 @@ DETAIL:  line 1: Entity 'idontexist' not defined
 <twoerrors>&idontexist;</unbalanced>
                        ^
 line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
-line 1: chunk is not well balanced
 SELECT xmlparse(content '<nosuchprefix:tag/>');
       xmlparse       
 ---------------------