git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284)
author Stephen Morton <git@tungol.org>
Tue, 7 Jan 2025 10:40:41 +0000 (02:40 -0800)
committer GitHub <noreply@github.com>
Tue, 7 Jan 2025 10:40:41 +0000 (12:40 +0200)
* Allow DOMBuilder.parse() to correctly handle DOMInputSource instances
  that only have a systemId attribute set.
* Fix DOMEntityResolver.resolveEntity(), which was broken by the
  Python 3.0 transition.
* Add Lib/test/test_xml_dom_xmlbuilder.py with a few tests.

Lib/test/test_xml_dom_xmlbuilder.py [new file with mode: 0644]
Lib/xml/dom/xmlbuilder.py
Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst [new file with mode: 0644]
Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst [new file with mode: 0644]

diff --git a/Lib/test/test_xml_dom_xmlbuilder.py b/Lib/test/test_xml_dom_xmlbuilder.py
new file mode 100644 (file)
index 0000000..5f5f2eb
--- /dev/null
@@ -0,0 +1,88 @@
+import io
+import unittest
+from http import client
+from test.test_httplib import FakeSocket
+from unittest import mock
+from xml.dom import getDOMImplementation, minidom, xmlbuilder
+
+SMALL_SAMPLE = b"""<?xml version="1.0"?>
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
+<!-- A comment -->
+<title>Introduction to XSL</title>
+<hr/>
+<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
+</html>"""
+
+
+class XMLBuilderTest(unittest.TestCase):
+    def test_entity_resolver(self):
+        body = (
+            b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
+            + SMALL_SAMPLE
+        )
+
+        sock = FakeSocket(body)
+        response = client.HTTPResponse(sock)
+        response.begin()
+        attrs = {"open.return_value": response}
+        opener = mock.Mock(**attrs)
+
+        resolver = xmlbuilder.DOMEntityResolver()
+
+        with mock.patch("urllib.request.build_opener") as mock_build:
+            mock_build.return_value = opener
+            source = resolver.resolveEntity(None, "http://example.com/2000/svg")
+
+        self.assertIsInstance(source, xmlbuilder.DOMInputSource)
+        self.assertIsNone(source.publicId)
+        self.assertEqual(source.systemId, "http://example.com/2000/svg")
+        self.assertEqual(source.baseURI, "http://example.com/2000/")
+        self.assertEqual(source.encoding, "utf-8")
+        self.assertIs(source.byteStream, response)
+
+        self.assertIsNone(source.characterStream)
+        self.assertIsNone(source.stringData)
+
+    def test_builder(self):
+        imp = getDOMImplementation()
+        self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS)
+
+        builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+        self.assertIsInstance(builder, xmlbuilder.DOMBuilder)
+
+    def test_parse_uri(self):
+        body = (
+            b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
+            + SMALL_SAMPLE
+        )
+
+        sock = FakeSocket(body)
+        response = client.HTTPResponse(sock)
+        response.begin()
+        attrs = {"open.return_value": response}
+        opener = mock.Mock(**attrs)
+
+        with mock.patch("urllib.request.build_opener") as mock_build:
+            mock_build.return_value = opener
+
+            imp = getDOMImplementation()
+            builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+            document = builder.parseURI("http://example.com/2000/svg")
+
+        self.assertIsInstance(document, minidom.Document)
+        self.assertEqual(len(document.childNodes), 1)
+
+    def test_parse_with_systemId(self):
+        response = io.BytesIO(SMALL_SAMPLE)
+
+        with mock.patch("urllib.request.urlopen") as mock_open:
+            mock_open.return_value = response
+
+            imp = getDOMImplementation()
+            source = imp.createDOMInputSource()
+            builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+            source.systemId = "http://example.com/2000/svg"
+            document = builder.parse(source)
+
+        self.assertIsInstance(document, minidom.Document)
+        self.assertEqual(len(document.childNodes), 1)
diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py
index 8a200263497b8927e7630ec5d8c14cfd8d8f5a19..a8852625a2f9a2c22227cac152f6c4e34281710e 100644 (file)
@@ -189,7 +189,7 @@ class DOMBuilder:
         options.filter = self.filter
         options.errorHandler = self.errorHandler
         fp = input.byteStream
-        if fp is None and options.systemId:
+        if fp is None and input.systemId:
             import urllib.request
             fp = urllib.request.urlopen(input.systemId)
         return self._parse_bytestream(fp, options)
@@ -247,10 +247,12 @@ class DOMEntityResolver(object):
 
     def _guess_media_encoding(self, source):
         info = source.byteStream.info()
-        if "Content-Type" in info:
-            for param in info.getplist():
-                if param.startswith("charset="):
-                    return param.split("=", 1)[1].lower()
+        # import email.message
+        # assert isinstance(info, email.message.Message)
+        charset = info.get_param('charset')
+        if charset is not None:
+            return charset.lower()
+        return None
 
 
 class DOMInputSource(object):
diff --git a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst
new file mode 100644 (file)
index 0000000..56e2fe6
--- /dev/null
@@ -0,0 +1,3 @@
+Allow :meth:`!xml.dom.xmlbuilder.DOMBuilder.parse` to correctly handle
+:class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a
+:attr:`!systemId` attribute set.
diff --git a/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst
new file mode 100644 (file)
index 0000000..98c0729
--- /dev/null
@@ -0,0 +1,2 @@
+Fix :meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity`, which was
+broken by the Python 3.0 transition.