git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-50002: xml.dom.minidom now preserves whitespaces in attributes (GH-107947)
author: Serhiy Storchaka <storchaka@gmail.com>
Wed, 23 Aug 2023 12:23:41 +0000 (15:23 +0300)
committer: GitHub <noreply@github.com>
Wed, 23 Aug 2023 12:23:41 +0000 (15:23 +0300)
Also, double quotes (") are now escaped (as &quot;) only in attribute values; in text nodes they are written unchanged.

Lib/test/test_minidom.py
Lib/xml/dom/minidom.py
Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst [new file with mode: 0644]
Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst [new file with mode: 0644]

index 699265ccadc7f9c5d11d3d8d4ea70365065a5b2a..3ecd1af31eea77ec41389911a10781cd7e03805f 100644 (file)
@@ -505,6 +505,46 @@ class MinidomTest(unittest.TestCase):
         dom.unlink()
         self.confirm(str == domstr)
 
+    def test_toxml_quote_text(self):
+        dom = Document()
+        elem = dom.appendChild(dom.createElement('elem'))
+        elem.appendChild(dom.createTextNode('&<>"'))
+        cr = elem.appendChild(dom.createElement('cr'))
+        cr.appendChild(dom.createTextNode('\r'))
+        crlf = elem.appendChild(dom.createElement('crlf'))
+        crlf.appendChild(dom.createTextNode('\r\n'))
+        lflf = elem.appendChild(dom.createElement('lflf'))
+        lflf.appendChild(dom.createTextNode('\n\n'))
+        ws = elem.appendChild(dom.createElement('ws'))
+        ws.appendChild(dom.createTextNode('\t\n\r '))
+        domstr = dom.toxml()
+        dom.unlink()
+        self.assertEqual(domstr, '<?xml version="1.0" ?>'
+                '<elem>&amp;&lt;&gt;"'
+                '<cr>\r</cr>'
+                '<crlf>\r\n</crlf>'
+                '<lflf>\n\n</lflf>'
+                '<ws>\t\n\r </ws></elem>')
+
+    def test_toxml_quote_attrib(self):
+        dom = Document()
+        elem = dom.appendChild(dom.createElement('elem'))
+        elem.setAttribute("a", '&<>"')
+        elem.setAttribute("cr", "\r")
+        elem.setAttribute("lf", "\n")
+        elem.setAttribute("crlf", "\r\n")
+        elem.setAttribute("lflf", "\n\n")
+        elem.setAttribute("ws", "\t\n\r ")
+        domstr = dom.toxml()
+        dom.unlink()
+        self.assertEqual(domstr, '<?xml version="1.0" ?>'
+                '<elem a="&amp;&lt;&gt;&quot;" '
+                'cr="&#13;" '
+                'lf="&#10;" '
+                'crlf="&#13;&#10;" '
+                'lflf="&#10;&#10;" '
+                'ws="&#9;&#10;&#13; "/>')
+
     def testAltNewline(self):
         str = '<?xml version="1.0" ?>\n<a b="c"/>\n'
         dom = parseString(str)
index ef8a159833bbc07672cd3a17a3e49943a97c85e1..db51f350ea0153d2cf521c713aba0147acd871b9 100644 (file)
@@ -300,12 +300,28 @@ def _in_document(node):
         node = node.parentNode
     return False
 
-def _write_data(writer, data):
+def _write_data(writer, text, attr):
     "Writes datachars to writer."
-    if data:
-        data = data.replace("&", "&amp;").replace("<", "&lt;"). \
-                    replace("\"", "&quot;").replace(">", "&gt;")
-        writer.write(data)
+    if not text:
+        return
+    # See the comments in ElementTree.py for behavior and
+    # implementation details.
+    if "&" in text:
+        text = text.replace("&", "&amp;")
+    if "<" in text:
+        text = text.replace("<", "&lt;")
+    if ">" in text:
+        text = text.replace(">", "&gt;")
+    if attr:
+        if '"' in text:
+            text = text.replace('"', "&quot;")
+        if "\r" in text:
+            text = text.replace("\r", "&#13;")
+        if "\n" in text:
+            text = text.replace("\n", "&#10;")
+        if "\t" in text:
+            text = text.replace("\t", "&#9;")
+    writer.write(text)
 
 def _get_elements_by_tagName_helper(parent, name, rc):
     for node in parent.childNodes:
@@ -883,7 +899,7 @@ class Element(Node):
 
         for a_name in attrs.keys():
             writer.write(" %s=\"" % a_name)
-            _write_data(writer, attrs[a_name].value)
+            _write_data(writer, attrs[a_name].value, True)
             writer.write("\"")
         if self.childNodes:
             writer.write(">")
@@ -1112,7 +1128,7 @@ class Text(CharacterData):
         return newText
 
     def writexml(self, writer, indent="", addindent="", newl=""):
-        _write_data(writer, "%s%s%s" % (indent, self.data, newl))
+        _write_data(writer, "%s%s%s" % (indent, self.data, newl), False)
 
     # DOM Level 3 (WD 9 April 2002)
 
diff --git a/Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst b/Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst
new file mode 100644 (file)
index 0000000..ca5c074
--- /dev/null
@@ -0,0 +1 @@
+:mod:`xml.dom.minidom` now preserves whitespaces in attributes.
diff --git a/Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst b/Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst
new file mode 100644 (file)
index 0000000..241a50f
--- /dev/null
@@ -0,0 +1 @@
+:mod:`xml.dom.minidom` now only quotes ``"`` in attributes.