]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-149489: Fix ElementTree serialization to HTML (GH-149490) (GH-150596) ...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Sat, 30 May 2026 11:16:32 +0000 (13:16 +0200)
committer: GitHub <noreply@github.com>
Sat, 30 May 2026 11:16:32 +0000 (11:16 +0000)
* The content of elements "xmp", "iframe", "noembed", "noframes",
  and "plaintext" is no longer escaped.
* The "plaintext" element no longer has a closing tag.
(cherry picked from commit c42e6d3f1a066186f74d3971df4c512bc11c7997)

(cherry picked from commit bcd29e466f55d8b4e3849ed6ada8ce86a46f5072)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Lib/test/test_xml_etree.py
Lib/xml/etree/ElementTree.py
Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst [new file with mode: 0644]

index 51f8a2d52161cc65cac35464ac49727093538020..1a39de8b5aac3aa5b81ab6bd96eac70e1e3cb193 100644 (file)
@@ -1248,7 +1248,12 @@ class ElementTreeTest(unittest.TestCase):
               {'': 'http://www.w3.org/2001/XMLSchema',
                'ns': 'http://www.w3.org/2001/XMLSchema'})
 
-    def test_processinginstruction(self):
+    def test_comment_serialization(self):
+        comm = ET.Comment('<spam> & ham')
+        # comments are not escaped
+        self.assertEqual(ET.tostring(comm), b'<!--<spam> & ham-->')
+
+    def test_processinginstruction_serialization(self):
         # Test ProcessingInstruction directly
 
         self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
@@ -1257,13 +1262,22 @@ class ElementTreeTest(unittest.TestCase):
                 b'<?test instruction?>')
 
         # Issue #2746
-
+        # processing instructions are not escaped
         self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
                 b'<?test <testing&>?>')
         self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
                 b"<?xml version='1.0' encoding='latin-1'?>\n"
                 b"<?test <testing&>\xe3?>")
 
+    @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
+    def test_html_cdata_elems_serialization(self, tag):
+        # content of raw text elements is not escaped in html
+        tag = tag.title()
+        elem = ET.Element(tag)
+        elem.text = '<spam>&ham'
+        self.assertEqual(ET.tostring(elem, method='html'),
+                         ('<%s><spam>&ham</%s>' % (tag, tag)).encode())
+
     def test_html_empty_elems_serialization(self):
         # issue 15970
         # from http://www.w3.org/TR/html401/index/elements.html
@@ -1278,6 +1292,14 @@ class ElementTreeTest(unittest.TestCase):
                                        method='html')
                 self.assertEqual(serialized, expected)
 
+    def test_html_plaintext_serialization(self):
+        # content of plaintext is not escaped in html
+        # no end tag for plaintext
+        elem = ET.Element('PlainText')
+        elem.text = '<spam>&ham'
+        self.assertEqual(ET.tostring(elem, method='html'),
+                         b'<PlainText><spam>&ham')
+
     def test_dump_attribute_order(self):
         # See BPO 34160
         e = ET.Element('cirriculum', status='public', company='example')
index e8e237bf7815fd1b7bbd49362ecb4ecb0a964b37..ef162a967d8f889ea34e5e53e304414c4ad95262 100644 (file)
@@ -913,9 +913,12 @@ def _serialize_xml(write, elem, qnames, namespaces,
     if elem.tail:
         write(_escape_cdata(elem.tail))
 
+_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
+                           "noframes", "plaintext"}
+
 HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
               "img", "input", "isindex", "link", "meta", "param", "source",
-              "track", "wbr"}
+              "track", "wbr", "plaintext"}
 
 def _serialize_html(write, elem, qnames, namespaces, **kwargs):
     tag = elem.tag
@@ -956,7 +959,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
             write(">")
             ltag = tag.lower()
             if text:
-                if ltag == "script" or ltag == "style":
+                if ltag in _CDATA_CONTENT_ELEMENTS:
                     write(text)
                 else:
                     write(_escape_cdata(text))
diff --git a/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst
new file mode 100644 (file)
index 0000000..4f47d36
--- /dev/null
@@ -0,0 +1,3 @@
+Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of
+elements "xmp", "iframe", "noembed", "noframes", and "plaintext" is no longer
+escaped. The "plaintext" element no longer has a closing tag.