git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-86155: Fix data loss after unclosed script or style tag in HTMLParser ...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Sat, 10 May 2025 17:58:29 +0000 (19:58 +0200)
committer: GitHub <noreply@github.com>
Sat, 10 May 2025 17:58:29 +0000 (17:58 +0000)
When calling .close() the HTMLParser should flush all remaining content,
even when that content is in an unclosed script or style tag.
(cherry picked from commit 53383e90e4df7029f792b7aa81aa2e4cff348ed0)

Co-authored-by: Waylan Limberg <waylan.limberg@icloud.com>
Lib/html/parser.py
Lib/test/test_htmlparser.py
Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst [new file with mode: 0644]

diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 1b8b6ea0e5ab7ab5acada5dc0dd896137b558520..1e30956fe24f83b6e88a4cb2e9114b16760969ed 100644 (file)
@@ -260,7 +260,7 @@ class HTMLParser(_markupbase.ParserBase):
             else:
                 assert 0, "interesting.search() lied"
         # end while
-        if end and i < n and not self.cdata_elem:
+        if end and i < n:
             if self.convert_charrefs and not self.cdata_elem:
                 self.handle_data(unescape(rawdata[i:n]))
             else:
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 68649e9d6d5e9c86a038bb4450a26e86bcee030e..61fa24fab574f25f07eac855fcfc223d166d0b14 100644 (file)
@@ -317,6 +317,16 @@ text
                                 ("endtag", element_lower)],
                             collector=Collector(convert_charrefs=False))
 
+    def test_EOF_in_cdata(self):
+        content = """<!-- not a comment --> &not-an-entity-ref;
+                  <a href="" /> </p><p> <span></span></style>
+                  '</script' + '>'"""
+        s = f'<script>{content}'
+        self._run_check(s, [
+            ("starttag", 'script', []),
+            ("data", content)
+        ])
+
     def test_comments(self):
         html = ("<!-- I'm a valid comment -->"
                 '<!--me too!-->'
diff --git a/Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst b/Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst
new file mode 100644 (file)
index 0000000..bb85481
--- /dev/null
@@ -0,0 +1,2 @@
+:meth:`html.parser.HTMLParser.close` no longer loses data when the
+``<script>`` tag is not closed. Patch by Waylan Limberg.