[3.12] gh-135661: Fix parsing start and end tags in HTMLParser according to the HTML5...

author Serhiy Storchaka <storchaka@gmail.com>

Fri, 4 Jul 2025 15:28:00 +0000 (18:28 +0300)

committer GitHub <noreply@github.com>

Fri, 4 Jul 2025 15:28:00 +0000 (17:28 +0200)
author Serhiy Storchaka <storchaka@gmail.com>
Fri, 4 Jul 2025 15:28:00 +0000 (18:28 +0300)
committer GitHub <noreply@github.com>
Fri, 4 Jul 2025 15:28:00 +0000 (17:28 +0200)
diff --git a/Lib/html/parser.py b/Lib/html/parser.py

index ecd5e0f019ac96d0db77cbd22b7fc9223590a65d..37931ca1af02092d3bc8003acc0770bd89b7c92a 100644 (file)
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -29,15 +29,43 @@ endtagopen = re.compile('</[a-zA-Z]')
  piclose = re.compile('>')
  commentclose = re.compile(r'--\s*>')
  # Note:
-#  1) if you change tagfind/attrfind remember to update locatestarttagend too;
-#  2) if you change tagfind/attrfind and/or locatestarttagend the parser will
+#  1) if you change tagfind/attrfind remember to update locatetagend too;
+#  2) if you change tagfind/attrfind and/or locatetagend the parser will
  #     explode, so don't do it.
-# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
-# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
-tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
-attrfind_tolerant = re.compile(
-    r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
-    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
+# see the HTML5 specs section "13.2.5.6 Tag open state",
+# "13.2.5.8 Tag name state" and "13.2.5.33 Attribute name state".
+# https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+# https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
+# https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
+tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*')
+attrfind_tolerant = re.compile(r"""
+  (
+    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
+   )
+  (=                                # value indicator
+    ('[^']*'                        # LITA-enclosed value
+    |"[^"]*"                        # LIT-enclosed value
+    |(?!['"])[^>\t\n\r\f ]*         # bare value
+    )
+   )?
+  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
+""", re.VERBOSE)
+locatetagend = re.compile(r"""
+  [a-zA-Z][^\t\n\r\f />]*           # tag name
+  [\t\n\r\f /]*                     # optional whitespace before attribute name
+  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
+    (?:=                            # value indicator
+      (?:'[^']*'                    # LITA-enclosed value
+        |"[^"]*"                    # LIT-enclosed value
+        |(?!['"])[^>\t\n\r\f ]*     # bare value
+       )
+     )?
+    [\t\n\r\f /]*                   # possibly followed by a space
+   )*
+   >?
+""", re.VERBOSE)
+# The following variables are not used, but are temporarily left for
+# backward compatibility.
  locatestarttagend_tolerant = re.compile(r"""
    <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
    (?:[\s/]*                          # optional whitespace before attribute name
@@ -54,8 +82,6 @@ locatestarttagend_tolerant = re.compile(r"""
    \s*                                # trailing whitespace
  """, re.VERBOSE)
  endendtag = re.compile('>')
-# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
-# </ and the tag name, so maybe this should be fixed
  endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
  
  
@@ -123,7 +149,8 @@ class HTMLParser(_markupbase.ParserBase):
  
      def set_cdata_mode(self, elem):
          self.cdata_elem = elem.lower()
-        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+        self.interesting = re.compile(r'</%s(?=[\t\n\r\f />])' % self.cdata_elem,
+                                      re.IGNORECASE|re.ASCII)
  
      def clear_cdata_mode(self):
          self.interesting = interesting_normal
@@ -148,7 +175,7 @@ class HTMLParser(_markupbase.ParserBase):
                      # & near the end and see if it's followed by a space or ;.
                      amppos = rawdata.rfind('&', max(i, n-34))
                      if (amppos >= 0 and
-                        not re.compile(r'[\s;]').search(rawdata, amppos)):
+                        not re.compile(r'[\t\n\r\f ;]').search(rawdata, amppos)):
                          break  # wait till we get all the text
                      j = n
              else:
@@ -261,7 +288,7 @@ class HTMLParser(_markupbase.ParserBase):
              else:
                  assert 0, "interesting.search() lied"
          # end while
-        if end and i < n and not self.cdata_elem:
+        if end and i < n:
              if self.convert_charrefs and not self.cdata_elem:
                  self.handle_data(unescape(rawdata[i:n]))
              else:
@@ -292,7 +319,7 @@ class HTMLParser(_markupbase.ParserBase):
              return self.parse_bogus_comment(i)
  
      # Internal -- parse bogus comment, return length or -1 if not terminated
-    # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
+    # see https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
      def parse_bogus_comment(self, i, report=1):
          rawdata = self.rawdata
          assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
@@ -318,6 +345,8 @@ class HTMLParser(_markupbase.ParserBase):
  
      # Internal -- handle starttag, return end or -1 if not terminated
      def parse_starttag(self, i):
+        # See the HTML5 specs section "13.2.5.8 Tag name state"
+        # https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
          self.__starttag_text = None
          endpos = self.check_for_whole_start_tag(i)
          if endpos < 0:
@@ -363,76 +392,42 @@ class HTMLParser(_markupbase.ParserBase):
      # or -1 if incomplete.
      def check_for_whole_start_tag(self, i):
          rawdata = self.rawdata
-        m = locatestarttagend_tolerant.match(rawdata, i)
-        if m:
-            j = m.end()
-            next = rawdata[j:j+1]
-            if next == ">":
-                return j + 1
-            if next == "/":
-                if rawdata.startswith("/>", j):
-                    return j + 2
-                if rawdata.startswith("/", j):
-                    # buffer boundary
-                    return -1
-                # else bogus input
-                if j > i:
-                    return j
-                else:
-                    return i + 1
-            if next == "":
-                # end of input
-                return -1
-            if next in ("abcdefghijklmnopqrstuvwxyz=/"
-                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
-                # end of input in or before attribute value, or we have the
-                # '/' from a '/>' ending
-                return -1
-            if j > i:
-                return j
-            else:
-                return i + 1
-        raise AssertionError("we should not get here!")
+        match = locatetagend.match(rawdata, i+1)
+        assert match
+        j = match.end()
+        if rawdata[j-1] != ">":
+            return -1
+        return j
  
      # Internal -- parse endtag, return end or -1 if incomplete
      def parse_endtag(self, i):
+        # See the HTML5 specs section "13.2.5.7 End tag open state"
+        # https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
          rawdata = self.rawdata
          assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
-        match = endendtag.search(rawdata, i+1) # >
-        if not match:
+        if rawdata.find('>', i+2) < 0:  # fast check
              return -1
-        gtpos = match.end()
-        match = endtagfind.match(rawdata, i) # </ + tag + >
-        if not match:
-            if self.cdata_elem is not None:
-                self.handle_data(rawdata[i:gtpos])
-                return gtpos
-            # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
-            namematch = tagfind_tolerant.match(rawdata, i+2)
-            if not namematch:
-                # w3.org/TR/html5/tokenization.html#end-tag-open-state
-                if rawdata[i:i+3] == '</>':
-                    return i+3
-                else:
-                    return self.parse_bogus_comment(i)
-            tagname = namematch.group(1).lower()
-            # consume and ignore other stuff between the name and the >
-            # Note: this is not 100% correct, since we might have things like
-            # </tag attr=">">, but looking for > after the name should cover
-            # most of the cases and is much simpler
-            gtpos = rawdata.find('>', namematch.end())
-            self.handle_endtag(tagname)
-            return gtpos+1
+        if not endtagopen.match(rawdata, i):  # </ + letter
+            if rawdata[i+2:i+3] == '>':  # </> is ignored
+                # "missing-end-tag-name" parser error
+                return i+3
+            else:
+                return self.parse_bogus_comment(i)
  
-        elem = match.group(1).lower() # script or style
-        if self.cdata_elem is not None:
-            if elem != self.cdata_elem:
-                self.handle_data(rawdata[i:gtpos])
-                return gtpos
+        match = locatetagend.match(rawdata, i+2)
+        assert match
+        j = match.end()
+        if rawdata[j-1] != ">":
+            return -1
  
-        self.handle_endtag(elem)
+        # find the name: "13.2.5.8 Tag name state"
+        # https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
+        match = tagfind_tolerant.match(rawdata, i+2)
+        assert match
+        tag = match.group(1).lower()
+        self.handle_endtag(tag)
          self.clear_cdata_mode()
-        return gtpos
+        return j
  
      # Overridable -- finish processing of start+end tag: <tag.../>
      def handle_startendtag(self, tag, attrs):
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py

index ee70407ba83d3f594da453b166a9109bc7498918..ae10de5911b3532f446575f91cf835385d2cae3e 100644 (file)
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -867,6 +867,31 @@ def check_sizeof(test, o, size):
              % (type(o), result, size)
      test.assertEqual(result, size, msg)
  
+def subTests(arg_names, arg_values, /, *, _do_cleanups=False):
+    """Run multiple subtests with different parameters.
+    """
+    single_param = False
+    if isinstance(arg_names, str):
+        arg_names = arg_names.replace(',',' ').split()
+        if len(arg_names) == 1:
+            single_param = True
+    arg_values = tuple(arg_values)
+    def decorator(func):
+        if isinstance(func, type):
+            raise TypeError('subTests() can only decorate methods, not classes')
+        @functools.wraps(func)
+        def wrapper(self, /, *args, **kwargs):
+            for values in arg_values:
+                if single_param:
+                    values = (values,)
+                subtest_kwargs = dict(zip(arg_names, values))
+                with self.subTest(**subtest_kwargs):
+                    func(self, *args, **kwargs, **subtest_kwargs)
+                if _do_cleanups:
+                    self.doCleanups()
+        return wrapper
+    return decorator
+
  #=======================================================================
  # Decorator/context manager for running a code in a different locale,
  # correctly resetting it afterwards.
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py

index 729dfe08fa6d2b83e0fa0a4d1e0daeca4ef3b91e..008d55097db1709fc6edc5244f521dc7a9a7855f 100644 (file)
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -81,6 +81,13 @@ class EventCollectorCharrefs(EventCollector):
          self.fail('This should never be called with convert_charrefs=True')
  
  
+# The normal event collector normalizes the events in get_events,
+# so we override it to return the original list of events.
+class EventCollectorNoNormalize(EventCollector):
+    def get_events(self):
+        return self.events
+
+
  class TestCaseBase(unittest.TestCase):
  
      def get_collector(self):
@@ -265,8 +272,7 @@ text
              ("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
              ("starttag_text", s)])
  
-    def test_cdata_content(self):
-        contents = [
+    @support.subTests('content', [
              '<!-- not a comment --> &not-an-entity-ref;',
              "<not a='start tag'>",
              '<a href="" /> <p> <span></span>',
@@ -279,44 +285,83 @@ text
               'src="http://www.example.org/r=\'+new '
               'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'),
              '\n<!-- //\nvar foo = 3.14;\n// -->\n',
-            'foo = "</sty" + "le>";',
              '<!-- \u2603 -->',
-            # these two should be invalid according to the HTML 5 spec,
-            # section 8.1.2.2
-            #'foo = </\nscript>',
-            #'foo = </ script>',
-        ]
-        elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style']
-        for content in contents:
-            for element in elements:
-                element_lower = element.lower()
-                s = '<{element}>{content}</{element}>'.format(element=element,
-                                                               content=content)
-                self._run_check(s, [("starttag", element_lower, []),
-                                    ("data", content),
-                                    ("endtag", element_lower)])
-
-    def test_cdata_with_closing_tags(self):
+            'foo = "</ script>"',
+            'foo = "</scripture>"',
+            'foo = "</script\v>"',
+            'foo = "</script\xa0>"',
+            'foo = "</ſcript>"',
+            'foo = "</scrıpt>"',
+        ])
+    def test_script_content(self, content):
+        s = f'<script>{content}</script>'
+        self._run_check(s, [("starttag", "script", []),
+                            ("data", content),
+                            ("endtag", "script")])
+
+    @support.subTests('content', [
+            'a::before { content: "<!-- not a comment -->"; }',
+            'a::before { content: "&not-an-entity-ref;"; }',
+            'a::before { content: "<not a=\'start tag\'>"; }',
+            'a::before { content: "\u2603"; }',
+            'a::before { content: "< /style>"; }',
+            'a::before { content: "</ style>"; }',
+            'a::before { content: "</styled>"; }',
+            'a::before { content: "</style\v>"; }',
+            'a::before { content: "</style\xa0>"; }',
+            'a::before { content: "</ſtyle>"; }',
+        ])
+    def test_style_content(self, content):
+        s = f'<style>{content}</style>'
+        self._run_check(s, [("starttag", "style", []),
+                            ("data", content),
+                            ("endtag", "style")])
+
+    @support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n',
+                                 'script/', 'script foo=bar', 'script foo=">"'])
+    def test_script_closing_tag(self, endtag):
          # see issue #13358
          # make sure that HTMLParser calls handle_data only once for each CDATA.
-        # The normal event collector normalizes  the events in get_events,
-        # so we override it to return the original list of events.
-        class Collector(EventCollector):
-            def get_events(self):
-                return self.events
-
          content = """<!-- not a comment --> &not-an-entity-ref;
                    <a href="" /> </p><p> <span></span></style>
                    '</script' + '>'"""
-        for element in [' script', 'script ', ' script ',
-                        '\nscript', 'script\n', '\nscript\n']:
-            element_lower = element.lower().strip()
-            s = '<script>{content}</{element}>'.format(element=element,
-                                                       content=content)
-            self._run_check(s, [("starttag", element_lower, []),
-                                ("data", content),
-                                ("endtag", element_lower)],
-                            collector=Collector(convert_charrefs=False))
+        s = f'<ScrIPt>{content}</{endtag}>'
+        self._run_check(s, [("starttag", "script", []),
+                            ("data", content),
+                            ("endtag", "script")],
+                        collector=EventCollectorNoNormalize(convert_charrefs=False))
+
+    @support.subTests('endtag', ['style', 'STYLE', 'style ', 'style\n',
+                                 'style/', 'style foo=bar', 'style foo=">"'])
+    def test_style_closing_tag(self, endtag):
+        content = """
+            b::before { content: "<!-- not a comment -->"; }
+            p::before { content: "&not-an-entity-ref;"; }
+            a::before { content: "<i>"; }
+            a::after { content: "</i>"; }
+            """
+        s = f'<StyLE>{content}</{endtag}>'
+        self._run_check(s, [("starttag", "style", []),
+                            ("data", content),
+                            ("endtag", "style")],
+                        collector=EventCollectorNoNormalize(convert_charrefs=False))
+
+    @support.subTests('tail,end', [
+        ('', False),
+        ('<', False),
+        ('</', False),
+        ('</s', False),
+        ('</script', False),
+        ('</script ', True),
+        ('</script foo=bar', True),
+        ('</script foo=">', True),
+    ])
+    def test_eof_in_script(self, tail, end):
+        content = "a = 123"
+        s = f'<ScrIPt>{content}{tail}'
+        self._run_check(s, [("starttag", "script", []),
+                            ("data", content if end else content + tail)],
+                        collector=EventCollectorNoNormalize(convert_charrefs=False))
  
      def test_comments(self):
          html = ("<!-- I'm a valid comment -->"
@@ -406,7 +451,7 @@ text
          self._run_check("</$>", [('comment', '$')])
          self._run_check("</", [('data', '</')])
          self._run_check("</a", [])
-        self._run_check("</ a>", [('endtag', 'a')])
+        self._run_check("</ a>", [('comment', ' a')])
          self._run_check("</ a", [('comment', ' a')])
          self._run_check("<a<a>", [('starttag', 'a<a', [])])
          self._run_check("</a<a>", [('endtag', 'a<a')])
@@ -454,6 +499,10 @@ text
          ]
          self._run_check(html, expected)
  
+    def test_slashes_in_endtag(self):
+        self._run_check('</a/>', [('endtag', 'a')])
+        self._run_check('</a foo="var"/>', [('endtag', 'a')])
+
      def test_declaration_junk_chars(self):
          self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')])
  
@@ -488,15 +537,11 @@ text
          self._run_check(html, expected)
  
      def test_broken_invalid_end_tag(self):
-        # This is technically wrong (the "> shouldn't be included in the 'data')
-        # but is probably not worth fixing it (in addition to all the cases of
-        # the previous test, it would require a full attribute parsing).
-        # see #13993
          html = '<b>This</b attr=">"> confuses the parser'
          expected = [('starttag', 'b', []),
                      ('data', 'This'),
                      ('endtag', 'b'),
-                    ('data', '"> confuses the parser')]
+                    ('data', ' confuses the parser')]
          self._run_check(html, expected)
  
      def test_correct_detection_of_start_tags(self):
@@ -523,7 +568,7 @@ text
  
          html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
          expected = [
-            ('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]),
+            ('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]),
              ('starttag', 'b', []),
              ('data', 'The '),
              ('starttag', 'a', [('href', 'some_url')]),
@@ -678,9 +723,15 @@ class AttributesTestCase(TestCaseBase):
            ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
          ]
          self._run_check("""<a b='v' c="v" d=v e>""", output)
-        self._run_check("""<a  b = 'v' c = "v" d = v e>""", output)
-        self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
-        self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)
+        self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])])
+        self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
+        self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
+        self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])])
+        self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])])
+        self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
+        self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
+        self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])])
+        self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])])
  
      def test_attr_values(self):
          self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
@@ -689,6 +740,10 @@ class AttributesTestCase(TestCaseBase):
                                              ("d", "\txyz\n")])])
          self._run_check("""<a b='' c="">""",
                          [("starttag", "a", [("b", ""), ("c", "")])])
+        self._run_check("<a b=\t c=\n>",
+                        [("starttag", "a", [("b", ""), ("c", "")])])
+        self._run_check("<a b=\v c=\xa0>",
+                        [("starttag", "a", [("b", "\v"), ("c", "\xa0")])])
          # Regression test for SF patch #669683.
          self._run_check("<e a=rgb(1,2,3)>",
                          [("starttag", "e", [("a", "rgb(1,2,3)")])])
@@ -760,7 +815,7 @@ class AttributesTestCase(TestCaseBase):
              ('data', 'test - bad2'), ('endtag', 'a'),
              ('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]),
              ('data', 'test - bad3'), ('endtag', 'a'),
-            ('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]),
+            ('starttag', 'a', [('href', None), ('=', None), ("test'&nbsp;style", 'color:red;bad4')]),
              ('data', 'test - bad4'), ('endtag', 'a')
          ]
          self._run_check(html, expected)
diff --git a/Misc/NEWS.d/next/Security/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst b/Misc/NEWS.d/next/Security/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst

new file mode 100644 (file)

index 0000000..bb85481
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst
@@ -0,0 +1,2 @@
+:meth:`html.parser.HTMLParser.close` no longer loses data when the
+``<script>`` tag is not closed. Patch by Waylan Limberg.
diff --git a/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst

new file mode 100644 (file)

index 0000000..b6f9e10
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst
@@ -0,0 +1,25 @@
+Fix parsing start and end tags in :class:`html.parser.HTMLParser`
+according to the HTML5 standard.
+
+* Whitespaces no longer accepted between ``</`` and the tag name.
+  E.g. ``</ script>`` does not end the script section.
+
+* Vertical tabulation (``\v``) and non-ASCII whitespaces no longer recognized
+  as whitespaces. The only whitespaces are ``\t\n\r\f`` and space.
+
+* Null character (U+0000) no longer ends the tag name.
+
+* Attributes and slashes after the tag name in end tags are now ignored,
+  instead of terminating after the first ``>`` in quoted attribute value.
+  E.g. ``</script/foo=">"/>``.
+
+* Multiple slashes and whitespaces between the last attribute and closing ``>``
+  are now ignored in both start and end tags. E.g. ``<a foo=bar/ //>``.
+
+* Multiple ``=`` between attribute name and value are no longer collapsed.
+  E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar".
+
+* Whitespaces between the ``=`` separator and attribute name or value are no
+  longer ignored. E.g. ``<a foo =bar>`` produces two attributes "foo" and
+  "=bar", both with value None; ``<a foo= bar>`` produces two attributes:
+  "foo" with value "" and "bar" with value None.
author	Serhiy Storchaka <storchaka@gmail.com>
	Fri, 4 Jul 2025 15:28:00 +0000 (18:28 +0300)
committer	GitHub <noreply@github.com>
	Fri, 4 Jul 2025 15:28:00 +0000 (17:28 +0200)
Lib/html/parser.py		patch \| blob \| blame \| history
Lib/test/support/__init__.py		patch \| blob \| blame \| history
Lib/test/test_htmlparser.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Security/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst	[new file with mode: 0644]	patch \| blob
Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst	[new file with mode: 0644]	patch \| blob