From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Tue, 22 Jul 2025 09:57:56 +0000 (+0200) Subject: [3.10] gh-135661: Fix parsing attributes with whitespaces around the "=" separator... X-Git-Tag: v3.10.19~12 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1df5d0014578be7fe7ae25e2cc60c50c8b5cc0f7;p=thirdparty%2FPython%2Fcpython.git [3.10] gh-135661: Fix parsing attributes with whitespaces around the "=" separator in HTMLParser (GH-136908) (GH-136921) This fixes a regression introduced in GH-135930. (cherry picked from commit dee650189497735edbc08a54edabb5b06ef1bd09) Co-authored-by: Serhiy Storchaka --- diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 9a1b430d4e3e..c7dde2e911f9 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -43,7 +43,7 @@ attrfind_tolerant = re.compile(r""" ( (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name ) - (= # value indicator + ([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator ('[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\t\n\r\f ]* # bare value @@ -55,7 +55,7 @@ locatetagend = re.compile(r""" [a-zA-Z][^\t\n\r\f />]* # tag name [\t\n\r\f /]* # optional whitespace before attribute name (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name - (?:= # value indicator + (?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\t\n\r\f ]* # bare value diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 708b35192ead..e78e806e19cb 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -595,7 +595,7 @@ text html = '
The rain' expected = [ - ('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]), + ('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]), ('starttag', 'b', []), ('data', 'The '), ('starttag', 'a', [('href', 'some_url')]), @@ -751,12 +751,12 @@ class AttributesTestCase(TestCaseBase): ] self._run_check("""""", output) self._run_check("", [('starttag', 'a', [('foo', '=bar')])]) - self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) - self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo\v', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo\xa0', 'bar')])]) - self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) - self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo', '\vbar')])]) self._run_check("", [('starttag', 'a', [('foo', '\xa0bar')])]) @@ -767,8 +767,8 @@ class AttributesTestCase(TestCaseBase): ("d", "\txyz\n")])]) self._run_check("""""", [("starttag", "a", [("b", ""), ("c", "")])]) - self._run_check("", - [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("", + [('starttag', 'a', [('b', 'x'), ('c', 'y')])]) self._run_check("", [("starttag", "a", [("b", "\v"), ("c", "\xa0")])]) # Regression test for SF patch #669683. @@ -837,13 +837,17 @@ class AttributesTestCase(TestCaseBase): ) expected = [ ('starttag', 'a', [('href', "test'style='color:red;bad1'")]), - ('data', 'test - bad1'), ('endtag', 'a'), + ('data', 'test - bad1'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'+style='color:red;ba2'")]), - ('data', 'test - bad2'), ('endtag', 'a'), + ('data', 'test - bad2'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]), - ('data', 'test - bad3'), ('endtag', 'a'), - ('starttag', 'a', [('href', None), ('=', None), ("test' style", 'color:red;bad4')]), - ('data', 'test - bad4'), ('endtag', 'a') + ('data', 'test - bad3'), + ('endtag', 'a'), + ('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]), + ('data', 'test - bad4'), + ('endtag', 'a'), ] self._run_check(html, expected) diff --git a/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst index b6f9e104e440..27e886abdb58 100644 --- a/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst +++ b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst @@ -18,8 +18,3 @@ according to the HTML5 standard. * Multiple ``=`` between attribute name and value are no longer collapsed. E.g. ```` produces attribute "foo" with value "=bar". - -* Whitespaces between the ``=`` separator and attribute name or value are no - longer ignored. E.g. ```` produces two attributes "foo" and - "=bar", both with value None; ```` produces two attributes: - "foo" with value "" and "bar" with value None.