From: Andrew M. Kuchling Date: Sat, 5 Jun 2004 15:31:45 +0000 (+0000) Subject: [Bug #921657] Allow '@' in unquoted HTML attributes. Not strictly legal according... X-Git-Tag: v2.4a1~260 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b7d8ce0275d7b4c9a9c2312d0add835c6eac1730;p=thirdparty%2FPython%2Fcpython.git [Bug #921657] Allow '@' in unquoted HTML attributes. Not strictly legal according to the HTML REC, but HTMLParser is already a pretty loose parser. Reported by Bernd Zimmermann. --- diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index 733458126b75..553e8427ccc2 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -26,7 +26,7 @@ commentclose = re.compile(r'--\s*>') tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*') attrfind = re.compile( r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' - r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~]*))?') + r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?') locatestarttagend = re.compile(r""" <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index a830ed7736b1..5b4bd560d0b5 100755 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -204,6 +204,10 @@ DOCTYPE html [ self._run_check("<e a=rgb(1,2,3)>", [ ("starttag", "e", [("a", "rgb(1,2,3)")]), ]) + # Regression test for SF bug #921657. + self._run_check("<a href=mailto:xyz@example.com>", [ + ("starttag", "a", [("href", "mailto:xyz@example.com")]), + ]) def test_attr_entity_replacement(self): self._run_check("""""", [