From: Ezio Melotti Date: Wed, 15 Feb 2012 11:19:10 +0000 (+0200) Subject: #13987: HTMLParser is now able to handle malformed start tags. X-Git-Tag: v2.7.3rc1~56 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=65d36dab4d915eb9fada52b867301b546e840fae;p=thirdparty%2FPython%2Fcpython.git #13987: HTMLParser is now able to handle malformed start tags. --- diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index d2268d02cd0d..5081a62562c0 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -315,8 +315,8 @@ class HTMLParser(markupbase.ParserBase): - self.__starttag_text.rfind("\n") else: offset = offset + len(self.__starttag_text) - self.error("junk characters in start tag: %r" - % (rawdata[k:endpos][:20],)) + self.handle_data(rawdata[i:endpos]) + return endpos if end.endswith('/>'): # XHTML-style empty tag: self.handle_startendtag(tag, attrs) @@ -353,8 +353,10 @@ class HTMLParser(markupbase.ParserBase): # end of input in or before attribute value, or we have the # '/' from a '/>' ending return -1 - self.updatepos(i, j) - self.error("malformed start tag") + if j > i: + return j + else: + return i + 1 raise AssertionError("we should not get here!") # Internal -- parse endtag, return end or -1 if incomplete diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index ba775abdac1b..8136bca3e28e 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -206,7 +206,8 @@ text self._run_check("", [('comment', '$')]) self._run_check("") + # XXX this might be wrong + self._run_check("", [('data', '", [('endtag', 'a