From: Mark Hammond Date: Thu, 27 Feb 2003 06:59:10 +0000 (+0000) Subject: When bad HTML is encountered, ignore the page rather than failing with X-Git-Tag: v2.3c1~1668 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ce56c377a0f548cdac3ab9c66117df654f934484;p=thirdparty%2FPython%2Fcpython.git When bad HTML is encountered, ignore the page rather than failing with a traceback. --- diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py index e8d0ed746fe0..e89529e5cf3a 100755 --- a/Tools/webchecker/webchecker.py +++ b/Tools/webchecker/webchecker.py @@ -400,7 +400,15 @@ class Checker: if local_fragment and self.nonames: self.markdone(url_pair) return - page = self.getpage(url_pair) + try: + page = self.getpage(url_pair) + except sgmllib.SGMLParseError, msg: + msg = self.sanitize(msg) + self.note(0, "Error parsing %s: %s", + self.format_url(url_pair), msg) + # Dont actually mark the URL as bad - it exists, just + # we can't parse it! + page = None if page: # Store the page which corresponds to this URL. self.name_table[url] = page