From: Fred Drake Date: Fri, 16 Mar 2001 20:04:57 +0000 (+0000) Subject: Change RuntimeError to SGMLParseError, which subclasses RuntimeError X-Git-Tag: v2.1b2~158 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=669573726bb31d8862e98b3843549e5fe0b54d37;p=thirdparty%2FPython%2Fcpython.git Change RuntimeError to SGMLParseError, which subclasses RuntimeError for backward compatibility. Add support for SGML declaration syntax (<!...>) to some reasonable degree. This does not support everything allowed in SGML, but should work with "real" HTML (internal subset in a DOCTYPE is not handled). The content of the declaration is passed to the .handle_decl() method, which can be overridden by subclasses. --- diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py index 19580e259998..2ebe6981f5d0 100644 --- a/Lib/sgmllib.py +++ b/Lib/sgmllib.py @@ -39,6 +39,14 @@ attrfind = re.compile( r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*' r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?') +declname = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*') +declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*') + + +class SGMLParseError(RuntimeError): + """Exception raised for all parse errors.""" + pass + # SGML parser base class -- find tags and call handler functions. # Usage: p = SGMLParser(); p.feed(data); ...; p.close(). @@ -144,7 +152,12 @@ class SGMLParser: self.handle_data(rawdata[i]) i = i+1 continue - i = match.end(0) + # This is some sort of declaration; in "HTML as + # deployed," this should only be the document type + # declaration ("<!DOCTYPE html...>"). + k = self.parse_declaration(i) + if k < 0: break + i = k continue elif rawdata[i] == '&': match = charref.match(rawdata, i) @@ -162,7 +175,7 @@ class SGMLParser: if rawdata[i-1] != ';': i = i-1 continue else: - raise RuntimeError, 'neither < nor & ??' 
+ raise SGMLParseError('neither < nor & ??') # We get here only if incomplete matches but # nothing else match = incomplete.match(rawdata, i) @@ -186,7 +199,7 @@ class SGMLParser: def parse_comment(self, i): rawdata = self.rawdata if rawdata[i:i+4] != '