]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
major rewrite using different formatting paradigm
authorGuido van Rossum <guido@python.org>
Fri, 4 Aug 1995 04:23:30 +0000 (04:23 +0000)
committerGuido van Rossum <guido@python.org>
Fri, 4 Aug 1995 04:23:30 +0000 (04:23 +0000)
Lib/htmllib.py

index 10ca81063c7caa49776cfb6547f0c477799f105b..4af446a57d3876dbc4d9a610f311150b935378f8 100644 (file)
-# A parser for HTML documents
+# New HTML class
 
+# XXX Check against HTML 2.0 spec
 
-# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to
-# describe hypertext documents
-#
-# SGML: Standard Generalized Markup Language
-#
-# WWW: World-Wide Web; a distributed hypertext system develped at CERN
-#
-# CERN: European Particle Physics Laboratory in Geneva, Switzerland
+# XXX reorder methods according to hierarchy
+# - html structure: head, body, title, isindex
+# - headers
+# - lists, items
+# - paragraph styles
+# - forms
+# - character styles
+# - images
+# - bookkeeping
+# - output generation
 
 
-# This file is only concerned with parsing and formatting HTML
-# documents, not with the other (hypertext and networking) aspects of
-# the WWW project.  (It does support highlighting of anchors.)
-
-
-import os
 import sys
-import regex
+import regsub
 import string
-import sgmllib
-
-
-class HTMLParser(sgmllib.SGMLParser):
-
-       # Copy base class entities and add some
-       entitydefs = {}
-       for key in sgmllib.SGMLParser.entitydefs.keys():
-               entitydefs[key] = sgmllib.SGMLParser.entitydefs[key]
-       entitydefs['bullet'] = '*'
-
-       # Provided -- handlers for tags introducing literal text
-       
-       def start_listing(self, attrs):
-               self.setliteral('listing')
-               self.literal_bgn('listing', attrs)
-
-       def end_listing(self):
-               self.literal_end('listing')
-
-       def start_xmp(self, attrs):
-               self.setliteral('xmp')
-               self.literal_bgn('xmp', attrs)
-
-       def end_xmp(self):
-               self.literal_end('xmp')
-
-       def do_plaintext(self, attrs):
-               self.setnomoretags()
-               self.literal_bgn('plaintext', attrs)
-
-       # To be overridden -- begin/end literal mode
-       def literal_bgn(self, tag, attrs): pass
-       def literal_end(self, tag): pass
-
-
-# Next level of sophistication -- collect anchors, title, nextid and isindex
-class CollectingParser(HTMLParser):
-       #
-       def __init__(self):
-               HTMLParser.__init__(self)
-               self.savetext = None
-               self.nextid = []
-               self.isindex = 0
-               self.title = ''
-               self.inanchor = 0
-               self.anchors = []
-               self.anchornames = []
-               self.anchortypes = []
-       #
-       def start_a(self, attrs):
-               self.inanchor = 0
-               href = ''
-               name = ''
-               type = ''
-               for attrname, value in attrs:
-                       if attrname == 'href':
-                               href = value
-                       if attrname == 'name=':
-                               name = value
-                       if attrname == 'type=':
-                               type = string.lower(value)
-               if not (href or name):
-                       return
-               self.anchors.append(href)
-               self.anchornames.append(name)
-               self.anchortypes.append(type)
-               self.inanchor = len(self.anchors)
-               if not href:
-                       self.inanchor = -self.inanchor
-       #
-       def end_a(self):
-               if self.inanchor > 0:
-                       # Don't show anchors pointing into the current document
-                       if self.anchors[self.inanchor-1][:1] <> '#':
-                               self.handle_data('[' + `self.inanchor` + ']')
-               self.inanchor = 0
-       #
-       def start_html(self, attrs): pass
-       def end_html(self): pass
-       #
-       def start_head(self, attrs): pass
-       def end_head(self): pass
-       #
-       def start_body(self, attrs): pass
-       def end_body(self): pass
-       #
-       def do_nextid(self, attrs):
-               self.nextid = attrs
-       #
-       def do_isindex(self, attrs):
-               self.isindex = 1
-       #
-       def start_title(self, attrs):
-               self.savetext = ''
-       #
-       def end_title(self):
-               if self.savetext <> None:
-                       self.title = self.savetext
-                       self.savetext = None
-       #
-       def handle_data(self, text):
-               if self.savetext is not None:
-                       self.savetext = self.savetext + text
-
-
-# Formatting parser -- takes a formatter and a style sheet as arguments
-
-# XXX The use of style sheets should change: for each tag and end tag
-# there should be a style definition, and a style definition should
-# encompass many more parameters: font, justification, indentation,
-# vspace before, vspace after, hanging tag...
-
-wordprog = regex.compile('[^ \t\n]*')
-spaceprog = regex.compile('[ \t\n]*')
-
-class FormattingParser(CollectingParser):
-
-       def __init__(self, formatter, stylesheet):
-               CollectingParser.__init__(self)
-               self.fmt = formatter
-               self.stl = stylesheet
-               self.savetext = None
-               self.compact = 0
-               self.nofill = 0
-               self.resetfont()
-               self.setindent(self.stl.stdindent)
-
-       def resetfont(self):
-               self.fontstack = []
-               self.stylestack = []
-               self.fontset = self.stl.stdfontset
-               self.style = ROMAN
-               self.passfont()
-
-       def passfont(self):
-               font = self.fontset[self.style]
-               self.fmt.setfont(font)
-
-       def pushstyle(self, style):
-               self.stylestack.append(self.style)
-               self.style = min(style, len(self.fontset)-1)
-               self.passfont()
-
-       def popstyle(self):
-               self.style = self.stylestack[-1]
-               del self.stylestack[-1]
-               self.passfont()
-
-       def pushfontset(self, fontset, style):
-               self.fontstack.append(self.fontset)
-               self.fontset = fontset
-               self.pushstyle(style)
-
-       def popfontset(self):
-               self.fontset = self.fontstack[-1]
-               del self.fontstack[-1]
-               self.popstyle()
-
-       def flush(self):
-               self.fmt.flush()
-
-       def setindent(self, n):
-               self.fmt.setleftindent(n)
-
-       def needvspace(self, n):
-               self.fmt.needvspace(n)
-
-       def close(self):
-               HTMLParser.close(self)
-               self.fmt.flush()
-
-       def handle_literal(self, text):
-               lines = string.splitfields(text, '\n')
-               for i in range(1, len(lines)):
-                       lines[i] = string.expandtabs(lines[i], 8)
-               for line in lines[:-1]:
-                       self.fmt.addword(line, 0)
-                       self.fmt.flush()
-                       self.fmt.nospace = 0
-               for line in lines[-1:]:
-                       self.fmt.addword(line, 0)
-
-       def handle_data(self, text):
-               if self.savetext is not None:
-                       self.savetext = self.savetext + text
-                       return
-               if self.literal:
-                       self.handle_literal(text)
-                       return
-               i = 0
-               n = len(text)
-               while i < n:
-                       j = i + wordprog.match(text, i)
-                       word = text[i:j]
-                       i = j + spaceprog.match(text, j)
-                       self.fmt.addword(word, i-j)
-                       if self.nofill and '\n' in text[j:i]:
-                               self.fmt.flush()
-                               self.fmt.nospace = 0
-                               i = j+1
-                               while text[i-1] <> '\n': i = i+1
-
-       def literal_bgn(self, tag, attrs):
-               if tag == 'plaintext':
-                       self.flush()
-               else:
-                       self.needvspace(1)
-               self.pushfontset(self.stl.stdfontset, FIXED)
-               self.setindent(self.stl.literalindent)
-
-       def literal_end(self, tag):
-               self.needvspace(1)
-               self.popfontset()
-               self.setindent(self.stl.stdindent)
-
-       def start_title(self, attrs):
-               self.flush()
-               self.savetext = ''
-       # NB end_title is unchanged
-
-       def do_p(self, attrs):
-               if self.compact:
-                       self.flush()
-               else:
-                       self.needvspace(1)
-
-       def start_h1(self, attrs):
-               self.needvspace(2)
-               self.setindent(self.stl.h1indent)
-               self.pushfontset(self.stl.h1fontset, BOLD)
-               self.fmt.setjust('c')
-
-       def end_h1(self):
-               self.popfontset()
-               self.needvspace(2)
-               self.setindent(self.stl.stdindent)
-               self.fmt.setjust('l')
-
-       def start_h2(self, attrs):
-               self.needvspace(1)
-               self.setindent(self.stl.h2indent)
-               self.pushfontset(self.stl.h2fontset, BOLD)
-
-       def end_h2(self):
-               self.popfontset()
-               self.needvspace(1)
-               self.setindent(self.stl.stdindent)
-
-       def start_h3(self, attrs):
-               self.needvspace(1)
-               self.setindent(self.stl.stdindent)
-               self.pushfontset(self.stl.h3fontset, BOLD)
-
-       def end_h3(self):
-               self.popfontset()
-               self.needvspace(1)
-               self.setindent(self.stl.stdindent)
-
-       def start_h4(self, attrs):
-               self.needvspace(1)
-               self.setindent(self.stl.stdindent)
-               self.pushfontset(self.stl.stdfontset, BOLD)
-
-       def end_h4(self):
-               self.popfontset()
-               self.needvspace(1)
-               self.setindent(self.stl.stdindent)
-
-       start_h5 = start_h4
-       end_h5 = end_h4
-
-       start_h6 = start_h5
-       end_h6 = end_h5
-
-       start_h7 = start_h6
-       end_h7 = end_h6
-
-       def start_ul(self, attrs):
-               self.needvspace(1)
-               for attrname, value in attrs:
-                       if attrname == 'compact':
-                               self.compact = 1
-                               self.setindent(0)
-                               break
-               else:
-                       self.setindent(self.stl.ulindent)
-
-       start_dir = start_menu = start_ol = start_ul
-
-       do_li = do_p
-
-       def end_ul(self):
-               self.compact = 0
-               self.needvspace(1)
-               self.setindent(self.stl.stdindent)
-
-       end_dir = end_menu = end_ol = end_ul
-
-       def start_dl(self, attrs):
-               for attrname, value in attrs:
-                       if attrname == 'compact':
-                               self.compact = 1
-               self.needvspace(1)
-
-       def end_dl(self):
-               self.compact = 0
-               self.needvspace(1)
-               self.setindent(self.stl.stdindent)
+from sgmllib import SGMLParser
 
-       def do_dt(self, attrs):
-               if self.compact:
-                       self.flush()
-               else:
-                       self.needvspace(1)
-               self.setindent(self.stl.stdindent)
 
-       def do_dd(self, attrs):
-               self.fmt.addword('', 1)
-               self.setindent(self.stl.ddindent)
+ROMAN = 0
+ITALIC = 1
+BOLD = 2
+FIXED = 3
 
-       def start_address(self, attrs):
-               self.compact = 1
-               self.needvspace(1)
-               self.fmt.setjust('r')
 
-       def end_address(self):
-               self.compact = 0
-               self.needvspace(1)
-               self.setindent(self.stl.stdindent)
-               self.fmt.setjust('l')
+class HTMLParser(SGMLParser):
+
+    def __init__(self):
+       # Initialize the SGML parser and the formatting state.
+       SGMLParser.__init__(self)
+       # Text collected between save_bgn() and save_end(); None while
+       # output goes straight to write_data()
+       self.savedata = None
+       # Set to 1 by do_isindex()
+       self.isindex = 0
+       # Filled in by end_title()
+       self.title = ''
+       # Paragraph tag most recently passed to para_bgn()
+       self.para = None
+       # Stack of open list tags, see push_list() and pop_list()
+       self.lists = []
+       # Stack of active character styles, see push_style() and pop_style()
+       self.styles = []
+       # Counter raised by start_pre() and lowered by end_pre(); while
+       # nonzero, handle_data() passes text on without reformatting
+       self.nofill = 0
+       # While true, handle_data() drops a blank leading the data
+       self.nospace = 1
+       # True when the previous data ended in a blank that has not
+       # been written yet
+       self.softspace = 0
+
+    # --- Data
+
+    def handle_image(self, src, alt):
+       self.handle_data(alt)
+
+    def handle_data(self, data):
+       # Handle a run of text from the document.  While nofill is set
+       # the text is passed on verbatim via handle_literal().
+       # Otherwise each run of whitespace collapses to one blank, a
+       # leading blank is dropped while nospace is set, and a trailing
+       # blank is held back in softspace until more text arrives.
+       if self.nofill:
+           self.handle_literal(data)
+           return
+       data = regsub.gsub('[ \t\n\r]+', ' ', data)
+       if self.nospace and data[:1] == ' ': data = data[1:]
+       if not data: return
+       self.nospace = 0
+       if self.softspace and data[:1] != ' ': data = ' ' + data
+       # The pending blank has now been written, or is superseded by
+       # this chunk's own leading blank.  Forget it, otherwise it would
+       # be emitted again in front of every later chunk.
+       self.softspace = 0
+       if data[-1:] == ' ':
+           data = data[:-1]
+           self.softspace = 1
+       self.output_data(data)
 
-       def start_pre(self, attrs):
-               self.needvspace(1)
-               self.nofill = self.nofill + 1
-               self.pushstyle(FIXED)
+    def handle_literal(self, data):
+       # Output data unchanged (no whitespace collapsing) and clear
+       # both spacing flags.
+       self.nospace = 0
+       self.softspace = 0
+       self.output_data(data)
 
-       def end_pre(self):
-               self.popstyle()
-               self.nofill = self.nofill - 1
-               self.needvspace(1)
+    def output_data(self, data):
+       # Send processed text to its destination: straight to
+       # write_data() normally, or onto the save buffer while savedata
+       # is not None (between save_bgn() and save_end()).
+       if self.savedata is None:
+           self.write_data(data)
+           return
+       self.savedata = self.savedata + data
 
-       start_typewriter = start_pre
-       end_typewriter = end_pre
+    def write_data(self, data):
+       sys.stdout.write(data)
 
-       def do_img(self, attrs):
-               self.fmt.addword('(image)', 0)
+    def save_bgn(self):
+       # Start saving: from now on output_data() appends to savedata
+       # instead of calling write_data().  The spacing flags are reset
+       # (nospace on, softspace off).
+       self.savedata = ''
+       self.nospace = 1
+       self.softspace = 0
 
-       # Physical styles
+    def save_end(self):
+       # Stop saving: return the text collected since save_bgn() and
+       # let output_data() call write_data() again.  The spacing flags
+       # are reset (nospace on, softspace off).
+       saved = self.savedata
+       self.savedata = None
+       self.nospace = 1
+       self.softspace = 0
+       return saved
+
+    def new_para(self):
+       pass
+
+    def new_style(self):
+       pass
+
+    # --- Generic style changes
+
+    def para_bgn(self, tag):
+       # Start a new paragraph.  If nospace is false, first emit a
+       # newline and reset the spacing flags.  A tag of None keeps the
+       # current self.para; any other value (including '') replaces
+       # it.  Finally the new_para() hook is called.
+       if not self.nospace:
+           self.handle_literal('\n')
+           self.nospace = 1
+           self.softspace = 0
+       if tag is not None:
+           self.para = tag
+       self.new_para()
+
+    def para_end(self):
+       self.para_bgn('')
+
+    def push_list(self, tag):
+       # Record tag as the innermost open list, then start a new
+       # paragraph without changing self.para.
+       self.lists.append(tag)
+       self.para_bgn(None)
+
+    def pop_list(self):
+       # Close the innermost open list and end the paragraph.  A stray
+       # end tag with no list open is tolerated instead of raising
+       # IndexError.
+       if self.lists:
+           del self.lists[-1]
+       self.para_end()
+
+    def literal_bgn(self, tag, attrs):
+       self.para_bgn(tag)
+
+    def literal_end(self, tag):
+       self.para_end()
 
-       def start_tt(self, attrs): self.pushstyle(FIXED)
-       def end_tt(self): self.popstyle()
+    def push_style(self, tag):
+       self.styles.append(tag)
+       self.new_style()
 
-       def start_b(self, attrs): self.pushstyle(BOLD)
-       def end_b(self): self.popstyle()
+    def pop_style(self):
+       # Drop the innermost character style and call the new_style()
+       # hook.  A stray end tag with no style active is tolerated
+       # instead of raising IndexError.
+       if self.styles:
+           del self.styles[-1]
+       self.new_style()
+
+    def anchor_bgn(self, href, name, type):
+       # Push the style 'a' for an anchor with a non-empty href, or
+       # None otherwise; anchor_end() pops it again.  name and type
+       # are not used here.
+       self.push_style(href and 'a' or None)
+
+    def anchor_end(self):
+       self.pop_style()
+
+    # --- Top level tags
 
-       def start_i(self, attrs): self.pushstyle(ITALIC)
-       def end_i(self): self.popstyle()
+    def start_html(self, attrs): pass
+    def end_html(self): pass
 
-       def start_u(self, attrs): self.pushstyle(ITALIC) # Underline???
-       def end_u(self): self.popstyle()
+    def start_head(self, attrs): pass
+    def end_head(self): pass
 
-       def start_r(self, attrs): self.pushstyle(ROMAN) # Not official
-       def end_r(self): self.popstyle()
+    def start_body(self, attrs): pass
+    def end_body(self): pass
 
-       # Logical styles
+    def do_isindex(self, attrs):
+       self.isindex = 1
 
-       start_em = start_i
-       end_em = end_i
+    def start_title(self, attrs):
+       self.save_bgn()
 
-       start_strong = start_b
-       end_strong = end_b
+    def end_title(self):
+       self.title = self.save_end()
 
-       start_code = start_tt
-       end_code = end_tt
+    # --- Old HTML 'literal text' tags
 
-       start_samp = start_tt
-       end_samp = end_tt
+    def start_listing(self, attrs):
+       self.setliteral('listing')
+       self.literal_bgn('listing', attrs)
 
-       start_kbd = start_tt
-       end_kbd = end_tt
+    def end_listing(self):
+       self.literal_end('listing')
 
-       start_file = start_tt # unofficial
-       end_file = end_tt
+    def start_xmp(self, attrs):
+       self.setliteral('xmp')
+       self.literal_bgn('xmp', attrs)
 
-       start_var = start_i
-       end_var = end_i
+    def end_xmp(self):
+       self.literal_end('xmp')
 
-       start_dfn = start_i
-       end_dfn = end_i
+    def do_plaintext(self, attrs):
+       self.setnomoretags()
+       self.literal_bgn('plaintext', attrs)
 
-       start_cite = start_i
-       end_cite = end_i
+    # --- Anchors
 
-       start_hp1 = start_i
-       end_hp1 = start_i
+    def start_a(self, attrs):
+       # Collect the HREF, NAME and TYPE attributes of an <a> tag
+       # (TYPE is lowercased) and hand them to anchor_bgn(); missing
+       # attributes are passed as ''.  anchor_bgn() is called even
+       # when neither HREF nor NAME is given, because end_a() always
+       # calls anchor_end(); skipping it here would make anchor_end()
+       # pop a style that was never pushed.
+       href = ''
+       name = ''
+       type = ''
+       for attrname, value in attrs:
+           if attrname == 'href':
+               href = value
+           elif attrname == 'name':
+               name = value
+           elif attrname == 'type':
+               type = string.lower(value)
+       self.anchor_bgn(href, name, type)
 
-       start_hp2 = start_b
-       end_hp2 = end_b
+    def end_a(self):
+       self.anchor_end()
 
-       def unknown_starttag(self, tag, attrs):
-               print '*** unknown <' + tag + '>'
+    # --- Paragraph tags
 
-       def unknown_endtag(self, tag):
-               print '*** unknown </' + tag + '>'
+    def do_p(self, attrs):
+       self.para_bgn(None)
 
+    def do_br(self, attrs):
+       # Force a line break: emit a newline, then set nospace so that
+       # a blank leading the following text is dropped.
+       self.handle_literal('\n')
+       self.nospace = 1
+       self.softspace = 0
 
-# An extension of the formatting parser which formats anchors differently.
-class AnchoringParser(FormattingParser):
+    def do_hr(self, attrs):
+       # Render a horizontal rule as 40 dashes, with a paragraph break
+       # before and after.
+       self.para_bgn(None)
+       self.handle_literal('-'*40)
+       self.para_end()
 
-       def start_a(self, attrs):
-               FormattingParser.start_a(self, attrs)
-               if self.inanchor:
-                       self.fmt.bgn_anchor(self.inanchor)
+    def start_h1(self, attrs):
+       self.para_bgn('h1')
 
-       def end_a(self):
-               if self.inanchor:
-                       self.fmt.end_anchor(self.inanchor)
-                       self.inanchor = 0
+    def start_h2(self, attrs):
+       self.para_bgn('h2')
 
+    def start_h3(self, attrs):
+       self.para_bgn('h3')
 
-# Style sheet -- this is never instantiated, but the attributes
-# of the class object itself are used to specify fonts to be used
-# for various paragraph styles.
-# A font set is a non-empty list of fonts, in the order:
-# [roman, italic, bold, fixed].
-# When a style is not available the nearest lower style is used
+    def start_h4(self, attrs):
+       self.para_bgn('h4')
 
-ROMAN = 0
-ITALIC = 1
-BOLD = 2
-FIXED = 3
+    def start_h5(self, attrs):
+       self.para_bgn('h5')
+
+    def start_h6(self, attrs):
+       self.para_bgn('h6')
+
+    def end_h1(self):
+       self.para_end()
+
+    end_h2 = end_h1
+    end_h3 = end_h2
+    end_h4 = end_h3
+    end_h5 = end_h4
+    end_h6 = end_h5
+
+    def start_ul(self, attrs):
+       self.para_bgn(None)
+       self.push_list('ul')
+
+    def start_ol(self, attrs):
+       self.para_bgn(None)
+       self.push_list('ol')
+
+    def end_ul(self):
+       self.pop_list()
+       self.para_end()
+
+    def do_li(self, attrs):
+       self.para_bgn('li%d' % len(self.lists))
+
+    start_dir = start_menu = start_ul
+    end_dir = end_menu = end_ol = end_ul
+
+    def start_dl(self, attrs):
+       self.para_bgn(None)
+       self.push_list('dl')
+
+    def end_dl(self):
+       self.pop_list()
+       self.para_end()
+
+    def do_dt(self, attrs):
+       self.para_bgn('dt%d' % len(self.lists))
+
+    def do_dd(self, attrs):
+       self.para_bgn('dd%d' % len(self.lists))
+
+    def start_address(self, attrs):
+       self.para_bgn('address')
+
+    def end_address(self):
+       self.para_end()
+
+    def start_pre(self, attrs):
+       # Begin a 'pre' paragraph and raise the nofill counter so that
+       # handle_data() passes text on without reformatting.
+       self.para_bgn('pre')
+       self.nofill = self.nofill + 1
+
+    def end_pre(self):
+       # Lower the nofill counter raised by start_pre() and end the
+       # paragraph.
+       self.nofill = self.nofill - 1
+       self.para_end()
+
+    start_typewriter = start_pre
+    end_typewriter = end_pre
+
+    def do_img(self, attrs):
+       # Pull the SRC and ALT attributes out of an <img> tag and pass
+       # them to handle_image().  When absent, SRC defaults to the
+       # empty string and ALT to ' (image) '.
+       source = ''
+       alttext = ' (image) '
+       for attrname, value in attrs:
+           if attrname == 'src':
+               source = value
+           elif attrname == 'alt':
+               alttext = value
+       self.handle_image(source, alttext)
+
+    # --- Character tags -- physical styles
+
+    def start_tt(self, attrs): self.push_style(FIXED)
+    def end_tt(self): self.pop_style()
+
+    def start_b(self, attrs): self.push_style(BOLD)
+    def end_b(self): self.pop_style()
+
+    def start_i(self, attrs): self.push_style(ITALIC)
+    def end_i(self): self.pop_style()
+
+    def start_u(self, attrs): self.push_style(ITALIC) # Underline???
+    def end_u(self): self.pop_style()
+
+    def start_r(self, attrs): self.push_style(ROMAN) # Not official
+    def end_r(self): self.pop_style()
+
+    # --- Character tags -- logical styles
+
+    start_em = start_i
+    end_em = end_i
+
+    start_strong = start_b
+    end_strong = end_b
+
+    start_code = start_tt
+    end_code = end_tt
+
+    start_samp = start_tt
+    end_samp = end_tt
+
+    start_kbd = start_tt
+    end_kbd = end_tt
+
+    start_file = start_tt # unofficial
+    end_file = end_tt
+
+    start_var = start_i
+    end_var = end_i
+
+    start_dfn = start_i
+    end_dfn = end_i
+
+    start_cite = start_i
+    end_cite = end_i
+
+    start_hp1 = start_i
+    # End handlers take no attrs argument, so this must alias end_i,
+    # not start_i.
+    end_hp1 = end_i
+
+    start_hp2 = start_b
+    end_hp2 = end_b
+
+    # --- Form tags
+
+    def start_form(self, attrs):
+       self.para_bgn(None)
+
+    def end_form(self):
+       self.para_end()
+
+    # --- Unhandled tags
+
+    def unknown_starttag(self, tag, attrs):
+       pass
+
+    def unknown_endtag(self, tag):
+       pass
 
-class NullStylesheet:
-       # Fonts -- none
-       stdfontset = [None]
-       h1fontset = [None]
-       h2fontset = [None]
-       h3fontset = [None]
-       # Indents
-       stdindent = 2
-       ddindent = 25
-       ulindent = 4
-       h1indent = 0
-       h2indent = 0
-       literalindent = 0
-
-
-class X11Stylesheet(NullStylesheet):
-       stdfontset = [
-               '-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*',
-               '-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*',
-               '-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*',
-               '-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*',
-               ]
-       h1fontset = [
-               '-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*',
-               '-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*',
-               '-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*',
-               ]
-       h2fontset = [
-               '-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*',
-               '-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*',
-               '-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*',
-               ]
-       h3fontset = [
-               '-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*',
-               '-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*',
-               '-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*',
-               ]
-       ddindent = 40
-
-
-class MacStylesheet(NullStylesheet):
-       stdfontset = [
-               ('Geneva', 'p', 10),
-               ('Geneva', 'i', 10),
-               ('Geneva', 'b', 10),
-               ('Monaco', 'p', 10),
-               ]
-       h1fontset = [
-               ('Geneva', 'p', 18),
-               ('Geneva', 'i', 18),
-               ('Geneva', 'b', 18),
-               ('Monaco', 'p', 18),
-               ]
-       h3fontset = [
-               ('Geneva', 'p', 14),
-               ('Geneva', 'i', 14),
-               ('Geneva', 'b', 14),
-               ('Monaco', 'p', 14),
-               ]
-       h3fontset = [
-               ('Geneva', 'p', 12),
-               ('Geneva', 'i', 12),
-               ('Geneva', 'b', 12),
-               ('Monaco', 'p', 12),
-               ]
-
-
-if os.name == 'mac':
-       StdwinStylesheet = MacStylesheet
-else:
-       StdwinStylesheet = X11Stylesheet
-
-
-class GLStylesheet(NullStylesheet):
-       stdfontset = [
-               'Helvetica 10',
-               'Helvetica-Italic 10',
-               'Helvetica-Bold 10',
-               'Courier 10',
-               ]
-       h1fontset = [
-               'Helvetica 18',
-               'Helvetica-Italic 18',
-               'Helvetica-Bold 18',
-               'Courier 18',
-               ]
-       h2fontset = [
-               'Helvetica 14',
-               'Helvetica-Italic 14',
-               'Helvetica-Bold 14',
-               'Courier 14',
-               ]
-       h3fontset = [
-               'Helvetica 12',
-               'Helvetica-Italic 12',
-               'Helvetica-Bold 12',
-               'Courier 12',
-               ]
-
-
-# Test program -- produces no output but times how long it takes
-# to send a document to a null formatter, exclusive of I/O
 
 def test():
-       import fmt
-       import time
-       if sys.argv[1:]: file = sys.argv[1]
-       else: file = 'test.html'
-       data = open(file, 'r').read()
-       t0 = time.time()
-       fmtr = fmt.WritingFormatter(sys.stdout, 79)
-       p = FormattingParser(fmtr, NullStylesheet)
-       p.feed(data)
-       p.close()
-       t1 = time.time()
-       print
-       print '*** Formatting time:', round(t1-t0, 3), 'seconds.'
-
-
-# Test program using stdwin
-
-def testStdwin():
-       import stdwin, fmt
-       from stdwinevents import *
-       if sys.argv[1:]: file = sys.argv[1]
-       else: file = 'test.html'
-       data = open(file, 'r').read()
-       window = stdwin.open('testStdwin')
-       b = None
-       while 1:
-               etype, ewin, edetail = stdwin.getevent()
-               if etype == WE_CLOSE:
-                       break
-               if etype == WE_SIZE:
-                       window.setdocsize(0, 0)
-                       window.setorigin(0, 0)
-                       window.change((0, 0), (10000, 30000)) # XXX
-               if etype == WE_DRAW:
-                       if not b:
-                               b = fmt.StdwinBackEnd(window, 1)
-                               f = fmt.BaseFormatter(b.d, b)
-                               p = FormattingParser(f, MacStylesheet)
-                               p.feed(data)
-                               p.close()
-                               b.finish()
-                       else:
-                               b.redraw(edetail)
-       window.close()
-
-
-# Test program using GL
-
-def testGL():
-       import gl, GL, fmt
-       if sys.argv[1:]: file = sys.argv[1]
-       else: file = 'test.html'
-       data = open(file, 'r').read()
-       W, H = 600, 600
-       gl.foreground()
-       gl.prefsize(W, H)
-       wid = gl.winopen('testGL')
-       gl.ortho2(0, W, H, 0)
-       gl.color(GL.WHITE)
-       gl.clear()
-       gl.color(GL.BLACK)
-       b = fmt.GLBackEnd(wid)
-       f = fmt.BaseFormatter(b.d, b)
-       p = FormattingParser(f, GLStylesheet)
-       p.feed(data)
-       p.close()
-       b.finish()
-       #
-       import time
-       time.sleep(5)
+    file = 'test.html'
+    f = open(file, 'r')
+    data = f.read()
+    f.close()
+    p = HTMLParser()
+    p.feed(data)
+    p.close()
 
 
 if __name__ == '__main__':
-       test()
+    test()