]> git.ipfire.org Git - people/ms/strongswan.git/blob - doc/utils/html2txt.sed
fc49409910b4ede67ebb149d7ab43c2004105c49
[people/ms/strongswan.git] / doc / utils / html2txt.sed
1 # skip over header material: the commands right after this license notice drop the <head> section and the <body> tag
2 # Copyright (C) 1999 Sandy Harris.
3 #
4 # This program is free software; you can redistribute it and/or modify it
5 # under the terms of the GNU General Public License as published by the
6 # Free Software Foundation; either version 2 of the License, or (at your
7 # option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
8 #
9 # This program is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 # for more details.
13 #
14 # RCSID $Id: html2txt.sed,v 1.1 2004/03/15 20:35:24 as Exp $
15 /<head>/,/<\/head>/d
16 /<HEAD>/,/<\/HEAD>/d
17 /^<body>$/d
18 s/<body>//
19 # eliminate possible DOS crud: drop the first carriage return (octal 015) on the line. NOTE(review): \015 is not a POSIX escape -- confirm the sed in use honors it
20 s/\015//
21 # get rid of HTML comments: first strip comment text closed on the same line (greedy: first <!-- through last -->), then delete whole lines from a remaining <!-- through the next line containing -->
22 s/<!--.*-->//
23 /<!--/,/-->/d
24 # citations & emphasis -> visible: cite becomes double quotes, em becomes asterisks, strong becomes !-> <-!; <b> tags are dropped; the first blockquote open/close on a line gets a text label
25 s/<cite>/"/g
26 s/<\/cite>/"/g
27 s/<em>/*/g
28 s/<\/em>/*/g
29 s/<strong>/!->/g
30 s/<\/strong>/<-!/g
31 s/<b>//g
32 s/<\/b>//g
33 s/<blockquote>/Quote -->/
34 s/<\/blockquote>/<-- End Quote/
35 # mark headers: the first lowercase <hN> tag on a line is replaced by a "Header N: " label
36 s/<h1>/Header 1: /
37 s/<h2>/Header 2: /
38 s/<h3>/Header 3: /
39 s/<h4>/Header 4: /
40 s/<h5>/Header 5: /
41 s/<h6>/Header 6: /
42 # remove some cruft: the first closing header tag, and <a name=...> anchors with unquoted letter/digit/dot names (a line holding only such an anchor is deleted outright)
43 s/<\/h[1-6]>//
44 /^<a name=[a-zA-Z0-9\.]*>$/d
45 s/<a name=[a-zA-Z0-9\.]*>//
46 # definition lists: <dl>/</dl> are dropped; a leading <dt> becomes a dashed rule (followed by a newline when text follows it); the first <dd> becomes a newline
47 s/<dl>//
48 s/<\/dl>//
49 s/^<dt>$/-----------------------------------------/
50 s/^<dt>/-----------------------------------------\
51 /
52 s/<dd>/\
53 /
54 # other types of lists: drop the first <li>, <ol>, <ul>, </ol> and </ul> on each line (no rule for </li> is visible in this script)
55 s/<li>//
56 s/<ol>//
57 s/<ul>//
58 s/<\/ol>//
59 s/<\/ul>//
60 # tables: drop the first <table>, </table> and <tr> on a line; every <td> becomes a single blank (no rules for </tr> or </td> are visible in this script)
61 s/<table>//
62 s/<\/table>//
63 s/<tr>//
64 s/<td>/ /g
65 # line break and paragraph markers: the anchored rules must run before the unanchored ones, so keep this order
66 # different subst depending where they are in line: <br> at either end is dropped, elsewhere it becomes a newline; a lone <p> line is emptied, <p> at either end becomes one newline, elsewhere two; </p> is dropped
67 s/^<br>//
68 s/<br>$//
69 s/<br>/\
70 /
71 s/^<p>$//
72 s/<p>$/\
73 /
74 s/^<p>/\
75 /
76 s/<p>/\
77 \
78 /
79 s/<\/p>//
80 # remove more cruft: <pre> markers and the closing body/html tags in lower- and upper-case
81 s/<pre>//
82 s/<\/pre>//
83 s/<\/body>//
84 s/<\/html>//
85 s/<\/BODY>//
86 s/<\/HTML>//
86 s/<\/HTML>//