]>
Commit | Line | Data |
---|---|---|
27e88dec | 1 | #!/usr/bin/env python3 |
53fae556 | 2 | |
03fc62ad WD |
3 | # This script takes a manpage written in markdown and turns it into an html web |
4 | # page and a nroff man page. The input file must have the name of the program | |
5 | # and the section in this format: NAME.NUM.md. The output files are written | |
6 | # into the current directory named NAME.NUM.html and NAME.NUM. The input | |
7 | # format has one extra extension: if a numbered list starts at 0, it is turned | |
8 | # into a description list. The dl's dt tag is taken from the contents of the | |
9 | # first tag inside the li, which is usually a p, code, or strong tag. The | |
10 | # cmarkgfm or commonmark lib is used to transform the input file into html. | |
11 | # The html.parser is used as a state machine that both tweaks the html and | |
12 | # outputs the nroff data based on the html tags. | |
53fae556 WD |
13 | # |
14 | # Copyright (C) 2020 Wayne Davison | |
15 | # | |
16 | # This program is freely redistributable. | |
17 | ||
58e8ecf4 | 18 | import sys, os, re, argparse, subprocess, time |
53fae556 WD |
19 | from html.parser import HTMLParser |
20 | ||
# Tags whose end tag makes HtmlToManPage.handle_endtag() consume (and reset)
# the text accumulated so far.
CONSUMES_TXT = set('h1 h2 p li pre'.split())

# Start of the generated html page; the single %s receives the page title.
HTML_START = """\
<html><head>
<title>%s</title>
<link href="https://fonts.googleapis.com/css2?family=Roboto&family=Roboto+Mono&display=swap" rel="stylesheet">
<style>
body {
max-width: 50em;
margin: auto;
}
body, b, strong, u {
font-family: 'Roboto', sans-serif;
}
code {
font-family: 'Roboto Mono', monospace;
font-weight: bold;
white-space: pre;
}
pre code {
display: block;
font-weight: normal;
}
blockquote pre code {
background: #f1f1f1;
}
dd p:first-of-type {
margin-block-start: 0em;
}
</style>
</head><body>
"""

# End of the generated html page; the single %s receives the date string.
HTML_END = """\
<div style="float: right"><p><i>%s</i></p></div>
</body></html>
"""

# Start of the generated man page; the four %s values are filled from the
# man_headings tuple built in main().
MAN_START = r"""
.TH "%s" "%s" "%s" "%s" "User Commands"
""".lstrip()

# Nothing is appended after the man page body (this is an empty string).
MAN_END = """\
"""

# Each pair is (placeholder character stored in the accumulated text,
# nroff sequence that manify() substitutes for it).
NORM_FONT = ('\1', r"\fP")
BOLD_FONT = ('\2', r"\fB")
UNDR_FONT = ('\3', r"\fI")
NBR_DASH = ('\4', r"\-")
NBR_SPACE = ('\xa0', r"\ ")

# The markdown -> html function; assigned in the __main__ section once a
# markdown library has been imported.
md_parser = None
73 | ||
def main():
    """Convert the NAME.NUM.md file named by args.mdfile into html and nroff.

    Reads the module-level ``args`` (parsed command line) and ``md_parser``
    (markdown -> html function).  Unless --test was given, VERSION and libdir
    are taken from the Makefile in the current directory, and the results are
    written to NAME.NUM.html and NAME.NUM in the current directory.  Any
    failure to find the needed inputs ends the program via die().
    """
    fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+)\.(?P<sect>\d+))\.md)$', args.mdfile)
    if not fi:
        die('Failed to parse NAME.NUM.md out of input file:', args.mdfile)
    fi = argparse.Namespace(**fi.groupdict())

    if not fi.srcdir:
        fi.srcdir = './'

    fi.title = fi.prog + '(' + fi.sect + ') man page'
    fi.mtime = 0

    # Prefer the time of the latest git commit when the source dir is a checkout.
    git_dir = fi.srcdir + '.git'
    if os.path.lexists(git_dir):
        fi.mtime = int(subprocess.check_output(['git', '--git-dir', git_dir, 'log', '-1', '--format=%at']))

    env_subs = { 'prefix': os.environ.get('RSYNC_OVERRIDE_PREFIX', None) }

    if args.test:
        env_subs['VERSION'] = '1.0.0'
        env_subs['libdir'] = '/usr'
    else:
        for fn in (fi.srcdir + 'NEWS.md', 'Makefile'):
            try:
                st = os.lstat(fn)
            except OSError:
                # fn is already the full path that was tried.
                die('Failed to find', fn)
            if not fi.mtime:
                fi.mtime = st.st_mtime

        def expand_var(m):
            # Replace one ${name} reference with the value seen so far.
            val = env_subs.get(m.group(1))
            if val is None:
                die('Undefined variable in Makefile:', m.group(1))
            return val

        with open('Makefile', 'r', encoding='utf-8') as fh:
            for line in fh:
                m = re.match(r'^(\w+)=(.+)', line)
                if not m:
                    continue
                var, val = m.group(1), m.group(2)
                # A prefix supplied via the environment overrides the Makefile.
                if var == 'prefix' and env_subs[var] is not None:
                    continue
                # Loop only while a well-formed ${name} remains, so a stray
                # "${" that cannot be expanded does not spin forever.
                while re.search(r'\$\{\w+\}', val):
                    val = re.sub(r'\$\{(\w+)\}', expand_var, val)
                env_subs[var] = val
                if var == 'VERSION':
                    break

        for var in ('VERSION', 'libdir'):
            if env_subs.get(var) is None:
                die('Failed to find a value for', var, 'in the Makefile')

    with open(fi.fn, 'r', encoding='utf-8') as fh:
        txt = fh.read()

    # Literal replacement: the values must not be parsed as regex templates.
    txt = txt.replace('@VERSION@', env_subs['VERSION'])
    txt = txt.replace('@LIBDIR@', env_subs['libdir'])

    fi.html_in = md_parser(txt)
    txt = None

    fi.date = time.strftime('%d %b %Y', time.localtime(fi.mtime))
    fi.man_headings = (fi.prog, fi.sect, fi.date, fi.prog + ' ' + env_subs['VERSION'])

    # The constructor does the whole conversion and stores fi.html_out & fi.man_out.
    HtmlToManPage(fi)

    if args.test:
        print("The test was successful.")
        return

    for fn, txt in ((fi.name + '.html', fi.html_out), (fi.name, fi.man_out)):
        print("Wrote:", fn)
        with open(fn, 'w', encoding='utf-8') as fh:
            fh.write(txt)
53fae556 | 140 | |
ae82762c | 141 | |
03fc62ad WD |
def html_via_cmarkgfm(txt):
    """Return the html that the cmarkgfm library renders for markdown txt."""
    rendered = cmarkgfm.markdown_to_html(txt)
    return rendered
144 | ||
145 | ||
def html_via_commonmark(txt):
    """Return the html that the commonmark library renders for markdown txt."""
    renderer = commonmark.HtmlRenderer()
    document = commonmark.Parser().parse(txt)
    return renderer.render(document)
148 | ||
6dc94e39 WD |
149 | |
class HtmlToManPage(HTMLParser):
    """State machine that turns html into a tweaked html page and an nroff page.

    Constructing an instance does all the work: fi.html_in is fed through the
    parser, and the results are stored in fi.html_out and fi.man_out (both
    strings).  fi must also supply title, date, and man_headings.  The
    handlers read the module-level ``args`` for the debug level.
    """

    def __init__(self, fi):
        super().__init__(convert_charrefs=True)

        st = self.state = argparse.Namespace(
            list_state = [ ],  # one entry per open list: 'dl', 'o' (ul), or the next ol number
            p_macro = ".P\n",  # paragraph macro; becomes ".IP\n" while inside a list
            at_first_tag_in_li = False,
            at_first_tag_in_dd = False,
            dt_from = None,  # the tag whose contents supply the current <dt> text
            in_pre = False,
            in_code = False,
            html_out = [ HTML_START % fi.title ],
            man_out = [ MAN_START % fi.man_headings ],
            txt = '',  # text accumulated until a consuming end tag is seen
        )

        self.feed(fi.html_in)
        fi.html_in = None

        st.html_out.append(HTML_END % fi.date)
        st.man_out.append(MAN_END)

        fi.html_out = ''.join(st.html_out)
        st.html_out = None

        fi.man_out = ''.join(st.man_out)
        st.man_out = None


    def handle_starttag(self, tag, attrs_list):
        """Emit the (possibly renamed) start tag to html and any nroff macro it needs."""
        st = self.state
        if args.debug:
            self.output_debug('START', (tag, attrs_list))
        if st.at_first_tag_in_li:
            if st.list_state[-1] == 'dl':
                # The first tag inside a description-list item supplies the <dt>.
                st.dt_from = tag
                if tag == 'p':
                    tag = 'dt'
                else:
                    st.html_out.append('<dt>')
            elif tag == 'p':
                st.at_first_tag_in_dd = True # Kluge to suppress a .P at the start of an li.
            st.at_first_tag_in_li = False
        if tag == 'p':
            if not st.at_first_tag_in_dd:
                st.man_out.append(st.p_macro)
        elif tag == 'li':
            st.at_first_tag_in_li = True
            lstate = st.list_state[-1]
            if lstate == 'dl':
                # No html or nroff output yet; it is emitted when the dt is known.
                return
            if lstate == 'o':
                st.man_out.append(".IP o\n")
            else:
                st.man_out.append(".IP " + str(lstate) + ".\n")
                st.list_state[-1] += 1
        elif tag == 'blockquote':
            st.man_out.append(".RS 4\n")
        elif tag == 'pre':
            st.in_pre = True
            st.man_out.append(st.p_macro + ".nf\n")
        elif tag == 'code' and not st.in_pre:
            st.in_code = True
            st.txt += BOLD_FONT[0]
        elif tag == 'strong' or tag == 'b':
            st.txt += BOLD_FONT[0]
        elif tag == 'em' or tag == 'i':
            tag = 'u' # Change it into underline to be more like the man page
            st.txt += UNDR_FONT[0]
        elif tag == 'ol':
            start = 1
            for var, val in attrs_list:
                if var == 'start':
                    start = int(val) # We only support integers.
                    break
            if st.list_state:
                st.man_out.append(".RS\n")
            if start == 0:
                # A numbered list starting at 0 becomes a description list.
                tag = 'dl'
                attrs_list = [ ]
                st.list_state.append('dl')
            else:
                st.list_state.append(start)
            st.man_out.append(st.p_macro)
            st.p_macro = ".IP\n"
        elif tag == 'ul':
            st.man_out.append(st.p_macro)
            if st.list_state:
                st.man_out.append(".RS\n")
                st.p_macro = ".IP\n"
            st.list_state.append('o')
        # HTMLParser reports a valueless attribute with a value of None, which
        # must be output as a bare name instead of being passed to htmlify().
        attrs = ''.join(
            ' ' + var if val is None else ' ' + var + '="' + htmlify(val) + '"'
            for var, val in attrs_list)
        st.html_out.append('<' + tag + attrs + '>')
        st.at_first_tag_in_dd = False


    def handle_endtag(self, tag):
        """Emit the (possibly renamed) end tag and flush accumulated text to nroff."""
        st = self.state
        if args.debug:
            self.output_debug('END', (tag,))
        if tag in CONSUMES_TXT or st.dt_from == tag:
            txt = st.txt.strip()
            st.txt = ''
        else:
            txt = None
        add_to_txt = None
        if tag == 'h1':
            st.man_out.append(st.p_macro + '.SH "' + manify(txt) + '"\n')
        elif tag == 'h2':
            st.man_out.append(st.p_macro + '.SS "' + manify(txt) + '"\n')
        elif tag == 'p':
            if st.dt_from == 'p':
                tag = 'dt'
                st.man_out.append('.IP "' + manify(txt) + '"\n')
                st.dt_from = None
            elif txt != '':
                st.man_out.append(manify(txt) + "\n")
        elif tag == 'li':
            if st.list_state[-1] == 'dl':
                if st.at_first_tag_in_li:
                    die("Invalid 0. -> td translation")
                tag = 'dd'
            if txt != '':
                st.man_out.append(manify(txt) + "\n")
            st.at_first_tag_in_li = False
        elif tag == 'blockquote':
            st.man_out.append(".RE\n")
        elif tag == 'pre':
            st.in_pre = False
            st.man_out.append(manify(txt) + "\n.fi\n")
        elif (tag == 'code' and not st.in_pre):
            st.in_code = False
            add_to_txt = NORM_FONT[0]
        elif tag == 'strong' or tag == 'b':
            add_to_txt = NORM_FONT[0]
        elif tag == 'em' or tag == 'i':
            tag = 'u' # Change it into underline to be more like the man page
            add_to_txt = NORM_FONT[0]
        elif tag == 'ol' or tag == 'ul':
            if st.list_state.pop() == 'dl':
                tag = 'dl'
            if st.list_state:
                st.man_out.append(".RE\n")
            else:
                st.p_macro = ".P\n"
            st.at_first_tag_in_dd = False
        st.html_out.append('</' + tag + '>')
        if add_to_txt:
            # The font reset goes into whichever text is current: the pending
            # accumulation, or the text that was just consumed for a dt.
            if txt is None:
                st.txt += add_to_txt
            else:
                txt += add_to_txt
        if st.dt_from == tag:
            st.man_out.append('.IP "' + manify(txt) + '"\n')
            st.html_out.append('</dt><dd>')
            st.at_first_tag_in_dd = True
            st.dt_from = None
        elif tag == 'dt':
            st.html_out.append('<dd>')
            st.at_first_tag_in_dd = True


    def handle_data(self, txt):
        """Append text to the html output and to the text accumulated for nroff."""
        st = self.state
        if args.debug:
            self.output_debug('DATA', (txt,))
        if st.in_pre:
            html = htmlify(txt)
        else:
            # Mark dashes (and the space before a stand-alone "--") as non-breaking.
            txt = re.sub(r'\s--(\s)', NBR_SPACE[0] + r'--\1', txt).replace('--', NBR_DASH[0]*2)
            txt = re.sub(r'(^|\W)-', r'\1' + NBR_DASH[0], txt)
            html = htmlify(txt)
            if st.in_code:
                txt = re.sub(r'\s', NBR_SPACE[0], txt)
                html = html.replace(NBR_DASH[0], '-').replace(NBR_SPACE[0], ' ') # <code> is non-breaking in CSS
        # Use explicit entities (no-break space, word joiner) instead of
        # invisible literal characters for the remaining placeholders.
        st.html_out.append(html.replace(NBR_SPACE[0], '&nbsp;').replace(NBR_DASH[0], '-&#8288;'))
        st.txt += txt


    def output_debug(self, event, extra):
        """Print the event and the parser state; output lists are abbreviated below debug level 2."""
        import pprint
        st = self.state
        if args.debug < 2:
            # Work on a shallow copy so the real state is left untouched.
            st = argparse.Namespace(**vars(st))
            if len(st.html_out) > 2:
                st.html_out = ['...'] + st.html_out[-2:]
            if len(st.man_out) > 2:
                st.man_out = ['...'] + st.man_out[-2:]
        print(event, extra)
        pprint.PrettyPrinter(indent=2).pprint(vars(st))
340 | ||
341 | ||
53fae556 WD |
def manify(txt):
    """Return txt made safe for nroff.

    Backslashes are doubled first, then each placeholder character is swapped
    for its nroff sequence, and finally any line that starts with a quote or
    a dot is prefixed with \\& so that it is not taken as a macro.
    """
    out = txt.replace('\\', '\\\\')
    for marker, nroff in (NBR_SPACE, NBR_DASH, NORM_FONT, BOLD_FONT, UNDR_FONT):
        out = out.replace(marker, nroff)
    return re.sub(r"^(['.])", r'\&\1', out, flags=re.M)
53fae556 WD |
349 | |
350 | ||
def htmlify(txt):
    """Return txt with the html-special characters & < > " turned into entities.

    The ampersand is handled first so that the entities added for the other
    characters are not escaped a second time.
    """
    return txt.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
53fae556 WD |
353 | |
354 | ||
def warn(*msg):
    """Write the space-separated message items to stderr as one line."""
    sys.stderr.write(' '.join(str(item) for item in msg) + '\n')
357 | ||
358 | ||
def die(*msg):
    """Output the message items on stderr, then exit with status 1."""
    print(*msg, file=sys.stderr)
    raise SystemExit(1)
362 | ||
363 | ||
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Transform a NAME.NUM.md markdown file into a NAME.NUM.html web page & a NAME.NUM man page.', add_help=False)
    parser.add_argument('--test', action='store_true', help='Test if we can parse the input w/o updating any files.')
    parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing. Repeat for even more.')
    parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
    parser.add_argument('mdfile', help="The NAME.NUM.md file to parse.")
    args = parser.parse_args()

    # Prefer cmarkgfm and fall back to commonmark.  Only a failed import should
    # trigger the fallback, so catch ImportError instead of everything (a bare
    # except would also swallow KeyboardInterrupt and SystemExit).
    try:
        import cmarkgfm
        md_parser = html_via_cmarkgfm
    except ImportError:
        try:
            import commonmark
            md_parser = html_via_commonmark
        except ImportError:
            die("Failed to find cmarkgfm or commonmark for python3.")

    main()