]>
Commit | Line | Data |
---|---|---|
27e88dec | 1 | #!/usr/bin/env python3 |
53fae556 | 2 | |
a2b630c0 WD |
3 | # This script transforms markdown files into html and (optionally) nroff. The |
4 | # output files are written into the current directory named for the input file | |
5 | # without the .md suffix and either the .html suffix or no suffix. | |
53fae556 | 6 | # |
a2b630c0 WD |
7 | # If the input .md file has a section number at the end of the name (e.g., |
8 | # rsync.1.md) a nroff file is also output (PROJ.NUM.md -> PROJ.NUM). | |
ec8a05f6 | 9 | # |
a2b630c0 WD |
10 | # The markdown input format has one extra extension: if a numbered list starts |
11 | # at 0, it is turned into a description list. The dl's dt tag is taken from the | |
12 | # contents of the first tag inside the li, which is usually a p, code, or | |
13 | # strong tag. | |
14 | # | |
15 | # The cmarkgfm or commonmark lib is used to transform the input file into | |
16 | # html. Then, the html.parser is used as a state machine that lets us tweak | |
17 | # the html and (optionally) output nroff data based on the html tags. | |
18 | # | |
19 | # If the string @USE_GFM_PARSER@ exists in the file, the string is removed and | |
20 | # a GitHub-flavored-markdown parser is used to parse the file. | |
21 | # | |
22 | # The man-page .md files also get the vars @VERSION@, @BINDIR@, and @LIBDIR@ | |
23 | # substituted. Some of these values depend on the Makefile $(prefix) (see the | |
24 | # generated Makefile). If the maintainer wants to build files for /usr/local | |
25 | # while creating release-ready man-page files for /usr, use the environment to | |
26 | # set RSYNC_OVERRIDE_PREFIX=/usr. | |
27 | ||
28 | # Copyright (C) 2020 - 2021 Wayne Davison | |
53fae556 WD |
29 | # |
30 | # This program is freely redistributable. | |
31 | ||
a2b630c0 | 32 | import os, sys, re, argparse, subprocess, time |
53fae556 WD |
33 | from html.parser import HTMLParser |
34 | ||
d07272d6 WD |
# Page names that a link's href may name (alone or before a '#'); any other
# non-URL page name is recorded as a bad link by TransformHtml.
VALID_PAGES = 'README INSTALL COPYING rsync.1 rrsync.1 rsync-ssl.1 rsyncd.conf.5'.split()

# Tags whose end tag consumes (and resets) the text accumulated in st.txt.
CONSUMES_TXT = set('h1 h2 h3 p li pre'.split())
53fae556 WD |
38 | |
# The start of every html output file.  TransformHtml replaces %TITLE% with
# the page title and, when the page contains a table, inserts TABLE_STYLE just
# before the closing </style> tag.
HTML_START = """\
<html><head>
<title>%TITLE%</title>
<meta charset="UTF-8"/>
<link href="https://fonts.googleapis.com/css2?family=Roboto&family=Roboto+Mono&display=swap" rel="stylesheet">
<style>
body {
max-width: 50em;
margin: auto;
}
body, b, strong, u {
font-family: 'Roboto', sans-serif;
}
a.tgt { font-face: symbol; font-weight: 400; font-size: 70%; visibility: hidden; text-decoration: none; color: #ddd; padding: 0 4px; border: 0; vertical-align: top; }
a.tgt:after { content: '🔗'; }
a.tgt:hover { color: #444; background-color: #eaeaea; }
h1:hover > a.tgt, h2:hover > a.tgt, h3:hover > a.tgt, dt:hover > a.tgt { visibility: visible; }
code {
font-family: 'Roboto Mono', monospace;
font-weight: bold;
white-space: pre;
}
pre code {
display: block;
font-weight: normal;
}
blockquote pre code {
background: #f1f1f1;
}
dd p:first-of-type {
margin-block-start: 0em;
}
</style>
</head><body>
"""

# Extra CSS rules that are only added to pages that contain a table.
TABLE_STYLE = """\
table {
border-color: grey;
border-spacing: 0;
}
tr {
border-top: 1px solid grey;
}
tr:nth-child(2n) {
background-color: #f6f8fa;
}
th, td {
border: 1px solid #dfe2e5;
text-align: center;
padding-left: 1em;
padding-right: 1em;
}
"""

# Footer for man-page html output; the %s is filled with env_subs['date'].
MAN_HTML_END = """\
<div style="float: right"><p><i>%s</i></p></div>
"""

# The end of every html output file.
HTML_END = """\
</body></html>
"""

# The start of the nroff output.  It is filled from fi.man_headings, which is
# the tuple (prog, sect, date, prog_ver, prefix).
MAN_START = r"""
.TH "%s" "%s" "%s" "%s" "User Commands"
.\" prefix=%s
""".lstrip()

# Appended to the end of the nroff output (currently an empty string).
MAN_END = """\
"""
109 | ||
# Each pair is (placeholder, nroff sequence).  The placeholder character is
# what gets stored in the accumulated text while parsing; manify() replaces
# each placeholder with its nroff sequence.
NORM_FONT = ('\1', r"\fP")
BOLD_FONT = ('\2', r"\fB")
UNDR_FONT = ('\3', r"\fI")
NBR_DASH = ('\4', r"\-")
NBR_SPACE = ('\xa0', r"\ ")

# Splits an input name into: fn (the whole name), srcdir (optional leading
# dir, including the slash), name (without .md), prog, and sect (the optional
# trailing section number).
FILENAME_RE = re.compile(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+?)(\.(?P<sect>\d+))?)\.md)$')
# A NAME=VALUE line (used on the Makefile).
ASSIGNMENT_RE = re.compile(r'^(\w+)=(.+)')
# The first double-quoted string (used on version.h).
QUOTED_RE = re.compile(r'"(.+?)"')
# A ${NAME} variable reference in a Makefile value.
VAR_REF_RE = re.compile(r'\$\{(\w+)\}')
# A version number that follows a space and is followed by a comma or space.
VERSION_RE = re.compile(r' (\d[.\d]+)[, ]')
# A run of the characters \1 through \7, which covers the placeholder chars.
BIN_CHARS_RE = re.compile(r'[\1-\7]+')
# A double dash surrounded by whitespace.
SPACE_DOUBLE_DASH_RE = re.compile(r'\s--(\s)')
# A single dash at the start of the text or after a non-word character.
NON_SPACE_SINGLE_DASH_RE = re.compile(r'(^|\W)-')
WHITESPACE_RE = re.compile(r'\s')
# Text between a bold-font placeholder and either an "=" or a normal-font
# placeholder; the captured text contains neither.
CODE_BLOCK_RE = re.compile(r'[%s]([^=%s]+)[=%s]' % (BOLD_FONT[0], NORM_FONT[0], NORM_FONT[0]))
NBR_DASH_RE = re.compile(r'[%s]' % NBR_DASH[0])
# Any character that txt2target() does not keep in a link target.
INVALID_TARGET_CHARS_RE = re.compile(r'[^-A-Za-z0-9._]')
# A leading character that is not a letter or a digit.
INVALID_START_CHAR_RE = re.compile(r'^([^A-Za-z0-9])')
# A single quote or a dot at the start of any line.
MANIFY_LINESTART_RE = re.compile(r"^(['.])", flags=re.M)
130 | ||
# The markdown-to-html function; it is assigned in the __main__ section once
# an available markdown library has been found.
md_parser = None
# Substitution values (VERSION, prefix, bindir, libdir, date, ...) that are
# filled in by find_man_substitutions() the first time a man page is parsed.
env_subs = { }

# Incremented by warn(); a non-zero count makes the script exit with status 1.
warning_count = 0
135 | ||
def main():
    """Convert each markdown file named on the command line.

    Uses the module-level ``args`` namespace.  In --test mode a success
    message is printed once all the files have been parsed.
    """
    for md_path in args.mdfiles:
        parse_md_file(md_path)

    if not args.test:
        return
    print("The test was successful.")
142 | ||
143 | ||
def parse_md_file(mdfn):
    """Convert one markdown file into html and (for man pages) nroff.

    mdfn must match FILENAME_RE (i.e. end in ".md"); a name with a trailing
    section number, such as rsync.1.md, is treated as a man page and also
    gets nroff output.  The output files are named for the input file without
    its directory and without the .md suffix, and are written into args.dest
    (or the current directory).  Nothing is written in --test mode.

    Exits via die() if the name cannot be parsed or if the file asks for the
    gfm parser and it is not available.
    """
    fi = FILENAME_RE.match(mdfn)
    if not fi:
        die('Failed to parse a md input file name:', mdfn)
    fi = argparse.Namespace(**fi.groupdict())
    fi.want_manpage = bool(fi.sect)
    if fi.want_manpage:
        fi.title = fi.prog + '(' + fi.sect + ') man page'
    else:
        fi.title = fi.prog + ' for rsync'

    if fi.want_manpage:
        # The substitution values are only looked up once per run.
        if not env_subs:
            find_man_substitutions()
        prog_ver = 'rsync ' + env_subs['VERSION']
        if fi.prog != 'rsync':
            prog_ver = fi.prog + ' from ' + prog_ver
        fi.man_headings = (fi.prog, fi.sect, env_subs['date'], prog_ver, env_subs['prefix'])

    with open(mdfn, 'r', encoding='utf-8') as fh:
        txt = fh.read()

    use_gfm_parser = '@USE_GFM_PARSER@' in txt
    if use_gfm_parser:
        txt = txt.replace('@USE_GFM_PARSER@', '')

    if fi.want_manpage:
        txt = (txt.replace('@VERSION@', env_subs['VERSION'])
                  .replace('@BINDIR@', env_subs['bindir'])
                  .replace('@LIBDIR@', env_subs['libdir']))

    if use_gfm_parser:
        if not gfm_parser:
            die('Input file requires cmarkgfm parser:', mdfn)
        fi.html_in = gfm_parser(txt)
    else:
        fi.html_in = md_parser(txt)
    txt = None

    # The constructor does the whole transformation and stores the results
    # in fi.html_out and fi.man_out.
    TransformHtml(fi)

    if args.test:
        return

    output_list = [ (fi.name + '.html', fi.html_out) ]
    if fi.want_manpage:
        output_list += [ (fi.name, fi.man_out) ]
    for fn, txt in output_list:
        if args.dest and args.dest != '.':
            fn = os.path.join(args.dest, fn)
        # Remove any existing file (or symlink) so that a new file is created.
        if os.path.lexists(fn):
            os.unlink(fn)
        with open(fn, 'w', encoding='utf-8') as fh:
            fh.write(txt)
        # Only report the file after the write has succeeded.
        print("Wrote:", fn)
6dc94e39 | 199 | |
53fae556 | 200 | |
a2b630c0 WD |
def find_man_substitutions():
    """Fill env_subs with the values that get substituted into man pages.

    Sets 'prefix' (from RSYNC_OVERRIDE_PREFIX, if set), 'VERSION', 'bindir',
    'libdir', and 'date'.  In --test mode fixed values are used; otherwise the
    version comes from the first quoted string in version.h (next to this
    script) and the other values come from the NAME=VALUE lines of the
    Makefile in the current directory, read up to and including "srcdir".

    The date is the time of the latest git commit when a .git dir exists next
    to the script, else the mtime of version.h (or of the Makefile if that
    mtime is 0).  In --test mode without a .git dir the time stays 0.

    Exits via die() if a needed file is missing or version.h has no quoted
    string.
    """
    # Joining with '' adds the trailing separator only when the dirname is
    # non-empty, so a script name without a directory yields '' (the current
    # directory) instead of '/'.
    srcdir = os.path.join(os.path.dirname(sys.argv[0]), '')
    mtime = 0

    git_dir = srcdir + '.git'
    if os.path.lexists(git_dir):
        mtime = int(subprocess.check_output(['git', '--git-dir', git_dir, 'log', '-1', '--format=%at']))

    # Allow "prefix" to be overridden via the environment:
    env_subs['prefix'] = os.environ.get('RSYNC_OVERRIDE_PREFIX', None)

    if args.test:
        env_subs['VERSION'] = '1.0.0'
        env_subs['bindir'] = '/usr/bin'
        env_subs['libdir'] = '/usr/lib/rsync'
    else:
        version_h = srcdir + 'version.h'
        for fn in (version_h, 'Makefile'):
            try:
                st = os.lstat(fn)
            except OSError:
                # fn is already the full path that was tried.
                die('Failed to find', fn)
            if not mtime:
                mtime = st.st_mtime

        with open(version_h, 'r', encoding='utf-8') as fh:
            txt = fh.read()
        m = QUOTED_RE.search(txt)
        if not m:
            die('Failed to find a quoted version string in', version_h)
        env_subs['VERSION'] = m.group(1)

        with open('Makefile', 'r', encoding='utf-8') as fh:
            for line in fh:
                m = ASSIGNMENT_RE.match(line)
                if not m:
                    continue
                var, val = (m.group(1), m.group(2))
                # Keep a prefix value that came from the environment.
                if var == 'prefix' and env_subs[var] is not None:
                    continue
                # Expand ${NAME} references using the values seen so far.
                while VAR_REF_RE.search(val):
                    val = VAR_REF_RE.sub(lambda ref: env_subs[ref.group(1)], val)
                env_subs[var] = val
                if var == 'srcdir':
                    break

    env_subs['date'] = time.strftime('%d %b %Y', time.localtime(mtime))
53fae556 | 245 | |
ae82762c | 246 | |
03fc62ad WD |
def html_via_commonmark(txt):
    """Return the html that the commonmark library renders for markdown txt."""
    renderer = commonmark.HtmlRenderer()
    ast = commonmark.Parser().parse(txt)
    return renderer.render(ast)
249 | ||
6dc94e39 | 250 | |
class TransformHtml(HTMLParser):
    """A state machine that tweaks the html and builds the nroff output.

    All the work happens in the constructor: fi.html_in is fed through the
    parser and the results are stored in fi.html_out and fi.man_out.  The
    handler methods append to st.html_out / st.man_out and accumulate the
    text of the current element in st.txt.
    """

    def __init__(self, fi):
        """Transform fi.html_in, then warn about any unresolved links.

        fi is the namespace built by parse_md_file(); it must have fn, title,
        want_manpage, html_in, and (for man pages) man_headings.
        """
        HTMLParser.__init__(self, convert_charrefs=True)

        self.fn = fi.fn

        st = self.state = argparse.Namespace(
            list_state = [ ],           # one entry per open list: 'dl', 'o', or the next item number
            p_macro = ".P\n",           # the nroff macro that starts a paragraph
            at_first_tag_in_li = False,
            at_first_tag_in_dd = False,
            dt_from = None,             # the tag that supplies the current dt text
            in_pre = False,
            in_code = False,
            html_out = [ HTML_START.replace('%TITLE%', fi.title) ],
            man_out = [ ],
            txt = '',                   # the text accumulated for the current element
            want_manpage = fi.want_manpage,
            created_hashtags = set(),
            derived_hashtags = set(),
            referenced_hashtags = set(),
            bad_hashtags = set(),
            latest_targets = [ ],
            opt_prefix = 'opt',
            a_txt_start = None,         # the length of st.txt when the current link started
            target_suf = '',
            )

        if st.want_manpage:
            st.man_out.append(MAN_START % fi.man_headings)

        if '</table>' in fi.html_in:
            st.html_out[0] = st.html_out[0].replace('</style>', TABLE_STYLE + '</style>')

        self.feed(fi.html_in)
        fi.html_in = None

        if st.want_manpage:
            st.html_out.append(MAN_HTML_END % env_subs['date'])
        st.html_out.append(HTML_END)
        st.man_out.append(MAN_END)

        fi.html_out = ''.join(st.html_out)
        st.html_out = None

        fi.man_out = ''.join(st.man_out)
        st.man_out = None

        # Links whose target was derived from the link text can only be
        # checked once all the targets have been created.
        for tgt, txt in st.derived_hashtags:
            derived = txt2target(txt, tgt)
            if derived not in st.created_hashtags:
                txt = BIN_CHARS_RE.sub('', txt.replace(NBR_DASH[0], '-').replace(NBR_SPACE[0], ' '))
                warn('Unknown derived hashtag link in', self.fn, 'based on:', (tgt, txt))

        for bad in st.bad_hashtags:
            if bad in st.created_hashtags:
                warn('Missing "#" in hashtag link in', self.fn + ':', bad)
            else:
                warn('Unknown non-hashtag link in', self.fn + ':', bad)

        for bad in st.referenced_hashtags - st.created_hashtags:
            warn('Unknown hashtag link in', self.fn + ':', '#' + bad)

    def handle_starttag(self, tag, attrs_list):
        """Handle a start tag: update the state and output the (tweaked) tag.

        attrs_list is the list of (name, value) pairs from HTMLParser.
        """
        st = self.state
        if args.debug:
            self.output_debug('START', (tag, attrs_list))
        if st.at_first_tag_in_li:
            if st.list_state[-1] == 'dl':
                # The first tag in a description-list item supplies the dt.
                st.dt_from = tag
                if tag == 'p':
                    tag = 'dt'
                else:
                    st.html_out.append('<dt>')
            elif tag == 'p':
                st.at_first_tag_in_dd = True # Kluge to suppress a .P at the start of an li.
            st.at_first_tag_in_li = False
        if tag == 'p':
            if not st.at_first_tag_in_dd:
                st.man_out.append(st.p_macro)
        elif tag == 'li':
            st.at_first_tag_in_li = True
            lstate = st.list_state[-1]
            if lstate == 'dl':
                return
            if lstate == 'o':
                st.man_out.append(".IP o\n")
            else:
                st.man_out.append(".IP " + str(lstate) + ".\n")
                st.list_state[-1] += 1
        elif tag == 'blockquote':
            st.man_out.append(".RS 4\n")
        elif tag == 'pre':
            st.in_pre = True
            st.man_out.append(st.p_macro + ".nf\n")
        elif tag == 'code' and not st.in_pre:
            st.in_code = True
            st.txt += BOLD_FONT[0]
        elif tag == 'strong' or tag == 'b':
            st.txt += BOLD_FONT[0]
        elif tag == 'em' or tag == 'i':
            if st.want_manpage:
                tag = 'u' # Change it into underline to be more like the man page
            st.txt += UNDR_FONT[0]
        elif tag == 'ol':
            start = 1
            for var, val in attrs_list:
                if var == 'start':
                    start = int(val) # We only support integers.
                    break
            if st.list_state:
                st.man_out.append(".RS\n")
            if start == 0:
                # A numbered list that starts at 0 becomes a description list.
                tag = 'dl'
                attrs_list = [ ]
                st.list_state.append('dl')
            else:
                st.list_state.append(start)
            st.man_out.append(st.p_macro)
            st.p_macro = ".IP\n"
        elif tag == 'ul':
            st.man_out.append(st.p_macro)
            if st.list_state:
                st.man_out.append(".RS\n")
                st.p_macro = ".IP\n"
            st.list_state.append('o')
        elif tag == 'hr':
            st.man_out.append(".l\n")
            st.html_out.append("<hr />")
            return
        elif tag == 'a':
            st.a_href = None
            for var, val in attrs_list:
                if var == 'href':
                    if val.startswith(('https://', 'http://', 'mailto:', 'ftp:')):
                        pass # nothing to check
                    elif '#' in val:
                        # Split at the first '#' only, so that a value with
                        # extra '#' chars reaches the check below instead of
                        # failing to unpack.
                        pg, tgt = val.split('#', 1)
                        if pg and pg not in VALID_PAGES or '#' in tgt:
                            st.bad_hashtags.add(val)
                        elif tgt in ('', 'opt', 'dopt'):
                            # The real target gets derived from the link text
                            # when the end tag is handled.
                            st.a_href = val
                        elif pg == '':
                            st.referenced_hashtags.add(tgt)
                            if tgt in st.latest_targets:
                                warn('Found link to the current section in', self.fn + ':', val)
                    elif val not in VALID_PAGES:
                        st.bad_hashtags.add(val)
            st.a_txt_start = len(st.txt)
        st.html_out.append('<' + tag + ''.join(' ' + var + '="' + htmlify(val) + '"' for var, val in attrs_list) + '>')
        st.at_first_tag_in_dd = False

    def handle_endtag(self, tag):
        """Handle an end tag: output the accumulated text and the end tag."""
        st = self.state
        if args.debug:
            self.output_debug('END', (tag,))
        if tag in CONSUMES_TXT or st.dt_from == tag:
            txt = st.txt.strip()
            st.txt = ''
        else:
            txt = None
        add_to_txt = None
        if tag == 'h1':
            tgt = txt
            if tgt.startswith('NEWS for '):
                m = VERSION_RE.search(tgt)
                if m:
                    # The version becomes the h1 target and the suffix for
                    # the h2/h3 targets that follow.
                    tgt = m.group(1)
                    st.target_suf = '-' + tgt
            self.add_targets(tgt)
        elif tag == 'h2':
            st.man_out.append(st.p_macro + '.SH "' + manify(txt) + '"\n')
            self.add_targets(txt, st.target_suf)
            st.opt_prefix = 'dopt' if txt == 'DAEMON OPTIONS' else 'opt'
        elif tag == 'h3':
            st.man_out.append(st.p_macro + '.SS "' + manify(txt) + '"\n')
            self.add_targets(txt, st.target_suf)
        elif tag == 'p':
            if st.dt_from == 'p':
                tag = 'dt'
                st.man_out.append('.IP "' + manify(txt) + '"\n')
                if txt.startswith(BOLD_FONT[0]):
                    self.add_targets(txt)
                st.dt_from = None
            elif txt != '':
                st.man_out.append(manify(txt) + "\n")
        elif tag == 'li':
            if st.list_state[-1] == 'dl':
                if st.at_first_tag_in_li:
                    die("Invalid 0. -> td translation")
                tag = 'dd'
            if txt != '':
                st.man_out.append(manify(txt) + "\n")
            st.at_first_tag_in_li = False
        elif tag == 'blockquote':
            st.man_out.append(".RE\n")
        elif tag == 'pre':
            st.in_pre = False
            st.man_out.append(manify(txt) + "\n.fi\n")
        elif (tag == 'code' and not st.in_pre):
            st.in_code = False
            add_to_txt = NORM_FONT[0]
        elif tag == 'strong' or tag == 'b':
            add_to_txt = NORM_FONT[0]
        elif tag == 'em' or tag == 'i':
            if st.want_manpage:
                tag = 'u' # Change it into underline to be more like the man page
            add_to_txt = NORM_FONT[0]
        elif tag == 'ol' or tag == 'ul':
            if st.list_state.pop() == 'dl':
                tag = 'dl'
            if st.list_state:
                st.man_out.append(".RE\n")
            else:
                st.p_macro = ".P\n"
            st.at_first_tag_in_dd = False
        elif tag == 'hr':
            return
        elif tag == 'a':
            if st.a_href:
                atxt = st.txt[st.a_txt_start:]
                find = 'href="' + st.a_href + '"'
                # Search backward for the start tag that was output for this
                # link so that its href can be given the derived target.
                for j in range(len(st.html_out)-1, 0, -1):
                    if find in st.html_out[j]:
                        pg, tgt = st.a_href.split('#', 1)
                        derived = txt2target(atxt, tgt)
                        if pg == '':
                            if derived in st.latest_targets:
                                warn('Found link to the current section in', self.fn + ':', st.a_href)
                            st.derived_hashtags.add((tgt, atxt))
                        st.html_out[j] = st.html_out[j].replace(find, 'href="' + pg + '#' + derived + '"')
                        break
                else:
                    die('INTERNAL ERROR: failed to find href in html data:', find)
        st.html_out.append('</' + tag + '>')
        if add_to_txt:
            if txt is None:
                st.txt += add_to_txt
            else:
                txt += add_to_txt
        if st.dt_from == tag:
            st.man_out.append('.IP "' + manify(txt) + '"\n')
            st.html_out.append('</dt><dd>')
            st.at_first_tag_in_dd = True
            st.dt_from = None
        elif tag == 'dt':
            st.html_out.append('<dd>')
            st.at_first_tag_in_dd = True

    def handle_data(self, txt):
        """Handle text: output it as html and add it to the accumulated text.

        Outside of a pre section, dashes (and the whitespace inside code) are
        changed into the non-breaking placeholder characters first.
        """
        st = self.state
        if '](' in txt:
            warn('Malformed link in', self.fn + ':', txt)
        if args.debug:
            self.output_debug('DATA', (txt,))
        if st.in_pre:
            html = htmlify(txt)
        else:
            txt = SPACE_DOUBLE_DASH_RE.sub(NBR_SPACE[0] + r'--\1', txt).replace('--', NBR_DASH[0]*2)
            txt = NON_SPACE_SINGLE_DASH_RE.sub(r'\1' + NBR_DASH[0], txt)
            html = htmlify(txt)
            if st.in_code:
                txt = WHITESPACE_RE.sub(NBR_SPACE[0], txt)
                html = html.replace(NBR_DASH[0], '-').replace(NBR_SPACE[0], ' ') # <code> is non-breaking in CSS
        # The placeholders are output as entity references (a non-breaking
        # space, and a dash followed by a word joiner) so that the html does
        # not contain the raw characters.
        st.html_out.append(html.replace(NBR_SPACE[0], '&nbsp;').replace(NBR_DASH[0], '-&#8288;'))
        st.txt += txt

    def add_targets(self, txt, suf=None):
        """Output the link target(s) for a heading or a dt and record them.

        Each bold section found in txt by CODE_BLOCK_RE becomes a target, or
        the whole txt if there are none.  suf, if given, is appended to each
        target name.  A name that already exists gets "-N" appended.
        """
        st = self.state
        targets = CODE_BLOCK_RE.findall(txt)
        if not targets:
            targets = [ txt ]
        first_one = True
        for txt in targets:
            txt = txt2target(txt, st.opt_prefix)
            if not txt:
                continue
            if suf:
                txt += suf
            if txt in st.created_hashtags:
                for j in range(2, 1000):
                    chk = txt + '-' + str(j)
                    if chk not in st.created_hashtags:
                        print('Made link target unique:', chk)
                        txt = chk
                        break
            if first_one:
                # Only the first target gets the visible link anchor.
                st.html_out.append('<a id="' + txt + '" href="#' + txt + '" class="tgt"></a>')
                first_one = False
            else:
                st.html_out.append('<span id="' + txt + '"></span>')
            st.created_hashtags.add(txt)
        st.latest_targets = targets

    def output_debug(self, event, extra):
        """Print the event and the current state (abbreviated unless -DD)."""
        import pprint
        st = self.state
        if args.debug < 2:
            # Work on a shallow copy so that the real output lists are kept.
            st = argparse.Namespace(**vars(st))
            if len(st.html_out) > 2:
                st.html_out = ['...'] + st.html_out[-2:]
            if len(st.man_out) > 2:
                st.man_out = ['...'] + st.man_out[-2:]
        print(event, extra)
        pprint.PrettyPrinter(indent=2).pprint(vars(st))
561 | ||
562 | ||
def txt2target(txt, opt_prefix):
    """Turn heading, dt, or link text into a link-target name.

    If the text has a bold section (per CODE_BLOCK_RE), only that section is
    used.  The non-breaking dashes become "-", the other placeholder chars
    are removed, and every remaining invalid character becomes "_".  A name
    that starts with a dash gets opt_prefix (if non-empty) put in front of
    it; otherwise a name that starts with a non-alphanumeric character gets a
    "t" put in front of it.
    """
    label = txt.strip().rstrip(':')
    code_match = CODE_BLOCK_RE.search(label)
    if code_match is not None:
        label = code_match.group(1)
    for pattern, replacement in (
        (NBR_DASH_RE, '-'),
        (BIN_CHARS_RE, ''),
        (INVALID_TARGET_CHARS_RE, '_'),
    ):
        label = pattern.sub(replacement, label)
    if opt_prefix and label.startswith('-'):
        return opt_prefix + label
    return INVALID_START_CHAR_RE.sub(r't\1', label)
576 | ||
577 | ||
def manify(txt):
    """Return txt with its backslashes and placeholders made nroff-safe.

    Each backslash is doubled, each placeholder character is replaced by its
    nroff sequence, and a line that starts with a single quote or a dot gets
    a \\& put in front of it.
    """
    roff = txt.replace('\\', '\\\\')
    for placeholder, sequence in (NBR_SPACE, NBR_DASH, NORM_FONT, BOLD_FONT, UNDR_FONT):
        roff = roff.replace(placeholder, sequence)
    return MANIFY_LINESTART_RE.sub(r'\&\1', roff)
53fae556 WD |
585 | |
586 | ||
def htmlify(txt):
    """Return txt with the html-special characters turned into entities.

    The ampersand is replaced first so that the entities added for the other
    characters are not escaped a second time.
    """
    return txt.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
53fae556 WD |
589 | |
590 | ||
def warn(*msg):
    """Print msg on stderr and count the warning in warning_count."""
    global warning_count
    print(*msg, file=sys.stderr)
    warning_count = warning_count + 1
53fae556 WD |
595 | |
596 | ||
def die(*msg):
    """Output msg via warn() and exit the script with status 1."""
    warn(*msg)
    raise SystemExit(1)
600 | ||
601 | ||
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Output html and (optionally) nroff for markdown pages.", add_help=False)
    parser.add_argument('--test', action='store_true', help="Just test the parsing without outputting any files.")
    parser.add_argument('--dest', metavar='DIR', help="Put files into DIR instead of the current directory.")
    parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing. Repeat for even more.')
    parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
    parser.add_argument("mdfiles", nargs='+', help="The source .md files to convert.")
    args = parser.parse_args()

    # Prefer cmarkgfm (which also supplies the gfm parser) and fall back to
    # commonmark.  Only a missing module or a missing function triggers the
    # fallback, so an interrupt or an exit request is not swallowed.
    try:
        import cmarkgfm
        md_parser = cmarkgfm.markdown_to_html
        gfm_parser = cmarkgfm.github_flavored_markdown_to_html
    except (ImportError, AttributeError):
        try:
            import commonmark
            md_parser = html_via_commonmark
        except ImportError:
            die("Failed to find cmarkgfm or commonmark for python3.")
        gfm_parser = None

    main()
    # Any warning makes the run count as a failure.
    if warning_count:
        sys.exit(1)