]> git.ipfire.org Git - thirdparty/rsync.git/blame - md-convert
Fix `--old-args` interaction with a daemon
[thirdparty/rsync.git] / md-convert
CommitLineData
27e88dec 1#!/usr/bin/env python3
53fae556 2
a2b630c0
WD
3# This script transforms markdown files into html and (optionally) nroff. The
4# output files are written into the current directory named for the input file
5# without the .md suffix and either the .html suffix or no suffix.
53fae556 6#
a2b630c0
WD
7# If the input .md file has a section number at the end of the name (e.g.,
8# rsync.1.md) a nroff file is also output (PROJ.NUM.md -> PROJ.NUM).
ec8a05f6 9#
a2b630c0
WD
10# The markdown input format has one extra extension: if a numbered list starts
11# at 0, it is turned into a description list. The dl's dt tag is taken from the
12# contents of the first tag inside the li, which is usually a p, code, or
13# strong tag.
14#
# The cmarkgfm or commonmark lib is used to transform the input file into
16# html. Then, the html.parser is used as a state machine that lets us tweak
17# the html and (optionally) output nroff data based on the html tags.
18#
19# If the string @USE_GFM_PARSER@ exists in the file, the string is removed and
20# a github-flavored-markup parser is used to parse the file.
21#
22# The man-page .md files also get the vars @VERSION@, @BINDIR@, and @LIBDIR@
23# substituted. Some of these values depend on the Makefile $(prefix) (see the
24# generated Makefile). If the maintainer wants to build files for /usr/local
25# while creating release-ready man-page files for /usr, use the environment to
26# set RSYNC_OVERRIDE_PREFIX=/usr.
27
28# Copyright (C) 2020 - 2021 Wayne Davison
53fae556
WD
29#
30# This program is freely redistributable.
31
a2b630c0 32import os, sys, re, argparse, subprocess, time
53fae556
WD
33from html.parser import HTMLParser
34
d07272d6
WD
# Page names accepted before the '#' (or as the whole value) of a relative
# link; anything else is recorded as a bad link by TransformHtml.
VALID_PAGES = 'README INSTALL COPYING rsync.1 rrsync.1 rsync-ssl.1 rsyncd.conf.5'.split()

# Tags whose end tag takes (and clears) the text accumulated in the parser state.
CONSUMES_TXT = set('h1 h2 h3 p li pre'.split())

# Opening of every generated html file; %TITLE% is replaced with the page title
# and TABLE_STYLE is spliced in before </style> when the page contains a table.
HTML_START = """\
<html><head>
<title>%TITLE%</title>
<meta charset="UTF-8"/>
<link href="https://fonts.googleapis.com/css2?family=Roboto&family=Roboto+Mono&display=swap" rel="stylesheet">
<style>
body {
  max-width: 50em;
  margin: auto;
}
body, b, strong, u {
  font-family: 'Roboto', sans-serif;
}
a.tgt { font-face: symbol; font-weight: 400; font-size: 70%; visibility: hidden; text-decoration: none; color: #ddd; padding: 0 4px; border: 0; vertical-align: top; }
a.tgt:after { content: '🔗'; }
a.tgt:hover { color: #444; background-color: #eaeaea; }
h1:hover > a.tgt, h2:hover > a.tgt, h3:hover > a.tgt, dt:hover > a.tgt { visibility: visible; }
code {
  font-family: 'Roboto Mono', monospace;
  font-weight: bold;
  white-space: pre;
}
pre code {
  display: block;
  font-weight: normal;
}
blockquote pre code {
  background: #f1f1f1;
}
dd p:first-of-type {
  margin-block-start: 0em;
}
</style>
</head><body>
"""

# Extra CSS rules that are only added for pages containing a </table> tag.
TABLE_STYLE = """\
table {
  border-color: grey;
  border-spacing: 0;
}
tr {
  border-top: 1px solid grey;
}
tr:nth-child(2n) {
  background-color: #f6f8fa;
}
th, td {
  border: 1px solid #dfe2e5;
  text-align: center;
  padding-left: 1em;
  padding-right: 1em;
}
"""

# Footer appended to man-page html only; %s receives the date string.
MAN_HTML_END = """\
<div style="float: right"><p><i>%s</i></p></div>
"""

HTML_END = """\
</body></html>
"""

# Header of the nroff output; filled from fi.man_headings, which is the tuple
# (prog, sect, date, prog_ver, prefix).
MAN_START = r"""
.TH "%s" "%s" "%s" "%s" "User Commands"
.\" prefix=%s
""".lstrip()

# Trailer of the nroff output (currently empty).
MAN_END = """\
"""

# Each pair is (marker char stored in the accumulated text, nroff escape that
# manify() substitutes for it).
NORM_FONT = ('\1', r"\fP")
BOLD_FONT = ('\2', r"\fB")
UNDR_FONT = ('\3', r"\fI")
NBR_DASH = ('\4', r"\-")
NBR_SPACE = ('\xa0', r"\ ")

# Splits an input path into fn, srcdir, name, prog and (optional) sect groups.
FILENAME_RE = re.compile(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+?)(\.(?P<sect>\d+))?)\.md)$')
# A "name=value" line, as matched against the lines of the Makefile.
ASSIGNMENT_RE = re.compile(r'^(\w+)=(.+)')
# The first double-quoted string (used to pull the version out of version.h).
QUOTED_RE = re.compile(r'"(.+?)"')
# A ${name} variable reference inside a Makefile value.
VAR_REF_RE = re.compile(r'\$\{(\w+)\}')
# A version number preceded by a space and followed by a comma or space.
VERSION_RE = re.compile(r' (\d[.\d]+)[, ]')
# Runs of the control characters \1 through \7 (which include the marker chars above).
BIN_CHARS_RE = re.compile(r'[\1-\7]+')
SPACE_DOUBLE_DASH_RE = re.compile(r'\s--(\s)')
NON_SPACE_SINGLE_DASH_RE = re.compile(r'(^|\W)-')
WHITESPACE_RE = re.compile(r'\s')
# Text between a bold-font marker and the next '=' or normal-font marker.
CODE_BLOCK_RE = re.compile(r'[%s]([^=%s]+)[=%s]' % (BOLD_FONT[0], NORM_FONT[0], NORM_FONT[0]))
NBR_DASH_RE = re.compile(r'[%s]' % NBR_DASH[0])
# Characters that txt2target() replaces with '_' in a link target.
INVALID_TARGET_CHARS_RE = re.compile(r'[^-A-Za-z0-9._]')
# A non-alphanumeric first character, which txt2target() prefixes with 't'.
INVALID_START_CHAR_RE = re.compile(r'^([^A-Za-z0-9])')
# A line-leading quote or dot, which manify() protects with a \& prefix.
MANIFY_LINESTART_RE = re.compile(r"^(['.])", flags=re.M)

# The markdown -> html function; assigned in the __main__ section.
md_parser = None
# Substitution values for man pages; filled by find_man_substitutions().
env_subs = { }

# Number of warn() calls so far; a non-zero count makes the script exit with 1.
warning_count = 0
135
def main():
    """Convert every markdown file named on the command line.

    Each path in args.mdfiles is handed to parse_md_file().  When the --test
    option is active a success message is printed once all files are done.
    """
    md_paths = args.mdfiles
    for md_path in md_paths:
        parse_md_file(md_path)

    if not args.test:
        return
    print("The test was successful.")
142
143
def parse_md_file(mdfn):
    """Convert one markdown file into html and, for man pages, nroff.

    mdfn must match FILENAME_RE (it has to end in .md); a name such as
    PROG.NUM.md is treated as a man page.  The text is run through the
    selected markdown parser and then through TransformHtml.  Unless --test
    is active, the results are written to NAME.html (plus NAME for a man
    page), inside args.dest when that is set to something other than '.'.
    """
    name_match = FILENAME_RE.match(mdfn)
    if name_match is None:
        die('Failed to parse a md input file name:', mdfn)
    fi = argparse.Namespace(**name_match.groupdict())
    fi.want_manpage = bool(fi.sect)
    fi.title = fi.prog + ('(' + fi.sect + ') man page' if fi.want_manpage else ' for rsync')

    if fi.want_manpage:
        # The substitution values are looked up once and shared by all files.
        if not env_subs:
            find_man_substitutions()
        version_label = 'rsync ' + env_subs['VERSION']
        if fi.prog != 'rsync':
            version_label = fi.prog + ' from ' + version_label
        fi.man_headings = (fi.prog, fi.sect, env_subs['date'], version_label, env_subs['prefix'])

    with open(mdfn, 'r', encoding='utf-8') as md_fh:
        md_text = md_fh.read()

    gfm_marker = '@USE_GFM_PARSER@'
    use_gfm_parser = gfm_marker in md_text
    if use_gfm_parser:
        md_text = md_text.replace(gfm_marker, '')

    if fi.want_manpage:
        for placeholder, key in (('@VERSION@', 'VERSION'), ('@BINDIR@', 'bindir'), ('@LIBDIR@', 'libdir')):
            md_text = md_text.replace(placeholder, env_subs[key])

    if not use_gfm_parser:
        fi.html_in = md_parser(md_text)
    else:
        if not gfm_parser:
            die('Input file requires cmarkgfm parser:', mdfn)
        fi.html_in = gfm_parser(md_text)
    md_text = None

    # The transformation stores its results in fi.html_out and fi.man_out.
    TransformHtml(fi)

    if args.test:
        return

    outputs = [(fi.name + '.html', fi.html_out)]
    if fi.want_manpage:
        outputs.append((fi.name, fi.man_out))
    use_dest = args.dest and args.dest != '.'
    for out_name, out_text in outputs:
        out_path = os.path.join(args.dest, out_name) if use_dest else out_name
        # Remove any existing entry first rather than writing through it.
        if os.path.lexists(out_path):
            os.unlink(out_path)
        print("Wrote:", out_path)
        with open(out_path, 'w', encoding='utf-8') as out_fh:
            out_fh.write(out_text)
6dc94e39 199
53fae556 200
a2b630c0
WD
def find_man_substitutions():
    """Fill the module-level env_subs dict with the man-page substitutions.

    Sets 'prefix' (from RSYNC_OVERRIDE_PREFIX when that is in the
    environment), 'VERSION', the variables assigned in ./Makefile up to and
    including 'srcdir', and 'date'.  With --test, fixed values are used for
    VERSION, bindir and libdir and no files are read.

    The date comes from the latest git commit time when a .git entry exists
    next to this script, otherwise from the mtime of version.h (or of the
    Makefile if that mtime is zero).

    Exits via die() when version.h or Makefile is missing, when version.h
    has no quoted string, or when the Makefile references an unset variable.
    """
    # A bare script name has an empty dirname; fall back to '.' so that the
    # lookups below do not probe the filesystem root.
    srcdir = (os.path.dirname(sys.argv[0]) or '.') + '/'
    mtime = 0

    git_dir = srcdir + '.git'
    if os.path.lexists(git_dir):
        mtime = int(subprocess.check_output(['git', '--git-dir', git_dir, 'log', '-1', '--format=%at']))

    # Allow "prefix" to be overridden via the environment:
    env_subs['prefix'] = os.environ.get('RSYNC_OVERRIDE_PREFIX', None)

    if args.test:
        env_subs['VERSION'] = '1.0.0'
        env_subs['bindir'] = '/usr/bin'
        env_subs['libdir'] = '/usr/lib/rsync'
    else:
        version_fn = srcdir + 'version.h'
        for fn in (version_fn, 'Makefile'):
            try:
                st = os.lstat(fn)
            except OSError:
                # Report the path that was actually checked.
                die('Failed to find', fn)
            if not mtime:
                mtime = st.st_mtime

        with open(version_fn, 'r', encoding='utf-8') as fh:
            txt = fh.read()
        m = QUOTED_RE.search(txt)
        if not m:
            die('Failed to find a quoted version string in', version_fn)
        env_subs['VERSION'] = m.group(1)

        def expand_ref(ref):
            # Look up one ${name} reference, refusing unset (or None) values.
            val = env_subs.get(ref.group(1))
            if val is None:
                die('Makefile references an unset variable:', ref.group(1))
            return val

        with open('Makefile', 'r', encoding='utf-8') as fh:
            for line in fh:
                m = ASSIGNMENT_RE.match(line)
                if not m:
                    continue
                var, val = (m.group(1), m.group(2))
                # An environment override of prefix wins over the Makefile.
                if var == 'prefix' and env_subs[var] is not None:
                    continue
                # Expand repeatedly, as a value may refer to further variables.
                while VAR_REF_RE.search(val):
                    val = VAR_REF_RE.sub(expand_ref, val)
                env_subs[var] = val
                if var == 'srcdir':
                    break

    env_subs['date'] = time.strftime('%d %b %Y', time.localtime(mtime))
53fae556 245
ae82762c 246
03fc62ad
WD
def html_via_commonmark(txt):
    """Return the html rendering of the markdown txt using the commonmark lib."""
    syntax_tree = commonmark.Parser().parse(txt)
    renderer = commonmark.HtmlRenderer()
    return renderer.render(syntax_tree)
249
6dc94e39 250
class TransformHtml(HTMLParser):
    """State machine that tweaks the parsed html and builds the nroff output.

    Constructing an instance does all the work: fi.html_in is fed through
    the parser and the results are stored in fi.html_out and fi.man_out
    (the latter only has content when fi.want_manpage is set).  Link targets
    are created for headings and description-list terms, and links that do
    not resolve are reported via warn().
    """

    def __init__(self, fi):
        """Transform fi.html_in, storing fi.html_out and fi.man_out.

        fi is the namespace built by parse_md_file(); its fn, title,
        want_manpage, html_in and (for man pages) man_headings attributes
        are read, and html_in is set to None once it has been consumed.
        """
        HTMLParser.__init__(self, convert_charrefs=True)

        self.fn = fi.fn

        st = self.state = argparse.Namespace(
            list_state = [ ],
            p_macro = ".P\n",
            at_first_tag_in_li = False,
            at_first_tag_in_dd = False,
            dt_from = None,
            in_pre = False,
            in_code = False,
            html_out = [ HTML_START.replace('%TITLE%', fi.title) ],
            man_out = [ ],
            txt = '',
            want_manpage = fi.want_manpage,
            created_hashtags = set(),
            derived_hashtags = set(),
            referenced_hashtags = set(),
            bad_hashtags = set(),
            latest_targets = [ ],
            opt_prefix = 'opt',
            a_href = None, # Defined up front so a stray </a> cannot hit a missing attribute.
            a_txt_start = None,
            target_suf = '',
            )

        if st.want_manpage:
            st.man_out.append(MAN_START % fi.man_headings)

        if '</table>' in fi.html_in:
            st.html_out[0] = st.html_out[0].replace('</style>', TABLE_STYLE + '</style>')

        self.feed(fi.html_in)
        fi.html_in = None

        if st.want_manpage:
            st.html_out.append(MAN_HTML_END % env_subs['date'])
        st.html_out.append(HTML_END)
        st.man_out.append(MAN_END)

        fi.html_out = ''.join(st.html_out)
        st.html_out = None

        fi.man_out = ''.join(st.man_out)
        st.man_out = None

        # All targets are known now, so the collected links can be validated.
        for tgt, txt in st.derived_hashtags:
            derived = txt2target(txt, tgt)
            if derived not in st.created_hashtags:
                txt = BIN_CHARS_RE.sub('', txt.replace(NBR_DASH[0], '-').replace(NBR_SPACE[0], ' '))
                warn('Unknown derived hashtag link in', self.fn, 'based on:', (tgt, txt))

        for bad in st.bad_hashtags:
            if bad in st.created_hashtags:
                warn('Missing "#" in hashtag link in', self.fn + ':', bad)
            else:
                warn('Unknown non-hashtag link in', self.fn + ':', bad)

        for bad in st.referenced_hashtags - st.created_hashtags:
            warn('Unknown hashtag link in', self.fn + ':', '#' + bad)


    def handle_starttag(self, tag, attrs_list):
        """Process an opening tag, updating the html and nroff output.

        An <ol> that starts at 0 is turned into a <dl>, and the first tag
        inside each of its <li> items supplies the <dt>.  Link hrefs are
        checked and recorded for the validation done at the end of __init__.
        """
        st = self.state
        if args.debug:
            self.output_debug('START', (tag, attrs_list))
        if st.at_first_tag_in_li:
            if st.list_state[-1] == 'dl':
                st.dt_from = tag
                if tag == 'p':
                    tag = 'dt'
                else:
                    st.html_out.append('<dt>')
            elif tag == 'p':
                st.at_first_tag_in_dd = True # Kluge to suppress a .P at the start of an li.
            st.at_first_tag_in_li = False
        if tag == 'p':
            if not st.at_first_tag_in_dd:
                st.man_out.append(st.p_macro)
        elif tag == 'li':
            st.at_first_tag_in_li = True
            lstate = st.list_state[-1]
            if lstate == 'dl':
                # The <li> itself is not output for a description list.
                return
            if lstate == 'o':
                st.man_out.append(".IP o\n")
            else:
                st.man_out.append(".IP " + str(lstate) + ".\n")
                st.list_state[-1] += 1
        elif tag == 'blockquote':
            st.man_out.append(".RS 4\n")
        elif tag == 'pre':
            st.in_pre = True
            st.man_out.append(st.p_macro + ".nf\n")
        elif tag == 'code' and not st.in_pre:
            st.in_code = True
            st.txt += BOLD_FONT[0]
        elif tag == 'strong' or tag == 'b':
            st.txt += BOLD_FONT[0]
        elif tag == 'em' or tag == 'i':
            if st.want_manpage:
                tag = 'u' # Change it into underline to be more like the man page
            st.txt += UNDR_FONT[0]
        elif tag == 'ol':
            start = 1
            for var, val in attrs_list:
                if var == 'start':
                    start = int(val) # We only support integers.
                    break
            if st.list_state:
                st.man_out.append(".RS\n")
            if start == 0:
                tag = 'dl'
                attrs_list = [ ]
                st.list_state.append('dl')
            else:
                st.list_state.append(start)
            st.man_out.append(st.p_macro)
            st.p_macro = ".IP\n"
        elif tag == 'ul':
            st.man_out.append(st.p_macro)
            if st.list_state:
                st.man_out.append(".RS\n")
                st.p_macro = ".IP\n"
            st.list_state.append('o')
        elif tag == 'hr':
            st.man_out.append(".l\n")
            st.html_out.append("<hr />")
            return
        elif tag == 'a':
            st.a_href = None
            for var, val in attrs_list:
                if var == 'href':
                    if val.startswith(('https://', 'http://', 'mailto:', 'ftp:')):
                        pass # nothing to check
                    elif '#' in val:
                        # Split at the first '#' only: a second '#' must stay in
                        # tgt so that it is reported as a bad link below instead
                        # of breaking the two-value unpacking.
                        pg, tgt = val.split('#', 1)
                        if pg and pg not in VALID_PAGES or '#' in tgt:
                            st.bad_hashtags.add(val)
                        elif tgt in ('', 'opt', 'dopt'):
                            # The real target gets derived from the link text
                            # when the closing tag is seen.
                            st.a_href = val
                        elif pg == '':
                            st.referenced_hashtags.add(tgt)
                            if tgt in st.latest_targets:
                                warn('Found link to the current section in', self.fn + ':', val)
                    elif val not in VALID_PAGES:
                        st.bad_hashtags.add(val)
            st.a_txt_start = len(st.txt)
        st.html_out.append('<' + tag + ''.join(' ' + var + '="' + htmlify(val) + '"' for var, val in attrs_list) + '>')
        st.at_first_tag_in_dd = False


    def handle_endtag(self, tag):
        """Process a closing tag, flushing accumulated text to the nroff output.

        Tags in CONSUMES_TXT (and the tag that supplied a <dt>) take the text
        gathered since the last flush.  Headings and description terms also
        get link targets, and a link with a derived target has its href
        rewritten in the html that was already output.
        """
        st = self.state
        if args.debug:
            self.output_debug('END', (tag,))
        if tag in CONSUMES_TXT or st.dt_from == tag:
            txt = st.txt.strip()
            st.txt = ''
        else:
            txt = None
        add_to_txt = None
        if tag == 'h1':
            tgt = txt
            # Reset the suffix so that a previous NEWS heading's version is
            # not applied to the sections of a heading without a version.
            st.target_suf = ''
            if tgt.startswith('NEWS for '):
                m = VERSION_RE.search(tgt)
                if m:
                    tgt = m.group(1)
                    st.target_suf = '-' + tgt
            self.add_targets(tgt)
        elif tag == 'h2':
            st.man_out.append(st.p_macro + '.SH "' + manify(txt) + '"\n')
            self.add_targets(txt, st.target_suf)
            st.opt_prefix = 'dopt' if txt == 'DAEMON OPTIONS' else 'opt'
        elif tag == 'h3':
            st.man_out.append(st.p_macro + '.SS "' + manify(txt) + '"\n')
            self.add_targets(txt, st.target_suf)
        elif tag == 'p':
            if st.dt_from == 'p':
                tag = 'dt'
                st.man_out.append('.IP "' + manify(txt) + '"\n')
                if txt.startswith(BOLD_FONT[0]):
                    self.add_targets(txt)
                st.dt_from = None
            elif txt != '':
                st.man_out.append(manify(txt) + "\n")
        elif tag == 'li':
            if st.list_state[-1] == 'dl':
                if st.at_first_tag_in_li:
                    die("Invalid 0. -> td translation")
                tag = 'dd'
            if txt != '':
                st.man_out.append(manify(txt) + "\n")
            st.at_first_tag_in_li = False
        elif tag == 'blockquote':
            st.man_out.append(".RE\n")
        elif tag == 'pre':
            st.in_pre = False
            st.man_out.append(manify(txt) + "\n.fi\n")
        elif (tag == 'code' and not st.in_pre):
            st.in_code = False
            add_to_txt = NORM_FONT[0]
        elif tag == 'strong' or tag == 'b':
            add_to_txt = NORM_FONT[0]
        elif tag == 'em' or tag == 'i':
            if st.want_manpage:
                tag = 'u' # Change it into underline to be more like the man page
            add_to_txt = NORM_FONT[0]
        elif tag == 'ol' or tag == 'ul':
            if st.list_state.pop() == 'dl':
                tag = 'dl'
            if st.list_state:
                st.man_out.append(".RE\n")
            else:
                st.p_macro = ".P\n"
            st.at_first_tag_in_dd = False
        elif tag == 'hr':
            return
        elif tag == 'a':
            if st.a_href:
                atxt = st.txt[st.a_txt_start:]
                find = 'href="' + st.a_href + '"'
                # Search backward for the opening tag that was already output.
                for j in range(len(st.html_out)-1, 0, -1):
                    if find in st.html_out[j]:
                        pg, tgt = st.a_href.split('#', 1)
                        derived = txt2target(atxt, tgt)
                        if pg == '':
                            if derived in st.latest_targets:
                                warn('Found link to the current section in', self.fn + ':', st.a_href)
                            st.derived_hashtags.add((tgt, atxt))
                        st.html_out[j] = st.html_out[j].replace(find, 'href="' + pg + '#' + derived + '"')
                        break
                else:
                    die('INTERNAL ERROR: failed to find href in html data:', find)
        st.html_out.append('</' + tag + '>')
        if add_to_txt:
            if txt is None:
                st.txt += add_to_txt
            else:
                txt += add_to_txt
        if st.dt_from == tag:
            st.man_out.append('.IP "' + manify(txt) + '"\n')
            st.html_out.append('</dt><dd>')
            st.at_first_tag_in_dd = True
            st.dt_from = None
        elif tag == 'dt':
            st.html_out.append('<dd>')
            st.at_first_tag_in_dd = True


    def handle_data(self, txt):
        """Append text to the html output and to the accumulated text.

        Outside of a <pre>, dashes (and the space before a free-standing
        double dash) are swapped for the non-breaking marker chars, and
        inside <code> all whitespace becomes a non-breaking space in the
        accumulated text.
        """
        st = self.state
        if '](' in txt:
            warn('Malformed link in', self.fn + ':', txt)
        if args.debug:
            self.output_debug('DATA', (txt,))
        if st.in_pre:
            html = htmlify(txt)
        else:
            txt = SPACE_DOUBLE_DASH_RE.sub(NBR_SPACE[0] + r'--\1', txt).replace('--', NBR_DASH[0]*2)
            txt = NON_SPACE_SINGLE_DASH_RE.sub(r'\1' + NBR_DASH[0], txt)
            html = htmlify(txt)
            if st.in_code:
                txt = WHITESPACE_RE.sub(NBR_SPACE[0], txt)
                html = html.replace(NBR_DASH[0], '-').replace(NBR_SPACE[0], ' ') # <code> is non-breaking in CSS
        st.html_out.append(html.replace(NBR_SPACE[0], '&nbsp;').replace(NBR_DASH[0], '-&#8288;'))
        st.txt += txt


    def add_targets(self, txt, suf=None):
        """Output link-target tags for txt and record the created targets.

        Every code block found in txt becomes a target (or the whole of txt
        when there is none).  The first target is output as a self-linking
        <a> tag and any others as empty <span> tags.  suf, when given, is
        appended to each target, and a target that already exists gets a
        numeric suffix to make it unique.
        """
        st = self.state
        targets = CODE_BLOCK_RE.findall(txt)
        if not targets:
            targets = [ txt ]
        first_one = True
        for txt in targets:
            txt = txt2target(txt, st.opt_prefix)
            if not txt:
                continue
            if suf:
                txt += suf
            if txt in st.created_hashtags:
                for j in range(2, 1000):
                    chk = txt + '-' + str(j)
                    if chk not in st.created_hashtags:
                        print('Made link target unique:', chk)
                        txt = chk
                        break
            if first_one:
                st.html_out.append('<a id="' + txt + '" href="#' + txt + '" class="tgt"></a>')
                first_one = False
            else:
                st.html_out.append('<span id="' + txt + '"></span>')
            st.created_hashtags.add(txt)
        st.latest_targets = targets


    def output_debug(self, event, extra):
        """Print a parser event followed by a dump of the current state.

        With a debug level below 2 the dump is made from a copy of the state
        whose html_out and man_out lists are cut down to their last 2 items.
        """
        import pprint
        st = self.state
        if args.debug < 2:
            st = argparse.Namespace(**vars(st))
            if len(st.html_out) > 2:
                st.html_out = ['...'] + st.html_out[-2:]
            if len(st.man_out) > 2:
                st.man_out = ['...'] + st.man_out[-2:]
        print(event, extra)
        pprint.PrettyPrinter(indent=2).pprint(vars(st))
561
562
def txt2target(txt, opt_prefix):
    """Return the link-target name derived from the text txt.

    Surrounding whitespace and trailing colons are dropped, and if the text
    contains a code block only that block's contents are used.  Non-breaking
    dash markers become '-', the other marker chars are removed, and any
    remaining invalid char becomes '_'.  A result starting with '-' gets
    opt_prefix put in front of it (when opt_prefix is non-empty); otherwise
    a non-alphanumeric first char is preceded by a 't'.
    """
    target = txt.strip().rstrip(':')
    code_match = CODE_BLOCK_RE.search(target)
    if code_match is not None:
        target = code_match.group(1)
    for pattern, repl in ((NBR_DASH_RE, '-'), (BIN_CHARS_RE, ''), (INVALID_TARGET_CHARS_RE, '_')):
        target = pattern.sub(repl, target)
    if opt_prefix and target.startswith('-'):
        return opt_prefix + target
    return INVALID_START_CHAR_RE.sub(r't\1', target)
576
577
def manify(txt):
    """Return txt converted into text that is suitable for nroff output.

    Backslashes are doubled first, then each internal marker char is swapped
    for its nroff escape, and finally any line that starts with a quote or
    a dot has that char protected from being read as a macro.
    """
    escaped = txt.replace('\\', '\\\\')
    # Each pair holds (marker char, nroff escape).
    for marker, nroff in (NBR_SPACE, NBR_DASH, NORM_FONT, BOLD_FONT, UNDR_FONT):
        escaped = escaped.replace(marker, nroff)
    return MANIFY_LINESTART_RE.sub(r'\&\1', escaped)
53fae556
WD
585
586
def htmlify(txt):
    """Return txt with the &, <, > and double-quote chars turned into html entities."""
    # The ampersand must be handled first so that the other entities are not re-escaped.
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;')):
        txt = txt.replace(char, entity)
    return txt
53fae556
WD
589
590
def warn(*msg):
    """Print the msg args to stderr and add one to the global warning count."""
    global warning_count
    print(*msg, file=sys.stderr)
    warning_count = warning_count + 1
53fae556
WD
595
596
def die(*msg):
    """Output the msg args as a warning and then exit with status 1."""
    warn(*msg)
    raise SystemExit(1)
600
601
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Output html and (optionally) nroff for markdown pages.", add_help=False)
    parser.add_argument('--test', action='store_true', help="Just test the parsing without outputting any files.")
    parser.add_argument('--dest', metavar='DIR', help="Put files into DIR instead of the current directory.")
    parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing. Repeat for even more.')
    parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
    parser.add_argument("mdfiles", nargs='+', help="The source .md files to convert.")
    args = parser.parse_args()

    # Prefer cmarkgfm (which also provides the github-flavored parser) and
    # fall back to commonmark.  Catching Exception rather than using a bare
    # except keeps the fallback best-effort without also swallowing a
    # KeyboardInterrupt or SystemExit.
    try:
        import cmarkgfm
        md_parser = cmarkgfm.markdown_to_html
        gfm_parser = cmarkgfm.github_flavored_markdown_to_html
    except Exception:
        try:
            import commonmark
            md_parser = html_via_commonmark
        except Exception:
            die("Failed to find cmarkgfm or commonmark for python3.")
        gfm_parser = None

    main()
    # Any warning makes the run count as a failure.
    if warning_count:
        sys.exit(1)