--- /dev/null
+#!/usr/bin/python3
+# coding: utf-8
+
+"""
+TVH C renderer
+==============
+
+This class renders parsed markdown to TVH C code.
+
+## Authors and License
+
+Copyright (C) 2016 Jaroslav Kysela
+
+License: WTFPL 2
+"""
+
+import sys
+from textwrap import wrap
+from mistune import Markdown, Renderer
+
# Text fragments that are emitted as plain literals instead of being marked
# for translation (see TVH_C_Renderer.get_lang).
NOLANG = ['.', ',']
+
class Object:
    """Empty attribute container; callers attach whatever fields they need."""
+
class TVH_C_Renderer(Renderer):
    """Render parsed markdown as entries of a C ``const char *[]`` initializer.

    Every render method returns zero or more lines of C source.  An entry is
    either a plain literal (``"text",``) or a translatable one
    (``LANGPREF N_("text"),``); concatenating all entries at run time yields
    markdown text again.

    Lists and tables are assembled in two steps: the item/row/cell callbacks
    wrap their already rendered content in a raw ``<tag><length>:<content>``
    marker and the enclosing ``list``/``table`` callback unpacks those
    markers with ``get_block``.  The markers must stay raw (not wrapped in a
    C literal), otherwise ``get_block`` cannot parse them.
    """

    # -- low level helpers --------------------------------------------------

    @staticmethod
    def _c_escape(text):
        """Escape backslashes and double quotes for a C string literal."""
        return text.replace('\\', '\\\\').replace('"', '\\"')

    @staticmethod
    def _entries(text):
        """Split rendered C source into entries.

        Physical lines ending with a backslash (the line continuations
        produced by ``text``) are kept together with the line that follows,
        so a wrapped literal is never cut in the middle.
        """
        entries = []
        pending = ''
        for line in text.splitlines(True):
            pending += line
            if not line.rstrip('\n').endswith('\\'):
                entries.append(pending)
                pending = ''
        if pending:
            entries.append(pending)
        return entries

    @staticmethod
    def _ends_line(entry):
        """Return True if *entry* is a plain literal ending in a ``\\n`` escape."""
        body = entry.rstrip('\n')
        if not (body.startswith('"') and body.endswith('n",')):
            return False
        body = body[:-3]
        # An odd number of backslashes means the final one escapes the "n".
        slashes = len(body) - len(body.rstrip('\\'))
        return slashes % 2 == 1

    def _prefix_lines(self, text, prefix):
        """Emit *prefix* at the start of every markdown line of rendered *text*."""
        r = ''
        line_start = True
        for entry in self._entries(text):
            if line_start:
                r += self.get_nolang(prefix)
            r += entry
            line_start = self._ends_line(entry)
        return r

    def get_nolang(self, text):
        """Return *text* as a plain C literal entry.

        *text* is inserted verbatim, so it must already be valid C string
        content (use ``\\\\n`` style escapes, see ``_c_escape`` for raw text).
        """
        return '"' + text + '",\n'

    def get_lang(self, text):
        """Return *text* as a translatable entry (plain one for NOLANG items)."""
        if text in NOLANG:
            return self.get_nolang(text)
        return 'LANGPREF N_("' + text + '"),\n'

    def get_block(self, text):
        """Unpack one ``<tag><length>:<content>`` marker from the start of *text*.

        :returns: tuple ``(remaining text, tag character, content)``; three
                  empty strings when no ``:`` follows the tag.
        :raises ValueError: when the length field is not a number.
        """
        kind = text[0]
        colon = text.find(':')
        if colon <= 0:
            return ('', '', '')
        length = int(text[1:colon])
        start = colon + 1
        return (text[start + length:], kind, text[start:start + length])

    # -- block level --------------------------------------------------------

    def newline(self):
        return self.get_nolang('\\n')

    def text(self, text):
        """Render plain text as a translatable entry, wrapped at 74 columns."""
        if not text:
            return ''
        pre = ''
        post = ''
        # wrap() drops leading/trailing whitespace; keep one blank so that
        # neighbouring inline elements do not run together.
        if ord(text[0]) <= ord(' '):
            pre = self.get_nolang(' ')
        if ord(text[-1]) <= ord(' '):
            post = self.get_nolang(' ')
        text = text.replace('\n', ' ')
        # Only break at whitespace: the join below re-inserts a blank at
        # every break, which would corrupt hyphenated or over-long words.
        lines = wrap(text, 74, break_long_words=False, break_on_hyphens=False)
        if not lines:
            # Whitespace only: never emit an empty translatable string.
            return pre
        # Escape after wrapping so an escape sequence is never split.
        body = ' \\\n'.join(self._c_escape(line) for line in lines)
        return pre + self.get_lang(body) + post

    def linebreak(self):
        return self.get_nolang('\\n')

    def hrule(self):
        return self.get_nolang('---\\n')

    def header(self, text, level, raw=None):
        return self.get_nolang('#'*(level+1)) + \
               text + \
               self.get_nolang('\\n\\n')

    def paragraph(self, text):
        return text + self.get_nolang('\\n\\n')

    def list(self, text, ordered=True):
        """Unpack the item markers in *text* and emit one bullet line per item."""
        bullet = '# ' if ordered else '* '
        r = ''
        while text:
            text, kind, body = self.get_block(text)
            if kind == 'l':
                # body is already rendered C source: splice it in as is.
                r += self.get_nolang(bullet) + body + self.get_nolang('\\n')
        return r

    def list_item(self, text):
        # Raw marker, unpacked again by list().
        return 'l' + str(len(text)) + ':' + text

    def block_code(self, code, lang=None):
        """Render a fenced code block; *code* is raw text and gets escaped."""
        r = self.get_nolang('```no-highlight\\n')
        for line in code.rstrip('\n').split('\n'):
            r += self.get_nolang(self._c_escape(line) + '\\n')
        return r + self.get_nolang('```\\n')

    def block_quote(self, text):
        return self._prefix_lines(text, '> ')

    def block_html(self, text):
        fatal('Block HTML not allowed: ' + repr(text))

    def inline_html(self, text):
        fatal('Inline HTML not allowed: ' + repr(text))

    # -- inline level -------------------------------------------------------

    def _emphasis(self, text, pref):
        return self.get_nolang(pref) + text + self.get_nolang(pref + ' ')

    def emphasis(self, text):
        return self._emphasis(text, '_')

    def double_emphasis(self, text):
        return self._emphasis(text, '__')

    def strikethrough(self, text):
        return self._emphasis(text, '~~')

    def codespan(self, text):
        # text is raw (not rendered) for code spans.
        return self.get_nolang('`' + self._c_escape(text) + '`')

    def autolink(self, link, is_email=False):
        return self.get_nolang('<' + self._c_escape(link) + '>')

    def link(self, link, title, text, image=False):
        """Render ``[text](link "title")``; *text* is already rendered."""
        r = self.get_nolang((image and '!' or '') + '[') + \
            text + self.get_nolang('](' + self._c_escape(link))
        if title:
            # The title belongs inside the parentheses, in double quotes.
            r += self.get_nolang(' \\"') + \
                 self.get_lang(self._c_escape(title)) + \
                 self.get_nolang('\\"')
        return r + self.get_nolang(')')

    def image(self, src, title, text):
        # The alt text arrives raw from the inline lexer, render it first.
        return self.link(src, title, self.text(text), image=True)

    # -- tables -------------------------------------------------------------

    def _table_rows(self, text):
        """Unpack row/flag/cell markers into a list of rows of cell objects."""
        rows = []
        while text:
            text, kind, row = self.get_block(text)
            if kind != 'r':
                continue
            cells = []
            flags = {}
            while row:
                row, kind2, value = self.get_block(row)
                if kind2 == 'f':
                    name, _, val = value.partition('=')
                    flags[name] = val
                elif kind2 == 'c':
                    cell = Object()
                    cell.flags = flags
                    cell.text = value
                    cells.append(cell)
                    # Flags precede the cell they describe; start afresh.
                    flags = {}
            rows.append(cells)
        return rows

    def _table_line(self, row):
        """Emit one table row: cells separated by `` | ``, then a newline."""
        r = ''
        for i, cell in enumerate(row):
            if i > 0:
                r += self.get_nolang(' | ')
            r += cell.text
        return r + self.get_nolang('\\n')

    def table(self, header, body):
        hrows = self._table_rows(header)
        brows = self._table_rows(body)
        colscount = 0
        align = {}
        for row in hrows + brows:
            colscount = max(len(row), colscount)
            for i, cell in enumerate(row):
                value = cell.flags.get('align')
                if value:
                    # 'l'eft, 'c'enter, 'r'ight; 'N' for str(None)
                    align[i] = value[0]
        r = ''
        for row in hrows:
            r += self._table_line(row)
        rules = {'c': ':---:', 'l': ':---', 'r': '---:'}
        separator = ' | '.join(rules.get(align.get(i), '---')
                               for i in range(colscount))
        r += self.get_nolang(separator + '\\n')
        for row in brows:
            r += self._table_line(row)
        return r

    def table_row(self, content):
        # Raw marker, unpacked again by table().
        return 'r' + str(len(content)) + ':' + content

    def table_cell(self, content, **flags):
        """Wrap a rendered cell and its flags in raw markers for table()."""
        r = ''
        for fl in flags:
            v = flags[fl]
            if isinstance(v, bool):
                v = 1 if v else 0
            field = fl + '=' + str(v)
            r += 'f' + str(len(field)) + ':' + field
        return r + 'c' + str(len(content)) + ':' + content

    # -- footnotes ----------------------------------------------------------

    def footnote_ref(self, key, index):
        # Remember the number so footnote_item() can emit the same label.
        self.__dict__.setdefault('_footnote_index', {})[key] = index
        return self.get_nolang('[^' + str(index) + ']')

    def footnote_item(self, key, text):
        """Render a footnote definition labelled like its reference."""
        index = self.__dict__.get('_footnote_index', {}).get(key, key)
        r = self.get_nolang('[^' + self._c_escape(str(index)) + ']:\\n')
        return r + self._prefix_lines(text, '  ')

    def footnotes(self, text):
        return text
+
+#
+#
+#
+
def optimize(text):
    """Merge runs of adjacent plain C literal lines into a single literal.

    Two neighbouring lines of the form ``"a",`` and ``"b",`` become
    ``"ab",``.  A line is only extended when it is a complete, closed plain
    literal (starts with ``"`` and ends with ``",``); anything else, e.g. the
    tail of a wrapped translatable string that happens to start with a
    quote, is left untouched.  Empty lines are dropped.

    :param text: rendered C source, one entry per line.
    :returns: the optimized source, every kept line terminated by a newline.
    """
    out = []
    prev = ''
    for line in text.splitlines():
        closed_literal = (len(prev) >= 3 and prev.startswith('"')
                          and prev.endswith('",'))
        if closed_literal and line.startswith('"'):
            # drop the closing '",' of prev and the opening '"' of line
            prev = prev[:-2] + line[1:]
            continue
        if prev:
            out.append(prev)
        prev = line
    if prev:
        out.append(prev)
    return ''.join(entry + '\n' for entry in out)
+
+#
+#
+#
+
def fatal(msg):
    """Report *msg* on standard error and terminate with exit status 1."""
    print('FATAL: ' + msg, file=sys.stderr)
    raise SystemExit(1)
+
def argv_get(what):
    """Look up the long option ``--<what>`` on the command line.

    :param what: option name without the leading dashes.
    :returns: the text after ``=`` for ``--what=value`` (may be empty),
              ``True`` for a bare ``--what``, ``None`` when the option is
              absent.  Options that merely share the prefix (``--whatever``)
              are ignored.
    """
    flag = '--' + what
    assign = flag + '='
    for arg in sys.argv[1:]:
        if arg == flag:
            return True
        if arg.startswith(assign):
            return arg[len(assign):]
    return None
+
+#
+#
+#
+
# Command line: --in=<markdown file> --name=<identifier of the C array>
input = argv_get('in')
# argv_get may return True for an option given without "=value".
if not input or input is True:
    fatal('Specify input file.')
name = argv_get('name')
if not name or name is True:
    fatal('Specify class name.')

# At most 2 MiB are read; longer input is cut off at that point.
with open(input) as fp:
    text = fp.read(1024*1024*2)

renderer = TVH_C_Renderer(parse_html=1)
md = Markdown(renderer)
text = md(text)
text = optimize(text)

# The array is NULL terminated so C code can iterate without a length.
print('const char *' + name + '[] = {\n' + text + '\nNULL\n};\n')
--- /dev/null
+#!/usr/bin/python3
+# coding: utf-8
+"""
+ mistune
+ ~~~~~~~
+
+ The fastest markdown parser in pure Python with renderer feature.
+
+ :copyright: (c) 2014 - 2015 by Hsiaoming Yang.
+"""
+
+import re
+import inspect
+
# Module metadata and the names exported by ``from mistune import *``.
__version__ = '0.6dev'
__author__ = 'Hsiaoming Yang <me@lepture.com>'
__all__ = [
    'BlockGrammar',
    'BlockLexer',
    'InlineGrammar',
    'InlineLexer',
    'Renderer',
    'Markdown',
    'markdown',
    'escape',
]
+
+
# Runs of whitespace (collapsed by _keyify).
_key_pattern = re.compile(r'\s+')
# An "&" that does not already start an entity such as "&name;" or "&#39;".
_escape_pattern = re.compile(r'&(?!#?\w+;)')
# Windows and old Mac line endings.
_newline_pattern = re.compile(r'\r\n|\r')

# Alternation of the HTML tag names treated as inline elements.
_inline_tag = '|'.join((
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
    'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u',
    'mark', 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins',
    'del', 'img', 'font',
))
# Any tag name that is not one of the inline tags above.
_block_tag = r'(?!(?:%s)\b)\w+(?!:/|[^\w\s@]*@)\b' % _inline_tag
+
def _pure_pattern(regex):
    """Return the source of compiled *regex* without a leading ``^`` anchor."""
    source = regex.pattern
    return source[1:] if source.startswith('^') else source
+
+
def _keyify(key):
    """Normalize a link/footnote key: lower case, whitespace runs to one blank."""
    lowered = key.lower()
    return _key_pattern.sub(' ', lowered)
+
+
def escape(text, quote=False, smart_amp=True):
    """Replace special characters "&", "<" and ">" to HTML-safe sequences.

    The original cgi.escape will always escape "&", but you can control
    this one for a smart escape amp.

    :param text: the text to escape.
    :param quote: if set to True, " and ' will be escaped.
    :param smart_amp: if set to False, & will always be escaped.
    :returns: the escaped text.
    """
    # "&" must be handled first, the replacements below introduce new ones.
    if smart_amp:
        # keep an "&" that already starts an entity ("&amp;", "&#39;", ...)
        text = _escape_pattern.sub('&amp;', text)
    else:
        text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    if quote:
        text = text.replace('"', '&quot;')
        text = text.replace("'", '&#39;')
    return text
+
+
def preprocessing(text, tab=4):
    """Normalize raw markdown before lexing.

    Line endings become ``\\n``, tabs become *tab* spaces, U+00A0 becomes a
    plain space, U+2424 becomes a newline and lines holding only spaces are
    emptied.
    """
    normalized = _newline_pattern.sub('\n', text)
    replacements = (
        ('\t', ' ' * tab),
        ('\u00a0', ' '),
        ('\u2424', '\n'),
    )
    for old, new in replacements:
        normalized = normalized.replace(old, new)
    return re.sub(r'^ +$', '', normalized, flags=re.M)
+
+
class BlockGrammar(object):
    """Grammars for block level tokens.

    Each attribute is a compiled regular expression anchored at the start
    of the remaining input.  BlockLexer fetches them by rule name with
    ``getattr`` and dispatches a match to its ``parse_<rule name>`` method.
    The definition order matters: later patterns embed earlier ones through
    ``_pure_pattern``.
    """

    # Link reference definition; groups: key, link, optional title.
    def_links = re.compile(
        r'^ *\[([^^\]]+)\]: *'  # [key]:
        r'<?([^\s>]+)>?'  # <link> or link
        r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
    )
    # Footnote definition; groups: key, text including indented
    # continuation lines.
    def_footnotes = re.compile(
        r'^\[\^([^\]]+)\]: *('
        r'[^\n]*(?:\n+|$)'  # [^key]:
        r'(?: {1,}[^\n]*(?:\n+|$))*'
        r')'
    )

    # One or more newlines at the start of the input.
    newline = re.compile(r'^\n+')
    # Code block indented by four spaces.
    block_code = re.compile(r'^( {4}[^\n]+\n*)+')
    # Fenced code block; groups: fence, optional language, code.
    fences = re.compile(
        r'^ *(`{3,}|~{3,}) *(\S+)? *\n'  # ```lang
        r'([\s\S]+?)\s*'
        r'\1 *(?:\n+|$)'  # ```
    )
    # Horizontal rule: three or more of - * _ on a line.
    hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
    # ATX heading; groups: the # run, heading text.
    heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
    # Setext heading; groups: heading text, underline character (= or -).
    lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
    # Block quote: lines starting with ">" plus their continuation lines.
    block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
    # Whole list; groups: indentation, first bullet.  The alternatives after
    # the lazy body are the possible list terminators.
    list_block = re.compile(
        r'^( *)([*+-]|\d+\.) [\s\S]+?'
        r'(?:'
        r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))'  # hrule
        r'|\n+(?=%s)'  # def links
        r'|\n+(?=%s)'  # def footnotes
        r'|\n{2,}'
        r'(?! )'
        r'(?!\1(?:[*+-]|\d+\.) )\n*'
        r'|'
        r'\s*$)' % (
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
        )
    )
    # A single list item (used with findall); groups: whole item,
    # indentation of its bullet.
    list_item = re.compile(
        r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
        r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
        flags=re.M
    )
    # Leading bullet ("*", "+", "-" or "1.") with surrounding blanks.
    list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
    # Paragraph: lines up to the start of any other block construct.  The
    # other grammars are embedded inside this pattern's own group 1, hence
    # the renumbered backreferences.
    # NOTE(review): list_block's \1 is rewritten to \3 although the embedded
    # fences pattern already occupies groups 2-4 - confirm the intended
    # group index.
    paragraph = re.compile(
        r'^((?:[^\n]+\n?(?!'
        r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
        r'))+)\n*' % (
            _pure_pattern(fences).replace(r'\1', r'\2'),
            _pure_pattern(list_block).replace(r'\1', r'\3'),
            _pure_pattern(hrule),
            _pure_pattern(heading),
            _pure_pattern(lheading),
            _pure_pattern(block_quote),
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
            '<' + _block_tag,
        )
    )
    # Block level HTML: a comment, a paired block tag (group 1 is the tag
    # name) or a single block tag, followed by a blank line or end of input.
    block_html = re.compile(
        r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)[\s\S]+?<\/\1>' % _block_tag,
            r'''<%s(?:"[^"]*"|'[^']*'|[^'">])*?>''' % _block_tag,
        )
    )
    # Table with leading pipes; groups: header line, alignment line, body.
    table = re.compile(
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
    )
    # Table without leading pipes; same groups as ``table``.
    nptable = re.compile(
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
    )
    # Fallback: the rest of the current line.
    text = re.compile(r'^[^\n]+')
+
+
class BlockLexer(object):
    """Split markdown text into a flat list of block level tokens.

    The rules of the active rule list are tried in order; the first grammar
    pattern matching at the start of the remaining text is consumed and
    handed to the ``parse_<rule>`` method of the same name, which appends
    its token(s) to ``self.tokens``.
    """

    grammar_class = BlockGrammar

    # Rules for top level text (also used inside block quotes).
    default_rules = [
        'newline', 'hrule', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'block_quote',
        'list_block', 'block_html', 'def_links',
        'def_footnotes', 'table', 'paragraph', 'text'
    ]

    # Rules for the content of a list item.
    list_rules = (
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
        'block_quote', 'list_block', 'block_html', 'text',
    )

    # Rules for the content of a footnote definition.
    footnote_rules = (
        'newline', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'hrule', 'block_quote',
        'list_block', 'block_html', 'table', 'paragraph', 'text'
    )

    def __init__(self, rules=None, **kwargs):
        self.tokens = []
        self.def_links = {}
        self.def_footnotes = {}
        self.rules = rules or self.grammar_class()

    def __call__(self, text, rules=None):
        return self.parse(text, rules)

    def parse(self, text, rules=None):
        """Tokenize *text* with *rules* (``default_rules`` when omitted).

        :returns: ``self.tokens``, the list all tokens are appended to.
        :raises RuntimeError: when no rule matches the remaining text.
        """
        text = text.rstrip('\n')
        active = rules or self.default_rules

        while text:
            for key in active:
                m = getattr(self.rules, key).match(text)
                if m is not None:
                    getattr(self, 'parse_%s' % key)(m)
                    # consume what the rule matched
                    text = text[len(m.group(0)):]
                    break
            else:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)
        return self.tokens

    def parse_newline(self, m):
        # a single newline is not worth a token
        if len(m.group(0)) > 1:
            self.tokens.append({'type': 'newline'})

    def parse_block_code(self, m):
        # strip the four space indentation from every line
        code = re.sub(r'^ {4}', '', m.group(0), flags=re.M)
        self.tokens.append({'type': 'code', 'lang': None, 'text': code})

    def parse_fences(self, m):
        token = {'type': 'code', 'lang': m.group(2), 'text': m.group(3)}
        self.tokens.append(token)

    def parse_heading(self, m):
        hashes, title = m.group(1), m.group(2)
        self.tokens.append({
            'type': 'heading', 'level': len(hashes), 'text': title,
        })

    def parse_lheading(self, m):
        """Parse setext heading."""
        level = 2
        if m.group(2) == '=':
            level = 1
        self.tokens.append({
            'type': 'heading', 'level': level, 'text': m.group(1),
        })

    def parse_hrule(self, m):
        self.tokens.append({'type': 'hrule'})

    def parse_list_block(self, m):
        bull = m.group(2)
        # numbered bullets ("1.") contain a dot
        ordered = '.' in bull
        self.tokens.append({'type': 'list_start', 'ordered': ordered})
        self._process_list_item(m.group(0), bull)
        self.tokens.append({'type': 'list_end'})

    def _process_list_item(self, cap, bull):
        """Emit start/content/end tokens for every item of the list *cap*."""
        items = self.rules.list_item.findall(cap)
        last = len(items) - 1
        after_blank = False

        for pos, groups in enumerate(items):
            raw = groups[0]

            # remove the bullet
            item = self.rules.list_bullet.sub('', raw)

            # outdent continuation lines by at most the bullet width
            if '\n ' in item:
                width = len(raw) - len(item)
                item = re.sub(r'^ {1,%d}' % width, '', item, flags=re.M)

            # determine whether the item is loose or not
            loose = after_blank
            if not loose and re.search(r'\n\n(?!\s*$)', item):
                loose = True

            if pos != last and item:
                after_blank = item[-1] == '\n'
                loose = loose or after_blank

            start = 'loose_item_start' if loose else 'list_item_start'
            self.tokens.append({'type': start})
            # recurse
            self.parse(item, self.list_rules)
            self.tokens.append({'type': 'list_item_end'})

    def parse_block_quote(self, m):
        self.tokens.append({'type': 'block_quote_start'})
        # drop the quote markers, then lex the content as top level text
        inner = re.sub(r'^ *> ?', '', m.group(0), flags=re.M)
        self.parse(inner)
        self.tokens.append({'type': 'block_quote_end'})

    def parse_def_links(self, m):
        entry = {'link': m.group(2), 'title': m.group(3)}
        self.def_links[_keyify(m.group(1))] = entry

    def parse_def_footnotes(self, m):
        key = _keyify(m.group(1))
        if key in self.def_footnotes:
            # footnote is already defined
            return

        # 0 = defined but not referenced yet
        self.def_footnotes[key] = 0
        self.tokens.append({'type': 'footnote_start', 'key': key})

        text = m.group(2)
        if '\n' in text:
            lines = text.split('\n')
            first, rest = lines[0], lines[1:]
            # smallest non-zero indentation of the continuation lines
            indents = [len(line) - len(line.lstrip()) for line in rest]
            indents = [n for n in indents if n]
            dedent = min(indents) if indents else None
            text = '\n'.join([first] + [line[dedent:] for line in rest])

        self.parse(text, self.footnote_rules)
        self.tokens.append({'type': 'footnote_end', 'key': key})

    def parse_table(self, m):
        item = self._process_table(m)

        body = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
        # strip the outer pipes of every row, then split it into cells
        item['cells'] = [
            re.split(r' *\| *', re.sub(r'^ *\| *| *\| *$', '', row))
            for row in body.split('\n')
        ]
        self.tokens.append(item)

    def parse_nptable(self, m):
        item = self._process_table(m)

        body = re.sub(r'\n$', '', m.group(3))
        item['cells'] = [re.split(r' *\| *', row) for row in body.split('\n')]
        self.tokens.append(item)

    def _process_table(self, m):
        """Build the table token (header cells and alignments) from *m*."""
        header = re.split(r' *\| *', re.sub(r'^ *| *\| *$', '', m.group(1)))
        specs = re.split(r' *\| *', re.sub(r' *|\| *$', '', m.group(2)))

        alignments = (
            (r'^ *-+: *$', 'right'),
            (r'^ *:-+: *$', 'center'),
            (r'^ *:-+ *$', 'left'),
        )

        def classify(spec):
            for pattern, name in alignments:
                if re.search(pattern, spec):
                    return name
            return None

        return {
            'type': 'table',
            'header': header,
            'align': [classify(spec) for spec in specs],
        }

    def parse_block_html(self, m):
        # content of these tags must not be parsed as inline markdown
        pre = m.group(1) in ['pre', 'script', 'style']
        self.tokens.append({
            'type': 'block_html', 'pre': pre, 'text': m.group(0),
        })

    def parse_paragraph(self, m):
        body = m.group(1).rstrip('\n')
        self.tokens.append({'type': 'paragraph', 'text': body})

    def parse_text(self, m):
        self.tokens.append({'type': 'text', 'text': m.group(0)})
+
+
class InlineGrammar(object):
    """Grammars for inline level tokens.

    Each attribute is a compiled regular expression anchored at the start
    of the remaining input.  InlineLexer fetches them by rule name with
    ``getattr`` and dispatches a match to its ``output_<rule name>`` method.
    """

    # Backslash escape; group 1 is the escaped character.
    escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])')  # \* \+ \! ....
    # Inline HTML: a comment, a paired inline tag or a single inline tag.
    inline_html = re.compile(
        r'^(?:%s|%s|%s)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)[\s\S]+?<\/\1>' % _inline_tag,
            r'''<(?:%s)(?:"[^"]*"|'[^']*'|[^'">])*?>''' % _inline_tag,
        )
    )
    # <link> or <user@host>; group 2 is "@" or ":".
    autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
    # [text](link "title"), with a leading "!" for images; groups: text,
    # optional "<", link, optional title.
    link = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\('
        r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
        r'\)'
    )
    # [text][key]; groups: text, key.
    reflink = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\s*\[([^^\]]*)\]'
    )
    # [key] on its own.
    nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
    # Bare http(s) URL.
    url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
    # Group 1 holds the text for the "__" form, group 2 for the "**" form.
    double_emphasis = re.compile(
        r'^_{2}([\s\S]+?)_{2}(?!_)'  # __word__
        r'|'
        r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
    )
    # Group 1 holds the text for the "_" form, group 2 for the "*" form.
    emphasis = re.compile(
        r'^\b_((?:__|[\s\S])+?)_\b'  # _word_
        r'|'
        r'^\*((?:\*\*|[\s\S])+?)\*(?!\*)'  # *word*
    )
    # Code span; groups: backtick run, code.
    code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
    # Two or more trailing spaces before a newline.
    linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
    strikethrough = re.compile(r'^~~(?=\S)([\s\S]+?\S)~~')  # ~~word~~
    # Footnote reference [^key].
    footnote = re.compile(r'^\[\^([^\]]+)\]')
    # Plain text up to the next character that may start another rule.
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')

    def hard_wrap(self):
        """Grammar for hard wrap linebreak. You don't need to add two
        spaces at the end of a line.

        Replaces ``linebreak`` and ``text`` on this instance only; the
        class level patterns stay unchanged.
        """
        self.linebreak = re.compile(r'^ *\n(?!\s*$)')
        self.text = re.compile(
            r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
        )
+
+
class InlineLexer(object):
    """Render inline markdown by trying the inline grammar rules in order.

    The first rule matching at the start of the remaining text is passed to
    the ``output_<rule>`` method of the same name.  A handler may return
    ``None`` to decline the match, in which case the next rule is tried.
    """

    grammar_class = InlineGrammar

    default_rules = [
        'escape', 'inline_html', 'autolink', 'url',
        'footnote', 'link', 'reflink', 'nolink',
        'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    def __init__(self, renderer, rules=None, **kwargs):
        self.renderer = renderer
        self.links = {}
        self.footnotes = {}
        self.footnote_index = 0
        self.rules = rules or self.grammar_class()

        # state flags toggled while rendering nested content
        self._in_link = False
        self._in_footnote = False

    def __call__(self, text):
        return self.output(text)

    def setup(self, links, footnotes):
        """Install the link and footnote definitions collected by the block lexer."""
        self.footnote_index = 0
        self.links = links or {}
        self.footnotes = footnotes or {}

    def output(self, text, rules=None):
        """Render *text* with *rules* (``default_rules`` when omitted).

        :raises RuntimeError: when no rule produces output for the
                              remaining text.
        """
        text = text.rstrip('\n')
        if not rules:
            rules = list(self.default_rules)

        # footnote references are not rendered inside a footnote body
        if self._in_footnote and 'footnote' in rules:
            rules.remove('footnote')

        rendered = self.renderer.placeholder()

        self.line_started = False
        while text:
            for key in rules:
                m = getattr(self.rules, key).match(text)
                if m is None:
                    continue
                self.line_match = m
                out = getattr(self, 'output_%s' % key)(m)
                if out is not None:
                    break
            else:  # pragma: no cover
                self.line_started = True
                raise RuntimeError('Infinite loop at: %s' % text)
            self.line_started = True
            rendered += out
            text = text[len(m.group(0)):]

        return rendered

    def output_escape(self, m):
        return m.group(1)

    def output_autolink(self, m):
        is_email = m.group(2) == '@'
        return self.renderer.autolink(m.group(1), is_email)

    def output_url(self, m):
        address = m.group(1)
        # a bare URL inside link text stays plain text
        if not self._in_link:
            return self.renderer.autolink(address, False)
        return self.renderer.text(address)

    def output_inline_html(self, m):
        return self.renderer.inline_html(m.group(0))

    def output_footnote(self, m):
        key = _keyify(m.group(1))
        # undefined, or already referenced (non-zero index): decline
        if key not in self.footnotes or self.footnotes[key]:
            return None
        self.footnote_index += 1
        self.footnotes[key] = self.footnote_index
        return self.renderer.footnote_ref(key, self.footnote_index)

    def output_link(self, m):
        return self._process_link(m, m.group(3), m.group(4))

    def output_reflink(self, m):
        key = _keyify(m.group(2) or m.group(1))
        if key in self.links:
            target = self.links[key]
            return self._process_link(m, target['link'], target['title'])
        return None

    def output_nolink(self, m):
        key = _keyify(m.group(1))
        if key in self.links:
            target = self.links[key]
            return self._process_link(m, target['link'], target['title'])
        return None

    def _process_link(self, m, link, title=None):
        """Render the link or, when the match starts with ``!``, the image."""
        text = m.group(1)
        if m.group(0)[0] == '!':
            # image alt text is passed on unrendered
            return self.renderer.image(link, title, text)

        self._in_link = True
        text = self.output(text)
        self._in_link = False
        return self.renderer.link(link, title, text)

    def output_double_emphasis(self, m):
        inner = self.output(m.group(2) or m.group(1))
        return self.renderer.double_emphasis(inner)

    def output_emphasis(self, m):
        inner = self.output(m.group(2) or m.group(1))
        return self.renderer.emphasis(inner)

    def output_code(self, m):
        return self.renderer.codespan(m.group(2))

    def output_linebreak(self, m):
        return self.renderer.linebreak()

    def output_strikethrough(self, m):
        inner = self.output(m.group(1))
        return self.renderer.strikethrough(inner)

    def output_text(self, m):
        return self.renderer.text(m.group(0))
+
+
class Renderer(object):
    """The default HTML renderer for rendering Markdown.

    Keyword arguments are stored in ``self.options``; the methods below
    read ``skip_style``, ``escape`` and ``use_xhtml`` from it.
    """

    def __init__(self, **kwargs):
        self.options = kwargs

    @staticmethod
    def _is_script_link(url):
        """Return True when *url* starts with the ``javascript:`` scheme.

        The check ignores letter case and leading/trailing blanks and
        control characters, so ``JavaScript:`` or `` javascript:`` are
        rejected as well.
        """
        cleaned = url.lower().strip('\x00\x1a \n\r\t')
        return cleaned.startswith('javascript:')

    def placeholder(self):
        """Returns the default, empty output value for the renderer.

        All renderer methods use the '+=' operator to append to this value.
        Default is a string so rendering HTML can build up a result string with
        the rendered Markdown.

        Can be overridden by Renderer subclasses to be types like an empty
        list, allowing the renderer to create a tree-like structure to
        represent the document (which can then be reprocessed later into a
        separate format like docx or pdf).
        """
        return ''

    def block_code(self, code, lang=None):
        """Rendering block level code. ``pre > code``.

        :param code: text content of the code block.
        :param lang: language of the given code.
        """
        code = code.rstrip('\n')
        if not lang:
            code = escape(code, smart_amp=False)
            return '<pre><code>%s\n</code></pre>\n' % code
        code = escape(code, quote=True, smart_amp=False)
        return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)

    def block_quote(self, text):
        """Rendering <blockquote> with the given text.

        :param text: text content of the blockquote.
        """
        return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')

    def block_html(self, html):
        """Rendering block level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('skip_style') and \
           html.lower().startswith('<style'):
            return ''
        if self.options.get('escape'):
            return escape(html)
        return html

    def header(self, text, level, raw=None):
        """Rendering header/heading tags like ``<h1>`` ``<h2>``.

        :param text: rendered text content for the header.
        :param level: a number for the header level, for example: 1.
        :param raw: raw text content of the header.
        """
        return '<h%d>%s</h%d>\n' % (level, text, level)

    def hrule(self):
        """Rendering method for ``<hr>`` tag."""
        if self.options.get('use_xhtml'):
            return '<hr />\n'
        return '<hr>\n'

    def list(self, body, ordered=True):
        """Rendering list tags like ``<ul>`` and ``<ol>``.

        :param body: body contents of the list.
        :param ordered: whether this list is ordered or not.
        """
        tag = 'ul'
        if ordered:
            tag = 'ol'
        return '<%s>\n%s</%s>\n' % (tag, body, tag)

    def list_item(self, text):
        """Rendering list item snippet. Like ``<li>``."""
        return '<li>%s</li>\n' % text

    def paragraph(self, text):
        """Rendering paragraph tags. Like ``<p>``."""
        return '<p>%s</p>\n' % text.strip(' ')

    def table(self, header, body):
        """Rendering table element. Wrap header and body in it.

        :param header: header part of the table.
        :param body: body part of the table.
        """
        return (
            '<table>\n<thead>%s</thead>\n'
            '<tbody>\n%s</tbody>\n</table>\n'
        ) % (header, body)

    def table_row(self, content):
        """Rendering a table row. Like ``<tr>``.

        :param content: content of current table row.
        """
        return '<tr>\n%s</tr>\n' % content

    def table_cell(self, content, **flags):
        """Rendering a table cell. Like ``<th>`` ``<td>``.

        :param content: content of current table cell.
        :param header: whether this is header or not.
        :param align: align of current table cell.
        """
        if flags['header']:
            tag = 'th'
        else:
            tag = 'td'
        align = flags['align']
        if not align:
            return '<%s>%s</%s>\n' % (tag, content, tag)
        return '<%s style="text-align:%s">%s</%s>\n' % (
            tag, align, content, tag
        )

    def double_emphasis(self, text):
        """Rendering **strong** text.

        :param text: text content for emphasis.
        """
        return '<strong>%s</strong>' % text

    def emphasis(self, text):
        """Rendering *emphasis* text.

        :param text: text content for emphasis.
        """
        return '<em>%s</em>' % text

    def codespan(self, text):
        """Rendering inline `code` text.

        :param text: text content for inline code.
        """
        text = escape(text.rstrip(), smart_amp=False)
        return '<code>%s</code>' % text

    def linebreak(self):
        """Rendering line break like ``<br>``."""
        if self.options.get('use_xhtml'):
            return '<br />\n'
        return '<br>\n'

    def strikethrough(self, text):
        """Rendering ~~strikethrough~~ text.

        :param text: text content for strikethrough.
        """
        return '<del>%s</del>' % text

    def text(self, text):
        """Rendering unformatted text.

        :param text: text content.
        """
        return escape(text)

    def autolink(self, link, is_email=False):
        """Rendering a given link or email address.

        :param link: link content or email address.
        :param is_email: whether this is an email or not.
        """
        text = link = escape(link)
        if is_email:
            link = 'mailto:%s' % link
        return '<a href="%s">%s</a>' % (link, text)

    def link(self, link, title, text):
        """Rendering a given link with content and title.

        A ``javascript:`` link is replaced by an empty href.

        :param link: href link for ``<a>`` tag.
        :param title: title content for `title` attribute.
        :param text: text content for description.
        """
        if self._is_script_link(link):
            link = ''
        if not title:
            return '<a href="%s">%s</a>' % (link, text)
        title = escape(title, quote=True)
        return '<a href="%s" title="%s">%s</a>' % (link, title, text)

    def image(self, src, title, text):
        """Rendering a image with title and text.

        A ``javascript:`` source is replaced by an empty src.

        :param src: source link of the image.
        :param title: title text of the image.
        :param text: alt text of the image.
        """
        if self._is_script_link(src):
            src = ''
        text = escape(text, quote=True)
        if title:
            title = escape(title, quote=True)
            html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
        else:
            html = '<img src="%s" alt="%s"' % (src, text)
        if self.options.get('use_xhtml'):
            return '%s />' % html
        return '%s>' % html

    def inline_html(self, html):
        """Rendering span level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('escape'):
            return escape(html)
        return html

    def newline(self):
        """Rendering newline element."""
        return ''

    def footnote_ref(self, key, index):
        """Rendering the ref anchor of a footnote.

        :param key: identity key for the footnote.
        :param index: the index count of current footnote.
        """
        html = (
            '<sup class="footnote-ref" id="fnref-%s">'
            '<a href="#fn-%s" rel="footnote">%d</a></sup>'
        ) % (escape(key), escape(key), index)
        return html

    def footnote_item(self, key, text):
        """Rendering a footnote item.

        :param key: identity key for the footnote.
        :param text: text content of the footnote.
        """
        back = (
            '<a href="#fnref-%s" rev="footnote">↩</a>'
        ) % escape(key)
        text = text.rstrip()
        # put the back reference inside the last paragraph when there is one
        if text.endswith('</p>'):
            text = re.sub(r'<\/p>$', r'%s</p>' % back, text)
        else:
            text = '%s<p>%s</p>' % (text, back)
        html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
        return html

    def footnotes(self, text):
        """Wrapper for all footnotes.

        :param text: contents of all footnotes.
        """
        html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
        return html % (self.hrule(), text)
+
+
class Markdown(object):
    """The Markdown parser.

    Runs the block lexer over the text, then walks the resulting token
    list and calls the renderer for every token, rendering inline content
    with the inline lexer on the way.

    :param renderer: An instance of ``Renderer``.
    :param inline: An inline lexer instance or class.
    :param block: A block lexer instance or class.
    """

    def __init__(self, renderer=None, inline=None, block=None, **kwargs):
        if not renderer:
            renderer = Renderer(**kwargs)

        self.renderer = renderer

        # lexers may be passed as classes; instantiate them here
        if inline and inspect.isclass(inline):
            inline = inline(renderer, **kwargs)
        if block and inspect.isclass(block):
            block = block(**kwargs)

        if inline:
            self.inline = inline
        else:
            rules = InlineGrammar()
            if kwargs.get('hard_wrap'):
                rules.hard_wrap()
            self.inline = InlineLexer(renderer, rules=rules)

        self.block = block or BlockLexer(BlockGrammar())
        self.options = kwargs
        self.footnotes = []
        self.tokens = []

    def __call__(self, text):
        return self.parse(text)

    def render(self, text):
        """Render the Markdown text.

        :param text: markdown formatted text content.
        """
        return self.parse(text)

    def parse(self, text):
        """Render *text* and append the referenced footnotes, if any."""
        out = self.output(preprocessing(text))

        # key -> reference index (0 for footnotes never referenced)
        keys = self.block.def_footnotes

        # reset block
        self.block.def_links = {}
        self.block.def_footnotes = {}

        # reset inline
        self.inline.links = {}
        self.inline.footnotes = {}

        if not self.footnotes:
            return out

        # keep referenced footnotes only, ordered so that pop() below
        # yields them by ascending reference index
        footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
        self.footnotes = sorted(
            footnotes, key=lambda o: keys.get(o['key']), reverse=True
        )

        body = self.renderer.placeholder()
        while self.footnotes:
            note = self.footnotes.pop()
            body += self.renderer.footnote_item(
                note['key'], note['text']
            )

        out += self.renderer.footnotes(body)
        return out

    def pop(self):
        """Remove the next token, store it in ``self.token`` and return it.

        Returns None when no token is left.
        """
        if not self.tokens:
            return None
        self.token = self.tokens.pop()
        return self.token

    def peek(self):
        """Return the next token without removing it, None when none is left."""
        if self.tokens:
            return self.tokens[-1]
        return None  # pragma: no cover

    def output(self, text, rules=None):
        """Lex *text* into block tokens and render all of them."""
        self.tokens = self.block(text, rules)
        # reversed so that pop() hands out the tokens in document order
        self.tokens.reverse()

        self.inline.setup(self.block.def_links, self.block.def_footnotes)

        out = self.renderer.placeholder()
        while self.pop():
            out += self.tok()
        return out

    def tok(self):
        """Render the current token through its ``output_<type>`` method."""
        t = self.token['type']

        # special cases
        if t.endswith('_start'):
            t = t[:-6]

        return getattr(self, 'output_%s' % t)()

    def tok_text(self):
        """Join the current and all directly following text tokens."""
        text = self.token['text']
        while True:
            following = self.peek()
            # peek() yields None once the token list is exhausted
            if following is None or following['type'] != 'text':
                break
            text += '\n' + self.pop()['text']
        return self.inline(text)

    def output_newline(self):
        return self.renderer.newline()

    def output_hrule(self):
        return self.renderer.hrule()

    def output_heading(self):
        return self.renderer.header(
            self.inline(self.token['text']),
            self.token['level'],
            self.token['text'],
        )

    def output_code(self):
        return self.renderer.block_code(
            self.token['text'], self.token['lang']
        )

    def output_table(self):
        aligns = self.token['align']
        aligns_length = len(aligns)
        cell = self.renderer.placeholder()

        # header part
        header = self.renderer.placeholder()
        for i, value in enumerate(self.token['header']):
            align = aligns[i] if i < aligns_length else None
            flags = {'header': True, 'align': align}
            cell += self.renderer.table_cell(self.inline(value), **flags)

        header += self.renderer.table_row(cell)

        # body part
        body = self.renderer.placeholder()
        for i, row in enumerate(self.token['cells']):
            cell = self.renderer.placeholder()
            for j, value in enumerate(row):
                align = aligns[j] if j < aligns_length else None
                flags = {'header': False, 'align': align}
                cell += self.renderer.table_cell(self.inline(value), **flags)
            body += self.renderer.table_row(cell)

        return self.renderer.table(header, body)

    def output_block_quote(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'block_quote_end':
            body += self.tok()
        return self.renderer.block_quote(body)

    def output_list(self):
        ordered = self.token['ordered']
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_end':
            body += self.tok()
        return self.renderer.list(body, ordered)

    def output_list_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            # text of a tight item is rendered without a paragraph
            if self.token['type'] == 'text':
                body += self.tok_text()
            else:
                body += self.tok()

        return self.renderer.list_item(body)

    def output_loose_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            body += self.tok()
        return self.renderer.list_item(body)

    def output_footnote(self):
        # The body is collected in self.footnotes and rendered by parse();
        # nothing is emitted at the place of the definition.
        self.inline._in_footnote = True
        body = self.renderer.placeholder()
        key = self.token['key']
        while self.pop()['type'] != 'footnote_end':
            body += self.tok()
        self.footnotes.append({'key': key, 'text': body})
        self.inline._in_footnote = False
        return self.renderer.placeholder()

    def output_block_html(self):
        text = self.token['text']
        if self.options.get('parse_html') and not self.token.get('pre'):
            text = self.inline(text)
        return self.renderer.block_html(text)

    def output_paragraph(self):
        return self.renderer.paragraph(self.inline(self.token['text']))

    def output_text(self):
        return self.renderer.paragraph(self.tok_text())
+
+
def markdown(text, **kwargs):
    """Render markdown formatted text to html.

    All keyword arguments are handed to the ``Markdown`` constructor.

    :param text: markdown formatted text content.
    :param escape: if set to True, all html tags will be escaped.
    :param use_xhtml: output with xhtml tags.
    """
    parser = Markdown(**kwargs)
    return parser(text)