#!/usr/bin/env python
+# The following constant specifies the name used in the usage
+# statement displayed for python versions lower than 2.3. (With
+# python2.3 and higher the usage statement is generated by optparse
+# and uses the actual name of the executable called.)
+
+EXECUTABLE_NAME_FOR_USAGE = "python markdown.py"
+
+SPEED_TEST = 0
+
"""
====================================================================
-IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
+IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
====================================================================
Python-Markdown
License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
-Version: 1.3 (Feb. 28, 2006)
+Version: 1.5a (July 9, 2006)
For changelog, see end of file
"""
-import re, sys, os, random
+import re, sys, os, random, codecs
# set debug level: 3 none, 2 critical, 1 informative, 0 all
(VERBOSE, INFO, CRITICAL, NONE) = range(4)
# all tabs will be expanded to up to this many spaces
TAB_LENGTH = 4
ENABLE_ATTRIBUTES = 1
-
+SMART_EMPHASIS = 1
# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------
-FN_BACKLINK_TEXT = "zz1337820767766393qq"
# a template for html placeholders
HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"
BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
'dl', 'ol', 'ul', 'script', 'noscript',
'form', 'fieldset', 'iframe', 'math', 'ins',
- 'del', 'hr', 'hr/']
+ 'del', 'hr', 'hr/', 'style']
def is_block_level (tag) :
return ( (tag in BLOCK_LEVEL_ELEMENTS) or
child.parent = self
self.entities = {}
- def createElement(self, tag) :
+ def createElement(self, tag, textNode=None) :
el = Element(tag)
el.doc = self
+ if textNode :
+ el.appendChild(self.createTextNode(textNode))
return el
def createTextNode(self, text) :
def normalizeEntities(self, text) :
- pairs = [ #("&", "&"),
+ pairs = [ ("&", "&"),
("<", "<"),
(">", ">"),
("\"", """)]
+
for old, new in pairs :
text = text.replace(old, new)
return text
class Element :
+ type = "element"
+
def __init__ (self, tag) :
- self.type = "element"
+
self.nodeName = tag
self.attributes = []
self.attribute_values = {}
value = self.attribute_values[attr]
value = self.doc.normalizeEntities(value)
buffer += ' %s="%s"' % (attr, value)
- if self.childNodes :
+ if self.childNodes or self.nodeName in ['blockquote']:
buffer += ">"
for child in self.childNodes :
buffer += child.toxml()
if self.nodeName in ['p', 'li', 'ul', 'ol',
'h1', 'h2', 'h3', 'h4'] :
buffer += "\n"
+
return buffer
class TextNode :
+ type = "text"
+ attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}
+
def __init__ (self, text) :
- self.type = "text"
- self.value = text
- self.attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}
+ self.value = text
def attributeCallback(self, match) :
self.parent.setAttribute(match.group(1), match.group(2))
class EntityReference:
+ type = "entity_ref"
+
def __init__(self, entity):
- self.type = "entity_ref"
self.entity = entity
def handleAttributes(self):
Preprocessors munge source text before we start doing anything too
complicated.
-Each preprocessor implements a "run" method that takes a pointer to
-a list of lines of the document, modifies it as necessary and
-returns either the same pointer or a pointer to a new list.
+Each preprocessor implements a "run" method that takes a pointer to a list of lines of the document,
+modifies it as necessary and returns either the same pointer or a
+pointer to a new list. Preprocessors must extend
+markdown.Preprocessor.
+
"""
-class HeaderPreprocessor :
+
+class Preprocessor :
+ """Empty base class for preprocessors; it defines no behavior itself."""
+ pass
+
+
+class HeaderPreprocessor (Preprocessor):
"""
Replaces underlined headers with hashed headers to avoid
def run (self, lines) :
- for i in range(len(lines)) :
- if not lines[i] :
+ i = -1
+ while i+1 < len(lines) :
+ i = i+1
+ if not lines[i].strip() :
continue
+ if lines[i].startswith("#") :
+ lines.insert(i+1, "\n")
+
if (i+1 <= len(lines)
and lines[i+1]
and lines[i+1][0] in ['-', '=']) :
lines[i] = "## " + lines[i].strip()
lines[i+1] = ""
+ #for l in lines :
+ # print l.encode('utf8')
+ #sys.exit(0)
+
return lines
HEADER_PREPROCESSOR = HeaderPreprocessor()
-class LinePreprocessor :
+class LinePreprocessor (Preprocessor):
"""Deals with HR lines (needs to be done before processing lists)"""
def run (self, lines) :
LINE_PREPROCESSOR = LinePreprocessor()
-class LineBreaksPreprocessor :
+class LineBreaksPreprocessor (Preprocessor):
"""Replaces double spaces at the end of the lines with <br/ >."""
def run (self, lines) :
LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()
-class HtmlBlockPreprocessor :
+class HtmlBlockPreprocessor (Preprocessor):
"""Removes html blocks from self.lines"""
+
+ def _get_left_tag(self, block):
+ """Return the lowercased opening tag name of a block.
+
+ The first character of the block is dropped, the first ">" is
+ replaced by a space, and the first whitespace-delimited token is
+ returned, e.g. "<div class='a'>" gives "div" and "<hr/>" gives "hr/".
+ Raises IndexError if nothing but whitespace follows the first
+ character.
+ """
+ return block[1:].replace(">", " ", 1).split()[0].lower()
+
+
+ def _get_right_tag(self, left_tag, block):
+ """Return the lowercased tail of the block where a closing tag would sit.
+
+ Takes the last len(left_tag) + 2 characters of the right-stripped
+ block and drops the final one, e.g. a block ending in "</div>" with
+ left_tag "div" gives "/div".
+ """
+ return block.rstrip()[-len(left_tag)-2:-1].lower()
+ def _equal_tags(self, left_tag, right_tag):
+ """Return True if right_tag is accepted as the closer for left_tag.
+
+ Accepted when left_tag is '?', '?php' or 'div' (regardless of
+ right_tag), when right_tag equals "/" + left_tag, or when right_tag
+ without its first character equals left_tag and that first character
+ is not "<". Returns False otherwise.
+ """
+ # NOTE(review): 'div' is accepted unconditionally here, so the
+ # block's actual ending is not checked for divs - confirm intended.
+ if left_tag in ['?', '?php', 'div'] : # handle PHP, etc.
+ return True
+ if ("/" + left_tag) == right_tag:
+ return True
+ elif left_tag == right_tag[1:] \
+ and right_tag[0] != "<":
+ return True
+ else:
+ return False
+
+ def _is_oneliner(self, tag):
+ """Return True if tag is 'hr' or 'hr/'."""
+ return (tag in ['hr', 'hr/'])
+
+
def run (self, lines) :
new_blocks = []
text = "\n".join(lines)
- for block in text.split("\n\n") :
+ text = text.split("\n\n")
+
+ items = []
+ left_tag = ''
+ right_tag = ''
+ in_tag = False # flag
+
+ for block in text:
if block.startswith("\n") :
block = block[1:]
- if ( (block.startswith("<") and block.rstrip().endswith(">"))
- and (block[1] in ["!", "?", "@", "%"]
- or is_block_level( block[1:].replace(">", " ")
- .split()[0].lower()))) :
- new_blocks.append(
- self.stash.store(block.strip()))
- else :
+
+ if not in_tag:
+
+ if block.startswith("<"):
+
+ left_tag = self._get_left_tag(block)
+ right_tag = self._get_right_tag(left_tag, block)
+
+ if not (is_block_level(left_tag) \
+ or block[1] in ["!", "?", "@", "%"]):
+ new_blocks.append(block)
+ continue
+
+ if self._is_oneliner(left_tag):
+ new_blocks.append(block.strip())
+ continue
+
+ if block[1] == "!":
+ # is a comment block
+ left_tag = "--"
+ right_tag = self._get_right_tag(left_tag, block)
+ # keep checking conditions below and maybe just append
+
+ if block.rstrip().endswith(">") \
+ and self._equal_tags(left_tag, right_tag):
+ new_blocks.append(
+ self.stash.store(block.strip()))
+ continue
+ elif not block[1] == "!":
+ # if is block level tag and is not complete
+ items.append(block.strip())
+ in_tag = True
+ continue
+
new_blocks.append(block)
+
+ else:
+ items.append(block.strip())
+
+ right_tag = self._get_right_tag(left_tag, block)
+ if self._equal_tags(left_tag, right_tag):
+ # if find closing tag
+ in_tag = False
+ new_blocks.append(
+ self.stash.store('\n\n'.join(items)))
+ items = []
+
return "\n\n".join(new_blocks).split("\n")
HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
-class ReferencePreprocessor :
+class ReferencePreprocessor (Preprocessor):
def run (self, lines) :
+
new_text = [];
for line in lines:
m = RE.regExp['reference-def'].match(line)
if m:
id = m.group(2).strip().lower()
- title = dequote(m.group(4).strip()) #.replace('"', """)
- self.references[id] = (m.group(3), title)
+ t = m.group(4).strip() # potential title
+ if not t :
+ self.references[id] = (m.group(3), t)
+ elif (len(t) >= 2
+ and (t[0] == t[-1] == "\""
+ or t[0] == t[-1] == "\'"
+ or (t[0] == "(" and t[-1] == ")") ) ) :
+ self.references[id] = (m.group(3), t[1:-1])
+ else :
+ new_text.append(line)
else:
new_text.append(line)
+
return new_text #+ "\n"
REFERENCE_PREPROCESSOR = ReferencePreprocessor()
======================================================================
Inline patterns such as *emphasis* are handled by means of auxiliary
-objects, one per pattern. Each pattern object uses a single regular
+objects, one per pattern. Pattern objects must be instances of classes
+that extend markdown.Pattern. Each pattern object uses a single regular
expression and needs support the following methods:
pattern.getCompiledRegExp() - returns a regular expression
a NanoDom node (as a part of the provided
doc) or None
-All of python markdown's built-in patterns subclass from BasePatter,
+All of python markdown's built-in patterns subclass from Pattern,
but you can add additional patterns that don't.
Also note that all the regular expressions used by inline must
capture the whole block. For this reason, they all start with
'^(.*)' and end with '(.*)!'. In case with built-in expression
-BasePattern takes care of adding the "^(.*)" and "(.*)!".
+Pattern takes care of adding the "^(.*)" and "(.*)!".
Finally, the order in which regular expressions are applied is very
important - e.g. if we first replace http://.../ links with <a> tags
DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`' # ``e=f("`")``
ESCAPE_RE = r'\\(.)' # \<
EMPHASIS_RE = r'\*([^\*]*)\*' # *emphasis*
-EMPHASIS_2_RE = r'_([^_]*)_' # _emphasis_
+STRONG_RE = r'\*\*(.*)\*\*' # **strong**
+STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*' # ***strong***
+
+if SMART_EMPHASIS:
+ EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_' # _emphasis_
+else :
+ EMPHASIS_2_RE = r'_([^_]*)_' # _emphasis_
+
+STRONG_2_RE = r'__([^_]*)__' # __strong__
+STRONG_EM_2_RE = r'___([^_]*)___' # ___strong___
+
LINK_RE = BRK + r'\s*\(([^\)]*)\)' # [text](url)
LINK_ANGLED_RE = BRK + r'\s*\(<([^\)]*)>\)' # [text](<url>)
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # 
REFERENCE_RE = BRK+ r'\s*\[([^\]]*)\]' # [Google][3]
IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
NOT_STRONG_RE = r'( \* )' # stand-alone * or _
-STRONG_RE = r'\*\*(.*)\*\*' # **strong**
-STRONG_2_RE = r'__([^_]*)__' # __strong__
-STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*' # ***strong***
-STRONG_EM_2_RE = r'___([^_]*)___' # ___strong___
AUTOLINK_RE = r'<(http://[^>]*)>' # <http://www.123.com>
-AUTOMAIL_RE = r'<([^> ]*@[^> ]*)>' # <me@example.com>
-HTML_RE = r'(\<[^\>]*\>)' # <...>
+AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # <me@example.com>
+#HTML_RE = r'(\<[^\>]*\>)' # <...>
+HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)' # <...>
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # &
-class BasePattern:
+class Pattern:
def __init__ (self, pattern) :
self.pattern = pattern
def getCompiledRegExp (self) :
return self.compiled_re
-class SimpleTextPattern (BasePattern) :
+BasePattern = Pattern # for backward compatibility
+
+class SimpleTextPattern (Pattern) :
def handleMatch(self, m, doc) :
return doc.createTextNode(m.group(2))
-class SimpleTagPattern (BasePattern):
+class SimpleTagPattern (Pattern):
def __init__ (self, pattern, tag) :
- BasePattern.__init__(self, pattern)
+ Pattern.__init__(self, pattern)
self.tag = tag
def handleMatch(self, m, doc) :
el.appendChild(doc.createTextNode(m.group(2)))
return el
-class BacktickPattern (BasePattern):
+class BacktickPattern (Pattern):
def __init__ (self, pattern):
- BasePattern.__init__(self, pattern)
+ Pattern.__init__(self, pattern)
self.tag = "code"
def handleMatch(self, m, doc) :
el = doc.createElement(self.tag)
text = m.group(2).strip()
- text = text.replace("&", "&")
+ #text = text.replace("&", "&")
el.appendChild(doc.createTextNode(text))
return el
return el1
-class HtmlPattern (BasePattern):
+class HtmlPattern (Pattern):
def handleMatch (self, m, doc) :
place_holder = self.stash.store(m.group(2))
return doc.createTextNode(place_holder)
-class LinkPattern (BasePattern):
+class LinkPattern (Pattern):
def handleMatch(self, m, doc) :
el = doc.createElement('a')
return el
-class ImagePattern (BasePattern):
+class ImagePattern (Pattern):
def handleMatch(self, m, doc):
el = doc.createElement('img')
el.setAttribute('alt', truealt)
return el
-class ReferencePattern (BasePattern):
+class ReferencePattern (Pattern):
def handleMatch(self, m, doc):
if m.group(9) :
return el
-class AutolinkPattern (BasePattern):
+class AutolinkPattern (Pattern):
def handleMatch(self, m, doc):
el = doc.createElement('a')
el.appendChild(doc.createTextNode(m.group(2)))
return el
-class AutomailPattern (BasePattern):
+class AutomailPattern (Pattern):
def handleMatch(self, m, doc) :
el = doc.createElement('a')
preprocessors in that they need to implement a "run" method. Unlike
pre-processors, they take a NanoDom document as a parameter and work
with that.
-#
+
+Postprocessors should extend markdown.Postprocessor.
+
There are currently no standard post-processors, but the footnote
extension below uses one.
"""
+
+class Postprocessor :
+ """Empty base class for postprocessors; it defines no behavior itself."""
+ pass
+
+
"""
======================================================================
========================== MISC AUXILIARY CLASSES ====================
Markdown text """
- def __init__(self, source=None):
+ def __init__(self, source=None,
+ extensions=[],
+ extension_configs=None,
+ encoding=None,
+ safe_mode = True):
"""Creates a new Markdown instance.
- @param source: The text in Markdown format. """
+ @param source: The text in Markdown format.
+ @param encoding: The character encoding of <text>. """
+ self.safeMode = safe_mode
+ self.encoding = encoding
self.source = source
self.blockGuru = BlockGuru()
self.registeredExtensions = []
+ self.stripTopLevelTags = 1
+ self.docType = ""
self.preprocessors = [ HEADER_PREPROCESSOR,
LINE_PREPROCESSOR,
self.postprocessors = [] # a footnote postprocessor will get
# inserted later
+ self.textPostprocessors = [] # a footnote postprocessor will get
+ # inserted later
+
+ self.prePatterns = []
+
+
self.inlinePatterns = [ DOUBLE_BACKTICK_PATTERN,
BACKTICK_PATTERN,
ESCAPE_PATTERN,
# The order of the handlers matters!!!
]
+ self.registerExtensions(extensions = extensions,
+ configs = extension_configs)
+
self.reset()
+
+ def registerExtensions(self, extensions, configs) :
+ """Import each named extension module and let it extend this instance.
+
+ @param extensions: extension names; each one is imported as the
+ module "mdx_" + name.
+ @param configs: mapping from extension name to the configuration
+ handed to that module's makeExtension(); a false
+ value is treated as an empty dictionary.
+ """
+
+ if not configs :
+ configs = {}
+
+ for ext in extensions :
+
+ extension_module_name = "mdx_" + ext
+
+ try :
+ module = __import__(extension_module_name)
+
+ except :
+ # Bare except: any failure raised while importing the module
+ # is reported through message() instead of propagating.
+ message(CRITICAL,
+ "couldn't load extension %s (looking for %s module)"
+ % (ext, extension_module_name) )
+ else :
+
+ # Extensions without an entry in configs get an empty list.
+ if configs.has_key(ext) :
+ configs_for_ext = configs[ext]
+ else :
+ configs_for_ext = []
+ # NOTE(review): indentation is not visible in this view; the two
+ # calls below presumably run for every successfully imported module.
+ extension = module.makeExtension(configs_for_ext)
+ extension.extendMarkdown(self, globals())
+
+
+
+
def registerExtension(self, extension) :
+ """ This gets called by the extension """
self.registeredExtensions.append(extension)
def reset(self) :
self.lines = prep.run(self.lines)
# Create a NanoDom tree from the lines and attach it to Document
- self._processSection(self.top_element, self.lines)
+
+
+ buffer = []
+ for line in self.lines :
+ if line.startswith("#") :
+ self._processSection(self.top_element, buffer)
+ buffer = [line]
+ else :
+ buffer.append(line)
+ self._processSection(self.top_element, buffer)
+
+ #self._processSection(self.top_element, self.lines)
# Not sure why I put this in but let's leave it for now.
self.top_element.appendChild(self.doc.createTextNode('\n'))
level = len(m.group(1))
h = self.doc.createElement("h%d" % level)
parent_elem.appendChild(h)
- for item in self._handleInline(m.group(2)) :
+ for item in self._handleInlineWrapper2(m.group(2).strip()) :
h.appendChild(item)
else :
message(CRITICAL, "We've got a problem header!")
elif paragraph :
- list = self._handleInline("\n".join(paragraph))
+ list = self._handleInlineWrapper2("\n".join(paragraph))
if ( parent_elem.nodeName == 'li'
and not (looseList or parent_elem.childNodes)):
break
# Check if the next non-blank line is still a part of the list
- if ( RE.regExp[listexpr].match(next) or
+ if ( RE.regExp['ul'].match(next) or
+ RE.regExp['ol'].match(next) or
RE.regExp['tabbed'].match(next) ):
# get rid of any white space in the line
items[item].append(line.strip())
# Now we need to detect list items (at the current level)
# while also detabing child elements if necessary
- for expr in [listexpr, 'tabbed']:
+ for expr in ['ul', 'ol', 'tabbed']:
m = RE.regExp[expr].match(line)
if m :
- if expr == listexpr : # We are looking at a new item
+ if expr in ['ul', 'ol'] : # We are looking at a new item
if m.group(1) :
items.append([m.group(1)])
item += 1
parent_elem.appendChild(pre)
pre.appendChild(code)
text = "\n".join(detabbed).rstrip()+"\n"
- text = text.replace("&", "&")
+ #text = text.replace("&", "&")
code.appendChild(self.doc.createTextNode(text))
self._processSection(parent_elem, theRest, inList)
- def _handleInline(self, line):
- """Transform a Markdown line with inline elements to an XHTML fragment.
+ def _handleInlineWrapper2 (self, line) :
+
+
+ parts = [line]
- Note that this function works recursively: we look for a
- pattern, which usually splits the paragraph in half, and then
- call this function on the two parts.
+ #if not(line):
+ # return [self.doc.createTextNode(' ')]
+
+ for pattern in self.inlinePatterns :
+
+ #print
+ #print self.inlinePatterns.index(pattern)
+
+ i = 0
+
+ #print parts
+ while i < len(parts) :
+
+ x = parts[i]
+ #print i
+ if isinstance(x, (str, unicode)) :
+ result = self._applyPattern(x, pattern)
+ #print result
+ #print result
+ #print parts, i
+ if result :
+ i -= 1
+ parts.remove(x)
+ for y in result :
+ parts.insert(i+1,y)
+
+ i += 1
+
+ for i in range(len(parts)) :
+ x = parts[i]
+ if isinstance(x, (str, unicode)) :
+ parts[i] = self.doc.createTextNode(x)
+
+ return parts
+
+
+
+ def _handleInlineWrapper (self, line) :
+ """Expand a line into a list of parts by calling self._handleInline.
+
+ Starts from the one-element list [line]; every string found at the
+ current index is removed and the items returned by
+ self._handleInline() for it are inserted in its place. The list is
+ returned once the index has moved past the end.
+ """
+
+ # A wrapper around _handleInline to avoid recursion
+
+ parts = [line]
+
+ i = 0
+
+ while i < len(parts) :
+ x = parts[i]
+ if isinstance(x, (str, unicode)) :
+ # list.remove() drops the first element equal to x.
+ parts.remove(x)
+ result = self._handleInline(x)
+ # NOTE(review): every item is inserted at the same index i, so
+ # the items end up in the reverse of the order in result - confirm
+ # this is intended.
+ for y in result :
+ parts.insert(i,y)
+ # NOTE(review): indentation is not visible in this view; this else
+ # presumably pairs with the isinstance test, so the index only
+ # advances past non-string parts.
+ else :
+ i += 1
+
+ return parts
+
+ def _handleInline(self, line):
+ """Transform a Markdown line with inline elements to an XHTML
+ fragment.
This function uses auxiliary objects called inline patterns.
See notes on inline patterns above.
@param item: A block of Markdown text
- @return: A list of NanoDomnodes """
+ @return: A list of NanoDom nodes """
+
if not(line):
return [self.doc.createTextNode(' ')]
- # two spaces at the end of the line denote a <br/>
- #if line.endswith(' '):
- # list = self._handleInline( line.rstrip())
- # list.append(self.doc.createElement('br'))
- # return list
- #
- # ::TODO:: Replace with a preprocessor
for pattern in self.inlinePatterns :
list = self._applyPattern( line, pattern)
return [self.doc.createTextNode(line)]
- def _applyPattern(self, line, pattern) :
+ def _applyPattern(self, line, pattern) :
""" Given a pattern name, this function checks if the line
- fits the pattern, creates the necessary elements and
- recursively calls _handleInline (via. _inlineRecurse)
-
+ fits the pattern, creates the necessary elements, and returns
+ back a list consisting of NanoDom elements and/or strings.
+
@param line: the text to be processed
@param pattern: the pattern to be checked
# if we got a match let the pattern make us a NanoDom node
# if it doesn't, move on
node = pattern.handleMatch(m, self.doc)
- if not node :
- return None
-
- # determine what we've got to the left and to the right
- left = m.group(1) # the first match group
- left_list = self._handleInline(left)
- right = m.groups()[-1] # the last match group
- right_list = self._handleInline(right)
-
- # put the three parts together
- left_list.append(node)
- left_list.extend(right_list)
-
- return left_list
+ if node :
+ # Those are in the reverse order!
+ return ( m.groups()[-1], # the string to the right of the match
+ node, # the new node
+ m.group(1)) # the string to the left of the match
+ else :
+ return None
- def __str__(self):
+ def __str__(self, source = None):
"""Return the document in XHTML format.
@returns: A serialized XHTML body."""
#try :
+
+ if source :
+ self.source = source
+
doc = self._transform()
xml = doc.toxml()
+
#finally:
# doc.unlink()
# Let's stick in all the raw html pieces
for i in range(self.htmlStash.html_counter) :
+ html = self.htmlStash.rawHtmlBlocks[i]
+ if self.safeMode :
+ html = "[HTML_REMOVED]"
+
xml = xml.replace("<p>%s\n</p>" % (HTML_PLACEHOLDER % i),
- self.htmlStash.rawHtmlBlocks[i] + "\n")
+ html + "\n")
xml = xml.replace(HTML_PLACEHOLDER % i,
- self.htmlStash.rawHtmlBlocks[i])
-
- xml = xml.replace(FN_BACKLINK_TEXT, "↩")
+ html)
# And return everything but the top level tag
- xml = xml.strip()[23:-7]
- return xml
+ if self.stripTopLevelTags :
+ xml = xml.strip()[23:-7] + "\n"
+ for pp in self.textPostprocessors :
+ xml = pp.run(xml)
- toString = __str__
+ return self.docType + xml
-"""
-========================= FOOTNOTES =================================
-
-This section adds footnote handling to markdown. It can be used as
-an example for extending python-markdown with relatively complex
-functionality. While in this case the extension is included inside
-the module itself, it could just as easily be added from outside the
-module. Not that all markdown classes above are ignorant about
-footnotes. All footnote functionality is provided separately and
-then added to the markdown instance at the run time.
-
-Footnote functionality is attached by calling extendMarkdown()
-method of FootnoteExtension. The method also registers the
-extension to allow it's state to be reset by a call to reset()
-method.
-"""
-
-class FootnoteExtension :
-
- def __init__ (self) :
- self.DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
- self.SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a]
- self.reset()
-
- def extendMarkdown(self, md) :
-
- self.md = md
-
- # Stateless extensions do not need to be registered
- md.registerExtension(self)
-
- # Insert a preprocessor before ReferencePreprocessor
- index = md.preprocessors.index(REFERENCE_PREPROCESSOR)
- preprocessor = FootnotePreprocessor(self)
- preprocessor.md = md
- md.preprocessors.insert(index, preprocessor)
-
- # Insert an inline pattern before ImageReferencePattern
- FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
- index = md.inlinePatterns.index(IMAGE_REFERENCE_PATTERN)
- md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))
-
- # Insert a post-processor that would actually add the footnote div
- md.postprocessors.append(FootnotePostprocessor(self))
-
- def reset(self) :
- # May be called by Markdown is state reset is desired
-
- self.footnote_suffix = "-" + str(int(random.random()*1000000000))
- self.used_footnotes={}
- self.footnotes = {}
-
- def setFootnote(self, id, text) :
- self.footnotes[id] = text
-
- def makeFootnoteId(self, num) :
- return 'fn%d%s' % (num, self.footnote_suffix)
-
- def makeFootnoteRefId(self, num) :
- return 'fnr%d%s' % (num, self.footnote_suffix)
-
- def makeFootnotesDiv (self, doc) :
- """Creates the div with class='footnote' and populates it with
- the text of the footnotes.
-
- @returns: the footnote div as a dom element """
-
- if not self.footnotes.keys() :
- return None
-
- div = doc.createElement("div")
- div.setAttribute('class', 'footnote')
- hr = doc.createElement("hr")
- div.appendChild(hr)
- ol = doc.createElement("ol")
- div.appendChild(ol)
-
- footnotes = [(self.used_footnotes[id], id)
- for id in self.footnotes.keys()]
- footnotes.sort()
-
- for i, id in footnotes :
- li = doc.createElement('li')
- li.setAttribute('id', self.makeFootnoteId(i))
-
- self.md._processSection(li, self.footnotes[id].split("\n"))
-
- #li.appendChild(doc.createTextNode(self.footnotes[id]))
-
- backlink = doc.createElement('a')
- backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
- backlink.setAttribute('class', 'footnoteBackLink')
- backlink.setAttribute('title',
- 'Jump back to footnote %d in the text' % 1)
- backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))
-
- if li.childNodes :
- node = li.childNodes[-1]
- if node.type == "text" :
- node = li
- node.appendChild(backlink)
-
- ol.appendChild(li)
-
- return div
-
-
-class FootnotePreprocessor :
-
- def __init__ (self, footnotes) :
- self.footnotes = footnotes
-
- def run(self, lines) :
-
- self.blockGuru = BlockGuru()
- lines = self._handleFootnoteDefinitions (lines)
-
- # Make a hash of all footnote marks in the text so that we
- # know in what order they are supposed to appear. (This
- # function call doesn't really substitute anything - it's just
- # a way to get a callback for each occurence.
-
- text = "\n".join(lines)
- self.footnotes.SHORT_USE_RE.sub(self.recordFootnoteUse, text)
-
- return text.split("\n")
-
-
- def recordFootnoteUse(self, match) :
-
- id = match.group(1)
- id = id.strip()
- nextNum = len(self.footnotes.used_footnotes.keys()) + 1
- self.footnotes.used_footnotes[id] = nextNum
-
-
- def _handleFootnoteDefinitions(self, lines) :
- """Recursively finds all footnote definitions in the lines.
+ toString = __str__
- @param lines: a list of lines of text
- @returns: a string representing the text with footnote
- definitions removed """
- i, id, footnote = self._findFootnoteDefinition(lines)
+ def __unicode__(self):
+ """Return the document in XHTML format as a Unicode object.
+ """
+ return str(self)#.decode(self.encoding)
- if id :
- plain = lines[:i]
+ toUnicode = __unicode__
- detabbed, theRest = self.blockGuru.detectTabbed(lines[i+1:])
- self.footnotes.setFootnote(id,
- footnote + "\n"
- + "\n".join(detabbed))
- more_plain = self._handleFootnoteDefinitions(theRest)
- return plain + [""] + more_plain
- else :
- return lines
+# ====================================================================
- def _findFootnoteDefinition(self, lines) :
- """Finds the first line of a footnote definition.
+def markdownFromFile(input = None,
+ output = None,
+ extensions = [],
+ encoding = None,
+ message_threshold = CRITICAL,
+ safe = False) :
+ """Convert a Markdown file and write the result to a file or stdout.
+
+ @param input: path of the file to read.
+ @param output: path of the file to write; when false the result is
+ written to sys.stdout instead.
+ @param extensions: extension names passed on to markdown().
+ @param encoding: encoding used to decode the input and encode the
+ output; a false value means "utf-8".
+ @param message_threshold: value stored in the module-level
+ MESSAGE_THRESHOLD.
+ @param safe: passed to markdown() as safe_mode.
+ """
- @param lines: a list of lines of text
- @returns: the index of the line containing a footnote definition """
+ global MESSAGE_THRESHOLD
+ MESSAGE_THRESHOLD = message_threshold
- counter = 0
- for line in lines :
- m = self.footnotes.DEF_RE.match(line)
- if m :
- return counter, m.group(2), m.group(3)
- counter += 1
- return counter, None, None
+ message(VERBOSE, "input file: %s" % input)
-class FootnotePattern (BasePattern) :
+ if not encoding :
+ encoding = "utf-8"
- def __init__ (self, pattern, footnotes) :
+ # Decode the input with the requested encoding (previously hard-coded
+ # to "utf-8"), so input and output use the same encoding.
+ input_file = codecs.open(input, mode="r", encoding=encoding)
+ text = input_file.read()
+ input_file.close()
- BasePattern.__init__(self, pattern)
- self.footnotes = footnotes
+ new_text = markdown(text, extensions, encoding, safe_mode = safe)
- def handleMatch(self, m, doc) :
- sup = doc.createElement('sup')
- a = doc.createElement('a')
- sup.appendChild(a)
- id = m.group(2)
- num = self.footnotes.used_footnotes[id]
- sup.setAttribute('id', self.footnotes.makeFootnoteRefId(num))
- a.setAttribute('href', '#' + self.footnotes.makeFootnoteId(num))
- a.appendChild(doc.createTextNode(str(num)))
- return sup
-
-class FootnotePostprocessor :
-
- def __init__ (self, footnotes) :
- self.footnotes = footnotes
-
- def run(self, doc) :
- footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
- if footnotesDiv :
- doc.documentElement.appendChild(footnotesDiv)
+ if output :
+ output_file = codecs.open(output, "w", encoding=encoding)
+ output_file.write(new_text)
+ output_file.close()
-# ====================================================================
+ else :
+ sys.stdout.write(new_text.encode(encoding))
-def markdown(text) :
- message(VERBOSE, "in markdown.py, received text:\n%s" % text)
- return str(Markdown(text))
-
-def markdownWithFootnotes(text):
- message(VERBOSE, "Running markdown with footnotes, "
- + "received text:\n%s" % text)
- md = Markdown()
- footnoteExtension = FootnoteExtension()
- footnoteExtension.extendMarkdown(md)
- md.source = text
-
- return str(md)
-
-def test_markdown(args):
- """test markdown at the command line.
- in each test, arg 0 is the module name"""
- print "\nTEST 1: no arguments on command line"
- cmd_line(["markdown.py"])
- print "\nTEST 2a: 1 argument on command line: a good option"
- cmd_line(["markdown.py","-footnotes"])
- print "\nTEST 2b: 1 argument on command line: a bad option"
- cmd_line(["markdown.py","-foodnotes"])
- print "\nTEST 3: 1 argument on command line: non-existent input file"
- cmd_line(["markdown.py","junk.txt"])
- print "\nTEST 4: 1 argument on command line: existing input file"
- lines = """
-Markdown text with[^1]:
-
-2. **bold text**,
-3. *italic text*.
-
-Then more:
-
- beginning of code block;
- another line of code block.
+def markdown(text,
+ extensions = [],
+ encoding = None,
+ safe_mode = False) :
- a second paragraph of code block.
+ message(VERBOSE, "in markdown.markdown(), received text:\n%s" % text)
-more text to end our file.
-
-[^1]: "italic" means emphasis.
-"""
- fid = "markdown-test.txt"
- f1 = open(fid, 'w+')
- f1.write(lines)
- f1.close()
- cmd_line(["markdown.py",fid])
- print "\nTEST 5: 2 arguments on command line: nofootnotes and input file"
- cmd_line(["markdown.py","-nofootnotes", fid])
- print "\nTEST 6: 2 arguments on command line: footnotes and input file"
- cmd_line(["markdown.py","-footnotes", fid])
- print "\nTEST 7: 3 arguments on command line: nofootnotes,inputfile, outputfile"
- fidout = "markdown-test.html"
- cmd_line(["markdown.py","-nofootnotes", fid, fidout])
-
-
-def get_vars(args):
- """process the command-line args received; return usable variables"""
- #firstly get the variables
-
- message(VERBOSE, "in get_vars(), args: %s" % args)
-
- if len(args) <= 1:
- option, inFile, outFile = (None, None, None)
- elif len(args) >= 4:
- option, inFile, outFile = args[1:4]
- elif len(args) == 3:
- temp1, temp2 = args[1:3]
- if temp1[0] == '-':
- #then we have an option and inFile
- option, inFile, outFile = temp1, temp2, None
- else:
- #we have no option, so we must have inFile and outFile
- option, inFile, outFile = None, temp1, temp2
- else:
- #len(args) = 2
- #we have only one usable arg: might be an option or a file
- temp1 = args[1]
+ extension_names = []
+ extension_configs = {}
+
+ for ext in extensions :
+ pos = ext.find("(")
+ if pos == -1 :
+ extension_names.append(ext)
+ else :
+ name = ext[:pos]
+ extension_names.append(name)
+ pairs = [x.split("=") for x in ext[pos+1:-1].split(",")]
+ configs = [(x.strip(), y.strip()) for (x, y) in pairs]
+ extension_configs[name] = configs
+ #print configs
+
+ md = Markdown(text, extensions=extension_names,
+ extension_configs=extension_configs,
+ safe_mode = safe_mode)
+
+ return md.toString()
- message(VERBOSE, "our single arg is: %s" % str(temp1))
- if temp1[0] == '-':
- #then we have an option
- option, inFile, outFile = temp1, None, None
- else:
- #we have no option, so we must have inFile
- option, inFile, outFile = None, temp1, None
-
- message(VERBOSE,
- "prior to validation, option: %s, inFile: %s, outFile: %s" %
- (str(option), str(inFile), str(outFile),))
-
- return option, inFile, outFile
+class Extension :
+ def __init__(self, configs = {}) :
+ self.config = configs
-USAGE = """
-\nUsing markdown.py:
+ def getConfig(self, key) :
+ if self.config.has_key(key) :
+ #print self.config[key][0]
+ return self.config[key][0]
+ else :
+ return ""
- python markdown.py [option] input_file_with_markdown.txt [output_file.html]
+ def getConfigInfo(self) :
+ return [(key, self.config[key][1]) for key in self.config.keys()]
-Options:
+ def setConfig(self, key, value) :
+ self.config[key][0] = value
- -footnotes or -fn : generate markdown with footnotes
- -test or -t : run a self-test
- -help or -h : print this message
-"""
-
-VALID_OPTIONS = ['footnotes','nofootnotes', 'fn', 'test', 't', 'f',
- 'help', 'h']
+OPTPARSE_WARNING = """
+Python 2.3 or higher required for advanced command line options.
+For lower versions of Python use:
-EXPANDED_OPTIONS = { "fn" : "footnotes",
- "t" : "test",
- "h" : "help" }
+ %s INPUT_FILE > OUTPUT_FILE
+
+""" % EXECUTABLE_NAME_FOR_USAGE
+def parse_options() :
-def validate_option(option) :
+    """ Read the command line (sys.argv) and collect the settings.
+
+    If the optparse module cannot be imported, only the form
+    "INPUT_FILE" (exactly one argument) is accepted.
+
+    @return: a dictionary with the keys 'input', 'output',
+             'message_threshold', 'safe', 'extensions' and 'encoding',
+             or None if the arguments are not usable (a help or
+             warning message has been printed in that case)
+    """
+
+    try :
+        optparse = __import__("optparse")
+    except ImportError :
+        # Only a missing optparse module selects the fallback; any
+        # other error (e.g. KeyboardInterrupt) must not be swallowed.
+        if len(sys.argv) == 2 :
+            return {'input' : sys.argv[1],
+                    'output' : None,
+                    'message_threshold' : CRITICAL,
+                    'safe' : False,
+                    'extensions' : [],
+                    'encoding' : None }
- """ Check if the option makes sense and print an appropriate message
- if it isn't.
-
- @return: valid option string or None
- """
-
- #now validate the variables
- if (option is not None):
- if (len(option) > 1 and option[1:] in VALID_OPTIONS) :
- option = option[1:]
-
- if option in EXPANDED_OPTIONS.keys() :
- option = EXPANDED_OPTIONS[option]
- return option
- else:
- message(CRITICAL,
- "\nSorry, I don't understand option %s" % option)
- message(CRITICAL, USAGE)
+        else :
+            print OPTPARSE_WARNING
             return None
-
-def validate_input_file(inFile) :
- """ Check if the input file is specified and exists.
-
- @return: valid input file path or None
- """
-
- if not inFile :
- message(CRITICAL,
- "\nI need an input filename.\n")
- message(CRITICAL, USAGE)
- return None
-
-
- if os.access(inFile, os.R_OK):
- return inFile
- else :
- message(CRITICAL, "Sorry, I can't find input file %s" % str(inFile))
- return None
-
+    parser = optparse.OptionParser(usage="%prog INPUTFILE [options]")
+
+    parser.add_option("-f", "--file", dest="filename",
+                      help="write output to OUTPUT_FILE",
+                      metavar="OUTPUT_FILE")
+    parser.add_option("-e", "--encoding", dest="encoding",
+                      help="encoding for input and output files",)
+    # -q, -v and --noisy all write to options.verbose; the default
+    # given here applies when none of the three is present.
+    parser.add_option("-q", "--quiet", default = CRITICAL,
+                      action="store_const", const=NONE, dest="verbose",
+                      help="suppress all messages")
+    parser.add_option("-v", "--verbose",
+                      action="store_const", const=INFO, dest="verbose",
+                      help="print info messages")
+    # Default to False so that 'safe' has the same value as in the
+    # fallback branch above when the flag is not given.
+    parser.add_option("-s", "--safe", default = False,
+                      action="store_const", const=True, dest="safe",
+                      help="safe mode (strip user's HTML tag)")
-
-
-def cmd_line(args):
-
- message(VERBOSE, "in cmd_line with args: %s" % args)
-
- option, inFile, outFile = get_vars(args)
+    parser.add_option("--noisy",
+                      action="store_const", const=VERBOSE, dest="verbose",
+                      help="print debug messages")
+    parser.add_option("-x", "--extension", action="append", dest="extensions",
+                      help = "load extension EXTENSION", metavar="EXTENSION")
- if option :
- option = validate_option(option)
- if not option : return
+    (options, args) = parser.parse_args()
- if option == "help" :
- message(CRITICAL, USAGE)
- return
- elif option == "test" :
- test_markdown(None)
- return
-
- inFile = validate_input_file(inFile)
- if not inFile :
- return
+    # Exactly one positional argument (the input file) is required.
+    if len(args) != 1 :
+        parser.print_help()
+        return None
     else :
- input = file(inFile).read()
-
- message(VERBOSE, "Validated command line parameters:" +
- "\n\toption: %s, \n\tinFile: %s, \n\toutFile: %s" % (
- str(option), str(inFile), str(outFile),))
+        input_file = args[0]
- if option == "footnotes" :
- md_function = markdownWithFootnotes
- else :
- md_function = markdown
-
- if outFile is None:
- print md_function(input)
- else:
- output = md_function(input)
- f1 = open(outFile, "w+")
- f1.write(output)
- f1.close()
-
- if os.access(outFile, os.F_OK):
- message(INFO, "Successfully wrote %s" % outFile)
- else:
- message(INFO, "Failed to write %s" % outFile)
+    # With action="append" the attribute stays None when -x is never
+    # used; hand back an empty list instead.
+    if not options.extensions :
+        options.extensions = []
+    return {'input' : input_file,
+            'output' : options.filename,
+            'message_threshold' : options.verbose,
+            'safe' : options.safe,
+            'extensions' : options.extensions,
+            'encoding' : options.encoding }
if __name__ == '__main__':
- """ Run Markdown from the command line.
- Set debug = 3 at top of file to get diagnostic output"""
- args = sys.argv
-
- #set testing=1 to test the command-line response of markdown.py
- testing = 0
- if testing:
- test_markdown(args)
- else:
- cmd_line(args)
-
-"""
-CHANGELOG
-=========
-
-Feb. 28, 2006: Clean-up and command-line handling by Stewart
-Midwinter. (Version 1.3)
+ """ Run Markdown from the command line. """
-Feb. 24, 2006: Fixed a bug with the last line of the list appearing
-again as a separate paragraph. Incorporated Chris Clark's "mailto"
-patch. Added support for <br /> at the end of lines ending in two or
-more spaces. Fixed a crashing bug when using ImageReferencePattern.
-Added several utility methods to Nanodom. (Version 1.2)
+ options = parse_options()
-Jan. 31, 2006: Added "hr" and "hr/" to BLOCK_LEVEL_ELEMENTS and
-changed <hr/> to <hr />. (Thanks to Sergej Chodarev.)
+ #if os.access(inFile, os.R_OK):
-Nov. 26, 2005: Fixed a bug with certain tabbed lines inside lists
-getting wrapped in <pre><code>. (v. 1.1)
-
-Nov. 19, 2005: Made "<!...", "<?...", etc. behave like block-level
-HTML tags.
-
-Nov. 14, 2005: Added entity code and email autolink fix by Tiago
-Cogumbreiro. Fixed some small issues with backticks to get 100%
-compliance with John's test suite. (v. 1.0)
-
-Nov. 7, 2005: Added an unlink method for documents to aid with memory
-collection (per Doug Sauder's suggestion).
-
-Oct. 29, 2005: Restricted a set of html tags that get treated as
-block-level elements.
-
-Sept. 18, 2005: Refactored the whole script to make it easier to
-customize it and made footnote functionality into an extension.
-(v. 0.9)
-
-Sept. 5, 2005: Fixed a bug with multi-paragraph footnotes. Added
-attribute support.
-
-Sept. 1, 2005: Changed the way headers are handled to allow inline
-syntax in headers (e.g. links) and got the lists to use p-tags
-correctly (v. 0.8)
-
-Aug. 29, 2005: Added flexible tabs, fixed a few small issues, added
-basic support for footnotes. Got rid of xml.dom.minidom and added
-pretty-printing. (v. 0.7)
-
-Aug. 13, 2005: Fixed a number of small bugs in order to conform to the
-test suite. (v. 0.6)
-
-Aug. 11, 2005: Added support for inline html and entities, inline
-images, autolinks, underscore emphasis. Cleaned up and refactored the
-code, added some more comments.
-
-Feb. 19, 2005: Rewrote the handling of high-level elements to allow
-multi-line list items and all sorts of nesting.
+ if not options :
+ sys.exit(0)
+
+ markdownFromFile(**options)
-Feb. 3, 2005: Reference-style links, single-line lists, backticks,
-escape, emphasis in the beginning of the paragraph.
-Nov. 2004: Added links, blockquotes, html blocks to Manfred
-Stienstra's code
-Apr. 2004: Manfred's version at http://www.dwerg.net/projects/markdown/
-"""