From: Ben Darnell Date: Sun, 19 Jun 2011 20:29:31 +0000 (-0700) Subject: Remove old markdown version of overview page X-Git-Tag: v2.0.0~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c1b832a92146edc69133f1320f5b4970c5ae90f9;p=thirdparty%2Ftornado.git Remove old markdown version of overview page --- diff --git a/website/app.yaml b/website/app.yaml index d0ee858a3..9484a9113 100644 --- a/website/app.yaml +++ b/website/app.yaml @@ -21,10 +21,10 @@ handlers: static_files: static/favicon.ico upload: static/favicon.ico -- url: /documentation/reference/ +- url: /documentation/? script: website.py -- url: /documentation/reference +- url: /documentation static_dir: sphinx/build/html - url: /.* diff --git a/website/markdown/__init__.py b/website/markdown/__init__.py deleted file mode 100644 index 0d1c50497..000000000 --- a/website/markdown/__init__.py +++ /dev/null @@ -1,603 +0,0 @@ -""" -Python Markdown -=============== - -Python Markdown converts Markdown to HTML and can be used as a library or -called from the command line. - -## Basic usage as a module: - - import markdown - md = Markdown() - html = md.convert(your_text_string) - -## Basic use from the command line: - - python markdown.py source.txt > destination.html - -Run "python markdown.py --help" to see more options. - -## Extensions - -See for more -information and instructions on how to extend the functionality of -Python Markdown. Read that before you try modifying this file. - -## Authors and License - -Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and -maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan -Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com). - -Contact: markdown@freewisdom.org - -Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) -Copyright 200? Django Software Foundation (OrderedDict implementation) -Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 
0.2-1.6b) -Copyright 2004 Manfred Stienstra (the original version) - -License: BSD (see docs/LICENSE for details). -""" - -version = "2.0" -version_info = (2,0,0, "Final") - -import re -import codecs -import sys -import warnings -import logging -from logging import DEBUG, INFO, WARN, ERROR, CRITICAL - - -""" -CONSTANTS -============================================================================= -""" - -""" -Constants you might want to modify ------------------------------------------------------------------------------ -""" - -# default logging level for command-line use -COMMAND_LINE_LOGGING_LEVEL = CRITICAL -TAB_LENGTH = 4 # expand tabs to this many spaces -ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz"> -SMART_EMPHASIS = True # this_or_that does not become thisorthat -DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output -HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode -BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" - "|script|noscript|form|fieldset|iframe|math" - "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody" - "|tr|th|td") -DOC_TAG = "div" # Element used to wrap document - later removed - -# Placeholders -STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder -ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder -INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" -INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX -AMP_SUBSTITUTE = STX+"amp"+ETX - - -""" -Constants you probably do not need to change ------------------------------------------------------------------------------ -""" - -RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'), - # Hebrew (0590-05FF), Arabic (0600-06FF), - # Syriac (0700-074F), Arabic supplement (0750-077F), - # Thaana (0780-07BF), Nko (07C0-07FF). 
- (u'\u2D30', u'\u2D7F'), # Tifinagh - ) - - -""" -AUXILIARY GLOBAL FUNCTIONS -============================================================================= -""" - - -def message(level, text): - """ A wrapper method for logging debug messages. """ - logger = logging.getLogger('MARKDOWN') - if logger.handlers: - # The logger is configured - logger.log(level, text) - if level > WARN: - sys.exit(0) - elif level > WARN: - raise MarkdownException, text - else: - warnings.warn(text, MarkdownWarning) - - -def isBlockLevel(tag): - """Check if the tag is a block level HTML tag.""" - return BLOCK_LEVEL_ELEMENTS.match(tag) - -""" -MISC AUXILIARY CLASSES -============================================================================= -""" - -class AtomicString(unicode): - """A string which should not be further processed.""" - pass - - -class MarkdownException(Exception): - """ A Markdown Exception. """ - pass - - -class MarkdownWarning(Warning): - """ A Markdown Warning. """ - pass - - -""" -OVERALL DESIGN -============================================================================= - -Markdown processing takes place in four steps: - -1. A bunch of "preprocessors" munge the input text. -2. BlockParser() parses the high-level structural elements of the - pre-processed text into an ElementTree. -3. A bunch of "treeprocessors" are run against the ElementTree. One such - treeprocessor runs InlinePatterns against the ElementTree, detecting inline - markup. -4. Some post-processors are run against the text after the ElementTree has - been serialized into text. -5. The output is written to a string. - -Those steps are put together by the Markdown() class. - -""" - -import preprocessors -import blockprocessors -import treeprocessors -import inlinepatterns -import postprocessors -import blockparser -import etree_loader -import odict - -# Extensions should use "markdown.etree" instead of "etree" (or do `from -# markdown import etree`). Do not import it by yourself. 
- -etree = etree_loader.importETree() - -# Adds the ability to output html4 -import html4 - - -class Markdown: - """Convert Markdown to HTML.""" - - def __init__(self, - extensions=[], - extension_configs={}, - safe_mode = False, - output_format=DEFAULT_OUTPUT_FORMAT): - """ - Creates a new Markdown instance. - - Keyword arguments: - - * extensions: A list of extensions. - If they are of type string, the module mdx_name.py will be loaded. - If they are a subclass of markdown.Extension, they will be used - as-is. - * extension-configs: Configuration setting for extensions. - * safe_mode: Disallow raw html. One of "remove", "replace" or "escape". - * output_format: Format of output. Supported formats are: - * "xhtml1": Outputs XHTML 1.x. Default. - * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1). - * "html4": Outputs HTML 4 - * "html": Outputs latest supported version of HTML (currently HTML 4). - Note that it is suggested that the more specific formats ("xhtml1" - and "html4") be used as "xhtml" or "html" may change in the future - if it makes sense at that time. 
- - """ - - self.safeMode = safe_mode - self.registeredExtensions = [] - self.docType = "" - self.stripTopLevelTags = True - - # Preprocessors - self.preprocessors = odict.OrderedDict() - self.preprocessors["html_block"] = \ - preprocessors.HtmlBlockPreprocessor(self) - self.preprocessors["reference"] = \ - preprocessors.ReferencePreprocessor(self) - # footnote preprocessor will be inserted with "amp_substitute" - - # Map format keys to serializers - self.output_formats = { - 'html' : html4.to_html_string, - 'html4' : html4.to_html_string, - 'xhtml' : etree.tostring, - 'xhtml1': etree.tostring, - } - - self.references = {} - self.htmlStash = preprocessors.HtmlStash() - self.registerExtensions(extensions = extensions, - configs = extension_configs) - self.set_output_format(output_format) - self.reset() - - def registerExtensions(self, extensions, configs): - """ - Register extensions with this instance of Markdown. - - Keyword aurguments: - - * extensions: A list of extensions, which can either - be strings or objects. See the docstring on Markdown. - * configs: A dictionary mapping module names to config options. - - """ - for ext in extensions: - if isinstance(ext, basestring): - ext = load_extension(ext, configs.get(ext, [])) - try: - ext.extendMarkdown(self, globals()) - except AttributeError: - message(ERROR, "Incorrect type! Extension '%s' is " - "neither a string or an Extension." %(repr(ext))) - - - def registerExtension(self, extension): - """ This gets called by the extension """ - self.registeredExtensions.append(extension) - - def reset(self): - """ - Resets all state variables so that we can start with a new text. - """ - self.htmlStash.reset() - self.references.clear() - - for extension in self.registeredExtensions: - extension.reset() - - def set_output_format(self, format): - """ Set the output format for the class instance. 
""" - try: - self.serializer = self.output_formats[format.lower()] - except KeyError: - message(CRITICAL, 'Invalid Output Format: "%s". Use one of %s.' \ - % (format, self.output_formats.keys())) - - def convert(self, source): - """ - Convert markdown to serialized XHTML or HTML. - - Keyword arguments: - - * source: Source text as a Unicode string. - - """ - - # Fixup the source text - if not source.strip(): - return u"" # a blank unicode string - try: - source = unicode(source) - except UnicodeDecodeError: - message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.') - return u"" - - source = source.replace(STX, "").replace(ETX, "") - source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" - source = re.sub(r'\n\s+\n', '\n\n', source) - source = source.expandtabs(TAB_LENGTH) - - # Split into lines and run the line preprocessors. - self.lines = source.split("\n") - for prep in self.preprocessors.values(): - self.lines = prep.run(self.lines) - - # Parse the high-level elements. - root = self.parser.parseDocument(self.lines).getroot() - - # Run the tree-processors - for treeprocessor in self.treeprocessors.values(): - newRoot = treeprocessor.run(root) - if newRoot: - root = newRoot - - # Serialize _properly_. Strip top-level tags. - output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf8")) - if self.stripTopLevelTags: - start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2 - end = output.rindex(''%DOC_TAG) - output = output[start:end].strip() - - # Run the text post-processors - for pp in self.postprocessors.values(): - output = pp.run(output) - - return output.strip() - - def convertFile(self, input=None, output=None, encoding=None): - """Converts a markdown file and returns the HTML as a unicode string. 
- - Decodes the file using the provided encoding (defaults to utf-8), - passes the file content to markdown, and outputs the html to either - the provided stream or the file with provided name, using the same - encoding as the source file. - - **Note:** This is the only place that decoding and encoding of unicode - takes place in Python-Markdown. (All other code is unicode-in / - unicode-out.) - - Keyword arguments: - - * input: Name of source text file. - * output: Name of output file. Writes to stdout if `None`. - * encoding: Encoding of input and output files. Defaults to utf-8. - - """ - - encoding = encoding or "utf-8" - - # Read the source - input_file = codecs.open(input, mode="r", encoding=encoding) - text = input_file.read() - input_file.close() - text = text.lstrip(u'\ufeff') # remove the byte-order mark - - # Convert - html = self.convert(text) - - # Write to file or stdout - if isinstance(output, (str, unicode)): - output_file = codecs.open(output, "w", encoding=encoding) - output_file.write(html) - output_file.close() - else: - output.write(html.encode(encoding)) - - -""" -Extensions ------------------------------------------------------------------------------ -""" - -class Extension: - """ Base class for extensions to subclass. """ - def __init__(self, configs = {}): - """Create an instance of an Extention. - - Keyword arguments: - - * configs: A dict of configuration setting used by an Extension. - """ - self.config = configs - - def getConfig(self, key): - """ Return a setting for the given key or an empty string. """ - if key in self.config: - return self.config[key][0] - else: - return "" - - def getConfigInfo(self): - """ Return all config settings as a list of tuples. """ - return [(key, self.config[key][1]) for key in self.config.keys()] - - def setConfig(self, key, value): - """ Set a config setting for `key` with the given `value`. 
""" - self.config[key][0] = value - - def extendMarkdown(self, md, md_globals): - """ - Add the various proccesors and patterns to the Markdown Instance. - - This method must be overriden by every extension. - - Keyword arguments: - - * md: The Markdown instance. - - * md_globals: Global variables in the markdown module namespace. - - """ - pass - - -def load_extension(ext_name, configs = []): - """Load extension by name, then return the module. - - The extension name may contain arguments as part of the string in the - following format: "extname(key1=value1,key2=value2)" - - """ - - # Parse extensions config params (ignore the order) - configs = dict(configs) - pos = ext_name.find("(") # find the first "(" - if pos > 0: - ext_args = ext_name[pos+1:-1] - ext_name = ext_name[:pos] - pairs = [x.split("=") for x in ext_args.split(",")] - configs.update([(x.strip(), y.strip()) for (x, y) in pairs]) - - # Setup the module names - ext_module = 'markdown.extensions' - module_name_new_style = '.'.join([ext_module, ext_name]) - module_name_old_style = '_'.join(['mdx', ext_name]) - - # Try loading the extention first from one place, then another - try: # New style (markdown.extensons.) - module = __import__(module_name_new_style, {}, {}, [ext_module]) - except ImportError: - try: # Old style (mdx.) 
- module = __import__(module_name_old_style) - except ImportError: - message(WARN, "Failed loading extension '%s' from '%s' or '%s'" - % (ext_name, module_name_new_style, module_name_old_style)) - # Return None so we don't try to initiate none-existant extension - return None - - # If the module is loaded successfully, we expect it to define a - # function called makeExtension() - try: - return module.makeExtension(configs.items()) - except AttributeError: - message(CRITICAL, "Failed to initiate extension '%s'" % ext_name) - - -def load_extensions(ext_names): - """Loads multiple extensions""" - extensions = [] - for ext_name in ext_names: - extension = load_extension(ext_name) - if extension: - extensions.append(extension) - return extensions - - -""" -EXPORTED FUNCTIONS -============================================================================= - -Those are the two functions we really mean to export: markdown() and -markdownFromFile(). -""" - -def markdown(text, - extensions = [], - safe_mode = False, - output_format = DEFAULT_OUTPUT_FORMAT): - """Convert a markdown string to HTML and return HTML as a unicode string. - - This is a shortcut function for `Markdown` class to cover the most - basic use case. It initializes an instance of Markdown, loads the - necessary extensions and runs the parser on the given text. - - Keyword arguments: - - * text: Markdown formatted text as Unicode or ASCII string. - * extensions: A list of extensions or extension names (may contain config args). - * safe_mode: Disallow raw html. One of "remove", "replace" or "escape". - * output_format: Format of output. Supported formats are: - * "xhtml1": Outputs XHTML 1.x. Default. - * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1). - * "html4": Outputs HTML 4 - * "html": Outputs latest supported version of HTML (currently HTML 4). 
- Note that it is suggested that the more specific formats ("xhtml1" - and "html4") be used as "xhtml" or "html" may change in the future - if it makes sense at that time. - - Returns: An HTML document as a string. - - """ - md = Markdown(extensions=load_extensions(extensions), - safe_mode=safe_mode, - output_format=output_format) - return md.convert(text) - - -def markdownFromFile(input = None, - output = None, - extensions = [], - encoding = None, - safe_mode = False, - output_format = DEFAULT_OUTPUT_FORMAT): - """Read markdown code from a file and write it to a file or a stream.""" - md = Markdown(extensions=load_extensions(extensions), - safe_mode=safe_mode, - output_format=output_format) - md.convertFile(input, output, encoding) - - - diff --git a/website/markdown/blockparser.py b/website/markdown/blockparser.py deleted file mode 100644 index e18b33848..000000000 --- a/website/markdown/blockparser.py +++ /dev/null @@ -1,95 +0,0 @@ - -import markdown - -class State(list): - """ Track the current and nested state of the parser. - - This utility class is used to track the state of the BlockParser and - support multiple levels if nesting. It's just a simple API wrapped around - a list. Each time a state is set, that state is appended to the end of the - list. Each time a state is reset, that state is removed from the end of - the list. - - Therefore, each time a state is set for a nested block, that state must be - reset when we back out of that level of nesting or the state could be - corrupted. - - While all the methods of a list object are available, only the three - defined below need be used. - - """ - - def set(self, state): - """ Set a new state. """ - self.append(state) - - def reset(self): - """ Step back one step in nested state. """ - self.pop() - - def isstate(self, state): - """ Test that top (current) level is of given state. 
""" - if len(self): - return self[-1] == state - else: - return False - -class BlockParser: - """ Parse Markdown blocks into an ElementTree object. - - A wrapper class that stitches the various BlockProcessors together, - looping through them and creating an ElementTree object. - """ - - def __init__(self): - self.blockprocessors = markdown.odict.OrderedDict() - self.state = State() - - def parseDocument(self, lines): - """ Parse a markdown document into an ElementTree. - - Given a list of lines, an ElementTree object (not just a parent Element) - is created and the root element is passed to the parser as the parent. - The ElementTree object is returned. - - This should only be called on an entire document, not pieces. - - """ - # Create a ElementTree from the lines - self.root = markdown.etree.Element(markdown.DOC_TAG) - self.parseChunk(self.root, '\n'.join(lines)) - return markdown.etree.ElementTree(self.root) - - def parseChunk(self, parent, text): - """ Parse a chunk of markdown text and attach to given etree node. - - While the ``text`` argument is generally assumed to contain multiple - blocks which will be split on blank lines, it could contain only one - block. Generally, this method would be called by extensions when - block parsing is required. - - The ``parent`` etree Element passed in is altered in place. - Nothing is returned. - - """ - self.parseBlocks(parent, text.split('\n\n')) - - def parseBlocks(self, parent, blocks): - """ Process blocks of markdown text and attach to given etree node. - - Given a list of ``blocks``, each blockprocessor is stepped through - until there are no blocks left. While an extension could potentially - call this method directly, it's generally expected to be used internally. - - This is a public method as an extension may need to add/alter additional - BlockProcessors which call this method to recursively parse a nested - block. 
- - """ - while blocks: - for processor in self.blockprocessors.values(): - if processor.test(parent, blocks[0]): - processor.run(parent, blocks) - break - - diff --git a/website/markdown/blockprocessors.py b/website/markdown/blockprocessors.py deleted file mode 100644 index 79f4db93b..000000000 --- a/website/markdown/blockprocessors.py +++ /dev/null @@ -1,460 +0,0 @@ -""" -CORE MARKDOWN BLOCKPARSER -============================================================================= - -This parser handles basic parsing of Markdown blocks. It doesn't concern itself -with inline elements such as **bold** or *italics*, but rather just catches -blocks, lists, quotes, etc. - -The BlockParser is made up of a bunch of BlockProssors, each handling a -different type of block. Extensions may add/replace/remove BlockProcessors -as they need to alter how markdown blocks are parsed. - -""" - -import re -import markdown - -class BlockProcessor: - """ Base class for block processors. - - Each subclass will provide the methods below to work with the source and - tree. Each processor will need to define it's own ``test`` and ``run`` - methods. The ``test`` method should return True or False, to indicate - whether the current block should be processed by this processor. If the - test passes, the parser will call the processors ``run`` method. - - """ - - def __init__(self, parser=None): - self.parser = parser - - def lastChild(self, parent): - """ Return the last child of an etree element. """ - if len(parent): - return parent[-1] - else: - return None - - def detab(self, text): - """ Remove a tab from the front of each line of the given text. 
""" - newtext = [] - lines = text.split('\n') - for line in lines: - if line.startswith(' '*markdown.TAB_LENGTH): - newtext.append(line[markdown.TAB_LENGTH:]) - elif not line.strip(): - newtext.append('') - else: - break - return '\n'.join(newtext), '\n'.join(lines[len(newtext):]) - - def looseDetab(self, text, level=1): - """ Remove a tab from front of lines but allowing dedented lines. """ - lines = text.split('\n') - for i in range(len(lines)): - if lines[i].startswith(' '*markdown.TAB_LENGTH*level): - lines[i] = lines[i][markdown.TAB_LENGTH*level:] - return '\n'.join(lines) - - def test(self, parent, block): - """ Test for block type. Must be overridden by subclasses. - - As the parser loops through processors, it will call the ``test`` method - on each to determine if the given block of text is of that type. This - method must return a boolean ``True`` or ``False``. The actual method of - testing is left to the needs of that particular block type. It could - be as simple as ``block.startswith(some_string)`` or a complex regular - expression. As the block type may be different depending on the parent - of the block (i.e. inside a list), the parent etree element is also - provided and may be used as part of the test. - - Keywords: - - * ``parent``: A etree element which will be the parent of the block. - * ``block``: A block of text from the source which has been split at - blank lines. - """ - pass - - def run(self, parent, blocks): - """ Run processor. Must be overridden by subclasses. - - When the parser determines the appropriate type of a block, the parser - will call the corresponding processor's ``run`` method. This method - should parse the individual lines of the block and append them to - the etree. - - Note that both the ``parent`` and ``etree`` keywords are pointers - to instances of the objects which should be edited in place. 
Each - processor must make changes to the existing objects as there is no - mechanism to return new/different objects to replace them. - - This means that this method should be adding SubElements or adding text - to the parent, and should remove (``pop``) or add (``insert``) items to - the list of blocks. - - Keywords: - - * ``parent``: A etree element which is the parent of the current block. - * ``blocks``: A list of all remaining blocks of the document. - """ - pass - - -class ListIndentProcessor(BlockProcessor): - """ Process children of list items. - - Example: - * a list item - process this part - - or this part - - """ - - INDENT_RE = re.compile(r'^(([ ]{%s})+)'% markdown.TAB_LENGTH) - ITEM_TYPES = ['li'] - LIST_TYPES = ['ul', 'ol'] - - def test(self, parent, block): - return block.startswith(' '*markdown.TAB_LENGTH) and \ - not self.parser.state.isstate('detabbed') and \ - (parent.tag in self.ITEM_TYPES or \ - (len(parent) and parent[-1] and \ - (parent[-1].tag in self.LIST_TYPES) - ) - ) - - def run(self, parent, blocks): - block = blocks.pop(0) - level, sibling = self.get_level(parent, block) - block = self.looseDetab(block, level) - - self.parser.state.set('detabbed') - if parent.tag in self.ITEM_TYPES: - # The parent is already a li. Just parse the child block. - self.parser.parseBlocks(parent, [block]) - elif sibling.tag in self.ITEM_TYPES: - # The sibling is a li. Use it as parent. - self.parser.parseBlocks(sibling, [block]) - elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES: - # The parent is a list (``ol`` or ``ul``) which has children. - # Assume the last child li is the parent of this block. 
- if sibling[-1].text: - # If the parent li has text, that text needs to be moved to a p - block = '%s\n\n%s' % (sibling[-1].text, block) - sibling[-1].text = '' - self.parser.parseChunk(sibling[-1], block) - else: - self.create_item(sibling, block) - self.parser.state.reset() - - def create_item(self, parent, block): - """ Create a new li and parse the block with it as the parent. """ - li = markdown.etree.SubElement(parent, 'li') - self.parser.parseBlocks(li, [block]) - - def get_level(self, parent, block): - """ Get level of indent based on list level. """ - # Get indent level - m = self.INDENT_RE.match(block) - if m: - indent_level = len(m.group(1))/markdown.TAB_LENGTH - else: - indent_level = 0 - if self.parser.state.isstate('list'): - # We're in a tightlist - so we already are at correct parent. - level = 1 - else: - # We're in a looselist - so we need to find parent. - level = 0 - # Step through children of tree to find matching indent level. - while indent_level > level: - child = self.lastChild(parent) - if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES): - if child.tag in self.LIST_TYPES: - level += 1 - parent = child - else: - # No more child levels. If we're short of indent_level, - # we have a code block. So we stop here. - break - return level, parent - - -class CodeBlockProcessor(BlockProcessor): - """ Process code blocks. """ - - def test(self, parent, block): - return block.startswith(' '*markdown.TAB_LENGTH) - - def run(self, parent, blocks): - sibling = self.lastChild(parent) - block = blocks.pop(0) - theRest = '' - if sibling and sibling.tag == "pre" and len(sibling) \ - and sibling[0].tag == "code": - # The previous block was a code block. As blank lines do not start - # new code blocks, append this block to the previous, adding back - # linebreaks removed from the split into a list. 
- code = sibling[0] - block, theRest = self.detab(block) - code.text = markdown.AtomicString('%s\n%s\n' % (code.text, block.rstrip())) - else: - # This is a new codeblock. Create the elements and insert text. - pre = markdown.etree.SubElement(parent, 'pre') - code = markdown.etree.SubElement(pre, 'code') - block, theRest = self.detab(block) - code.text = markdown.AtomicString('%s\n' % block.rstrip()) - if theRest: - # This block contained unindented line(s) after the first indented - # line. Insert these lines as the first block of the master blocks - # list for future processing. - blocks.insert(0, theRest) - - -class BlockQuoteProcessor(BlockProcessor): - - RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)') - - def test(self, parent, block): - return bool(self.RE.search(block)) - - def run(self, parent, blocks): - block = blocks.pop(0) - m = self.RE.search(block) - if m: - before = block[:m.start()] # Lines before blockquote - # Pass lines before blockquote in recursively for parsing forst. - self.parser.parseBlocks(parent, [before]) - # Remove ``> `` from begining of each line. - block = '\n'.join([self.clean(line) for line in - block[m.start():].split('\n')]) - sibling = self.lastChild(parent) - if sibling and sibling.tag == "blockquote": - # Previous block was a blockquote so set that as this blocks parent - quote = sibling - else: - # This is a new blockquote. Create a new parent element. - quote = markdown.etree.SubElement(parent, 'blockquote') - # Recursively parse block with blockquote as parent. - self.parser.parseChunk(quote, block) - - def clean(self, line): - """ Remove ``>`` from beginning of a line. """ - m = self.RE.match(line) - if line.strip() == ">": - return "" - elif m: - return m.group(2) - else: - return line - -class OListProcessor(BlockProcessor): - """ Process ordered list blocks. """ - - TAG = 'ol' - # Detect an item (``1. item``). ``group(1)`` contains contents of item. 
- RE = re.compile(r'^[ ]{0,3}\d+\.[ ](.*)') - # Detect items on secondary lines. they can be of either list type. - CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ](.*)') - # Detect indented (nested) items of either type - INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ].*') - - def test(self, parent, block): - return bool(self.RE.match(block)) - - def run(self, parent, blocks): - # Check fr multiple items in one block. - items = self.get_items(blocks.pop(0)) - sibling = self.lastChild(parent) - if sibling and sibling.tag in ['ol', 'ul']: - # Previous block was a list item, so set that as parent - lst = sibling - # make sure previous item is in a p. - if len(lst) and lst[-1].text and not len(lst[-1]): - p = markdown.etree.SubElement(lst[-1], 'p') - p.text = lst[-1].text - lst[-1].text = '' - # parse first block differently as it gets wrapped in a p. - li = markdown.etree.SubElement(lst, 'li') - self.parser.state.set('looselist') - firstitem = items.pop(0) - self.parser.parseBlocks(li, [firstitem]) - self.parser.state.reset() - else: - # This is a new list so create parent with appropriate tag. - lst = markdown.etree.SubElement(parent, self.TAG) - self.parser.state.set('list') - # Loop through items in block, recursively parsing each with the - # appropriate parent. - for item in items: - if item.startswith(' '*markdown.TAB_LENGTH): - # Item is indented. Parse with last item as parent - self.parser.parseBlocks(lst[-1], [item]) - else: - # New item. Create li and parse with it as parent - li = markdown.etree.SubElement(lst, 'li') - self.parser.parseBlocks(li, [item]) - self.parser.state.reset() - - def get_items(self, block): - """ Break a block into list items. """ - items = [] - for line in block.split('\n'): - m = self.CHILD_RE.match(line) - if m: - # This is a new item. Append - items.append(m.group(3)) - elif self.INDENT_RE.match(line): - # This is an indented (possibly nested) item. 
- if items[-1].startswith(' '*markdown.TAB_LENGTH): - # Previous item was indented. Append to that item. - items[-1] = '%s\n%s' % (items[-1], line) - else: - items.append(line) - else: - # This is another line of previous item. Append to that item. - items[-1] = '%s\n%s' % (items[-1], line) - return items - - -class UListProcessor(OListProcessor): - """ Process unordered list blocks. """ - - TAG = 'ul' - RE = re.compile(r'^[ ]{0,3}[*+-][ ](.*)') - - -class HashHeaderProcessor(BlockProcessor): - """ Process Hash Headers. """ - - # Detect a header at start of any line in block - RE = re.compile(r'(^|\n)(?P#{1,6})(?P
.*?)#*(\n|$)') - - def test(self, parent, block): - return bool(self.RE.search(block)) - - def run(self, parent, blocks): - block = blocks.pop(0) - m = self.RE.search(block) - if m: - before = block[:m.start()] # All lines before header - after = block[m.end():] # All lines after header - if before: - # As the header was not the first line of the block and the - # lines before the header must be parsed first, - # recursively parse this lines as a block. - self.parser.parseBlocks(parent, [before]) - # Create header using named groups from RE - h = markdown.etree.SubElement(parent, 'h%d' % len(m.group('level'))) - h.text = m.group('header').strip() - if after: - # Insert remaining lines as first block for future parsing. - blocks.insert(0, after) - else: - # This should never happen, but just in case... - message(CRITICAL, "We've got a problem header!") - - -class SetextHeaderProcessor(BlockProcessor): - """ Process Setext-style Headers. """ - - # Detect Setext-style header. Must be first 2 lines of block. - RE = re.compile(r'^.*?\n[=-]{3,}', re.MULTILINE) - - def test(self, parent, block): - return bool(self.RE.match(block)) - - def run(self, parent, blocks): - lines = blocks.pop(0).split('\n') - # Determine level. ``=`` is 1 and ``-`` is 2. - if lines[1].startswith('='): - level = 1 - else: - level = 2 - h = markdown.etree.SubElement(parent, 'h%d' % level) - h.text = lines[0].strip() - if len(lines) > 2: - # Block contains additional lines. Add to master blocks for later. - blocks.insert(0, '\n'.join(lines[2:])) - - -class HRProcessor(BlockProcessor): - """ Process Horizontal Rules. """ - - RE = r'[ ]{0,3}(?P[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*' - # Detect hr on any line of a block. - SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE) - # Match a hr on a single line of text. 
- MATCH_RE = re.compile(r'^%s$' % RE) - - def test(self, parent, block): - return bool(self.SEARCH_RE.search(block)) - - def run(self, parent, blocks): - lines = blocks.pop(0).split('\n') - prelines = [] - # Check for lines in block before hr. - for line in lines: - m = self.MATCH_RE.match(line) - if m: - break - else: - prelines.append(line) - if len(prelines): - # Recursively parse lines before hr so they get parsed first. - self.parser.parseBlocks(parent, ['\n'.join(prelines)]) - # create hr - hr = markdown.etree.SubElement(parent, 'hr') - # check for lines in block after hr. - lines = lines[len(prelines)+1:] - if len(lines): - # Add lines after hr to master blocks for later parsing. - blocks.insert(0, '\n'.join(lines)) - - -class EmptyBlockProcessor(BlockProcessor): - """ Process blocks and start with an empty line. """ - - # Detect a block that only contains whitespace - # or only whitespace on the first line. - RE = re.compile(r'^\s*\n') - - def test(self, parent, block): - return bool(self.RE.match(block)) - - def run(self, parent, blocks): - block = blocks.pop(0) - m = self.RE.match(block) - if m: - # Add remaining line to master blocks for later. - blocks.insert(0, block[m.end():]) - sibling = self.lastChild(parent) - if sibling and sibling.tag == 'pre' and sibling[0] and \ - sibling[0].tag == 'code': - # Last block is a codeblock. Append to preserve whitespace. - sibling[0].text = markdown.AtomicString('%s/n/n/n' % sibling[0].text ) - - -class ParagraphProcessor(BlockProcessor): - """ Process Paragraph blocks. """ - - def test(self, parent, block): - return True - - def run(self, parent, blocks): - block = blocks.pop(0) - if block.strip(): - # Not a blank block. Add to parent, otherwise throw it away. - if self.parser.state.isstate('list'): - # The parent is a tight-list. 
Append to parent.text - if parent.text: - parent.text = '%s\n%s' % (parent.text, block) - else: - parent.text = block.lstrip() - else: - # Create a regular paragraph - p = markdown.etree.SubElement(parent, 'p') - p.text = block.lstrip() diff --git a/website/markdown/commandline.py b/website/markdown/commandline.py deleted file mode 100644 index 1eedc6dbb..000000000 --- a/website/markdown/commandline.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -COMMAND-LINE SPECIFIC STUFF -============================================================================= - -The rest of the code is specifically for handling the case where Python -Markdown is called from the command line. -""" - -import markdown -import sys -import logging -from logging import DEBUG, INFO, WARN, ERROR, CRITICAL - -EXECUTABLE_NAME_FOR_USAGE = "python markdown.py" -""" The name used in the usage statement displayed for python versions < 2.3. -(With python 2.3 and higher the usage statement is generated by optparse -and uses the actual name of the executable called.) """ - -OPTPARSE_WARNING = """ -Python 2.3 or higher required for advanced command line options. -For lower versions of Python use: - - %s INPUT_FILE > OUTPUT_FILE - -""" % EXECUTABLE_NAME_FOR_USAGE - -def parse_options(): - """ - Define and parse `optparse` options for command-line usage. 
- """ - - try: - optparse = __import__("optparse") - except: - if len(sys.argv) == 2: - return {'input': sys.argv[1], - 'output': None, - 'safe': False, - 'extensions': [], - 'encoding': None }, CRITICAL - else: - print OPTPARSE_WARNING - return None, None - - parser = optparse.OptionParser(usage="%prog INPUTFILE [options]") - parser.add_option("-f", "--file", dest="filename", default=sys.stdout, - help="write output to OUTPUT_FILE", - metavar="OUTPUT_FILE") - parser.add_option("-e", "--encoding", dest="encoding", - help="encoding for input and output files",) - parser.add_option("-q", "--quiet", default = CRITICAL, - action="store_const", const=CRITICAL+10, dest="verbose", - help="suppress all messages") - parser.add_option("-v", "--verbose", - action="store_const", const=INFO, dest="verbose", - help="print info messages") - parser.add_option("-s", "--safe", dest="safe", default=False, - metavar="SAFE_MODE", - help="safe mode ('replace', 'remove' or 'escape' user's HTML tag)") - parser.add_option("-o", "--output_format", dest="output_format", - default='xhtml1', metavar="OUTPUT_FORMAT", - help="Format of output. 
One of 'xhtml1' (default) or 'html4'.") - parser.add_option("--noisy", - action="store_const", const=DEBUG, dest="verbose", - help="print debug messages") - parser.add_option("-x", "--extension", action="append", dest="extensions", - help = "load extension EXTENSION", metavar="EXTENSION") - - (options, args) = parser.parse_args() - - if not len(args) == 1: - parser.print_help() - return None, None - else: - input_file = args[0] - - if not options.extensions: - options.extensions = [] - - return {'input': input_file, - 'output': options.filename, - 'safe_mode': options.safe, - 'extensions': options.extensions, - 'encoding': options.encoding, - 'output_format': options.output_format}, options.verbose - -def run(): - """Run Markdown from the command line.""" - - # Parse options and adjust logging level if necessary - options, logging_level = parse_options() - if not options: sys.exit(0) - if logging_level: logging.getLogger('MARKDOWN').setLevel(logging_level) - - # Run - markdown.markdownFromFile(**options) diff --git a/website/markdown/etree_loader.py b/website/markdown/etree_loader.py deleted file mode 100644 index e2599b2cb..000000000 --- a/website/markdown/etree_loader.py +++ /dev/null @@ -1,33 +0,0 @@ - -from markdown import message, CRITICAL -import sys - -## Import -def importETree(): - """Import the best implementation of ElementTree, return a module object.""" - etree_in_c = None - try: # Is it Python 2.5+ with C implemenation of ElementTree installed? - import xml.etree.cElementTree as etree_in_c - except ImportError: - try: # Is it Python 2.5+ with Python implementation of ElementTree? - import xml.etree.ElementTree as etree - except ImportError: - try: # An earlier version of Python with cElementTree installed? - import cElementTree as etree_in_c - except ImportError: - try: # An earlier version of Python with Python ElementTree? 
- import elementtree.ElementTree as etree - except ImportError: - message(CRITICAL, "Failed to import ElementTree") - sys.exit(1) - if etree_in_c and etree_in_c.VERSION < "1.0": - message(CRITICAL, "For cElementTree version 1.0 or higher is required.") - sys.exit(1) - elif etree_in_c : - return etree_in_c - elif etree.VERSION < "1.1": - message(CRITICAL, "For ElementTree version 1.1 or higher is required") - sys.exit(1) - else : - return etree - diff --git a/website/markdown/extensions/__init__.py b/website/markdown/extensions/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/website/markdown/extensions/toc.py b/website/markdown/extensions/toc.py deleted file mode 100644 index 3afaea048..000000000 --- a/website/markdown/extensions/toc.py +++ /dev/null @@ -1,140 +0,0 @@ -""" -Table of Contents Extension for Python-Markdown -* * * - -(c) 2008 [Jack Miller](http://codezen.org) - -Dependencies: -* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/) - -""" -import markdown -from markdown import etree -import re - -class TocTreeprocessor(markdown.treeprocessors.Treeprocessor): - # Iterator wrapper to get parent and child all at once - def iterparent(self, root): - for parent in root.getiterator(): - for child in parent: - yield parent, child - - def run(self, doc): - div = etree.Element("div") - div.attrib["class"] = "toc" - last_li = None - - # Add title to the div - if self.config["title"][0]: - header = etree.SubElement(div, "span") - header.attrib["class"] = "toctitle" - header.text = self.config["title"][0] - - level = 0 - list_stack=[div] - header_rgx = re.compile("[Hh][123456]") - - # Get a list of id attributes - used_ids = [] - for c in doc.getiterator(): - if "id" in c.attrib: - used_ids.append(c.attrib["id"]) - - for (p, c) in self.iterparent(doc): - if not c.text: - continue - - # To keep the output from screwing up the - # validation by putting a
 <div> inside of a <p>
 - # we actually replace the <p>
in its entirety. - # We do not allow the marker inside a header as that - # would causes an enless loop of placing a new TOC - # inside previously generated TOC. - - if c.text.find(self.config["marker"][0]) > -1 and not header_rgx.match(c.tag): - for i in range(len(p)): - if p[i] == c: - p[i] = div - break - - if header_rgx.match(c.tag): - tag_level = int(c.tag[-1]) - - # Regardless of how many levels we jumped - # only one list should be created, since - # empty lists containing lists are illegal. - - if tag_level < level: - list_stack.pop() - level = tag_level - - if tag_level > level: - newlist = etree.Element("ul") - if last_li: - last_li.append(newlist) - else: - list_stack[-1].append(newlist) - list_stack.append(newlist) - level = tag_level - - # Do not override pre-existing ids - if not "id" in c.attrib: - id = self.config["slugify"][0](c.text) - if id in used_ids: - ctr = 1 - while "%s_%d" % (id, ctr) in used_ids: - ctr += 1 - id = "%s_%d" % (id, ctr) - used_ids.append(id) - c.attrib["id"] = id - else: - id = c.attrib["id"] - - # List item link, to be inserted into the toc div - last_li = etree.Element("li") - link = etree.SubElement(last_li, "a") - link.text = c.text - link.attrib["href"] = '#' + id - - if int(self.config["anchorlink"][0]): - anchor = etree.SubElement(c, "a") - anchor.text = c.text - anchor.attrib["href"] = "#" + id - anchor.attrib["class"] = "toclink" - c.text = "" - - list_stack[-1].append(last_li) - -class TocExtension(markdown.Extension): - def __init__(self, configs): - self.config = { "marker" : ["[TOC]", - "Text to find and replace with Table of Contents -" - "Defaults to \"[TOC]\""], - "slugify" : [self.slugify, - "Function to generate anchors based on header text-" - "Defaults to a built in slugify function."], - "title" : [None, - "Title to insert into TOC

- " - "Defaults to None"], - "anchorlink" : [0, - "1 if header should be a self link" - "Defaults to 0"]} - - for key, value in configs: - self.setConfig(key, value) - - # This is exactly the same as Django's slugify - def slugify(self, value): - """ Slugify a string, to make it URL friendly. """ - import unicodedata - value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') - value = unicode(re.sub('[^\w\s-]', '', value).strip().lower()) - return re.sub('[-\s]+','-',value) - - def extendMarkdown(self, md, md_globals): - tocext = TocTreeprocessor(md) - tocext.config = self.config - md.treeprocessors.add("toc", tocext, "_begin") - -def makeExtension(configs={}): - return TocExtension(configs=configs) diff --git a/website/markdown/html4.py b/website/markdown/html4.py deleted file mode 100644 index 08f241d57..000000000 --- a/website/markdown/html4.py +++ /dev/null @@ -1,274 +0,0 @@ -# markdown/html4.py -# -# Add html4 serialization to older versions of Elementree -# Taken from ElementTree 1.3 preview with slight modifications -# -# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. 
-# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2007 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. 
-# -------------------------------------------------------------------- - - -import markdown -ElementTree = markdown.etree.ElementTree -QName = markdown.etree.QName -Comment = markdown.etree.Comment -PI = markdown.etree.PI -ProcessingInstruction = markdown.etree.ProcessingInstruction - -HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", - "img", "input", "isindex", "link", "meta" "param") - -try: - HTML_EMPTY = set(HTML_EMPTY) -except NameError: - pass - -_namespace_map = { - # "well-known" namespace prefixes - "http://www.w3.org/XML/1998/namespace": "xml", - "http://www.w3.org/1999/xhtml": "html", - "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", - "http://schemas.xmlsoap.org/wsdl/": "wsdl", - # xml schema - "http://www.w3.org/2001/XMLSchema": "xs", - "http://www.w3.org/2001/XMLSchema-instance": "xsi", - # dublic core - "http://purl.org/dc/elements/1.1/": "dc", -} - - -def _raise_serialization_error(text): - raise TypeError( - "cannot serialize %r (type %s)" % (text, type(text).__name__) - ) - -def _encode(text, encoding): - try: - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_cdata(text, encoding): - # escape character data - try: - # it's worth avoiding do-nothing calls for strings that are - # shorter than 500 character, or so. assume that's, by far, - # the most common case in most applications. 
- if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - - -def _escape_attrib(text, encoding): - # escape attribute value - try: - if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - if "\"" in text: - text = text.replace("\"", """) - if "\n" in text: - text = text.replace("\n", " ") - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_attrib_html(text, encoding): - # escape attribute value - try: - if "&" in text: - text = text.replace("&", "&") - if ">" in text: - text = text.replace(">", ">") - if "\"" in text: - text = text.replace("\"", """) - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - - -def _serialize_html(write, elem, encoding, qnames, namespaces): - tag = elem.tag - text = elem.text - if tag is Comment: - write("" % _escape_cdata(text, encoding)) - elif tag is ProcessingInstruction: - write("" % _escape_cdata(text, encoding)) - else: - tag = qnames[tag] - if tag is None: - if text: - write(_escape_cdata(text, encoding)) - for e in elem: - _serialize_html(write, e, encoding, qnames, None) - else: - write("<" + tag) - items = elem.items() - if items or namespaces: - items.sort() # lexical order - for k, v in items: - if isinstance(k, QName): - k = k.text - if isinstance(v, QName): - v = qnames[v.text] - else: - v = _escape_attrib_html(v, encoding) - # FIXME: handle boolean attributes - write(" %s=\"%s\"" % (qnames[k], v)) - if namespaces: - items = namespaces.items() - items.sort(key=lambda x: x[1]) # sort on prefix - for v, k in items: - if k: - k = ":" + k - write(" 
xmlns%s=\"%s\"" % ( - k.encode(encoding), - _escape_attrib(v, encoding) - )) - write(">") - tag = tag.lower() - if text: - if tag == "script" or tag == "style": - write(_encode(text, encoding)) - else: - write(_escape_cdata(text, encoding)) - for e in elem: - _serialize_html(write, e, encoding, qnames, None) - if tag not in HTML_EMPTY: - write("") - if elem.tail: - write(_escape_cdata(elem.tail, encoding)) - -def write_html(root, f, - # keyword arguments - encoding="us-ascii", - default_namespace=None): - assert root is not None - if not hasattr(f, "write"): - f = open(f, "wb") - write = f.write - if not encoding: - encoding = "us-ascii" - qnames, namespaces = _namespaces( - root, encoding, default_namespace - ) - _serialize_html( - write, root, encoding, qnames, namespaces - ) - -# -------------------------------------------------------------------- -# serialization support - -def _namespaces(elem, encoding, default_namespace=None): - # identify namespaces used in this tree - - # maps qnames to *encoded* prefix:local names - qnames = {None: None} - - # maps uri:s to prefixes - namespaces = {} - if default_namespace: - namespaces[default_namespace] = "" - - def encode(text): - return text.encode(encoding) - - def add_qname(qname): - # calculate serialized qname representation - try: - if qname[:1] == "{": - uri, tag = qname[1:].split("}", 1) - prefix = namespaces.get(uri) - if prefix is None: - prefix = _namespace_map.get(uri) - if prefix is None: - prefix = "ns%d" % len(namespaces) - if prefix != "xml": - namespaces[uri] = prefix - if prefix: - qnames[qname] = encode("%s:%s" % (prefix, tag)) - else: - qnames[qname] = encode(tag) # default element - else: - if default_namespace: - # FIXME: can this be handled in XML 1.0? 
- raise ValueError( - "cannot use non-qualified names with " - "default_namespace option" - ) - qnames[qname] = encode(qname) - except TypeError: - _raise_serialization_error(qname) - - # populate qname and namespaces table - try: - iterate = elem.iter - except AttributeError: - iterate = elem.getiterator # cET compatibility - for elem in iterate(): - tag = elem.tag - if isinstance(tag, QName) and tag.text not in qnames: - add_qname(tag.text) - elif isinstance(tag, basestring): - if tag not in qnames: - add_qname(tag) - elif tag is not None and tag is not Comment and tag is not PI: - _raise_serialization_error(tag) - for key, value in elem.items(): - if isinstance(key, QName): - key = key.text - if key not in qnames: - add_qname(key) - if isinstance(value, QName) and value.text not in qnames: - add_qname(value.text) - text = elem.text - if isinstance(text, QName) and text.text not in qnames: - add_qname(text.text) - return qnames, namespaces - -def to_html_string(element, encoding=None): - class dummy: - pass - data = [] - file = dummy() - file.write = data.append - write_html(ElementTree(element).getroot(),file,encoding) - return "".join(data) diff --git a/website/markdown/inlinepatterns.py b/website/markdown/inlinepatterns.py deleted file mode 100644 index 89fa3b2ef..000000000 --- a/website/markdown/inlinepatterns.py +++ /dev/null @@ -1,371 +0,0 @@ -""" -INLINE PATTERNS -============================================================================= - -Inline patterns such as *emphasis* are handled by means of auxiliary -objects, one per pattern. Pattern objects must be instances of classes -that extend markdown.Pattern. 
Each pattern object uses a single regular -expression and needs support the following methods: - - pattern.getCompiledRegExp() # returns a regular expression - - pattern.handleMatch(m) # takes a match object and returns - # an ElementTree element or just plain text - -All of python markdown's built-in patterns subclass from Pattern, -but you can add additional patterns that don't. - -Also note that all the regular expressions used by inline must -capture the whole block. For this reason, they all start with -'^(.*)' and end with '(.*)!'. In case with built-in expression -Pattern takes care of adding the "^(.*)" and "(.*)!". - -Finally, the order in which regular expressions are applied is very -important - e.g. if we first replace http://.../ links with tags -and _then_ try to replace inline html, we would end up with a mess. -So, we apply the expressions in the following order: - -* escape and backticks have to go before everything else, so - that we can preempt any markdown patterns by escaping them. - -* then we handle auto-links (must be done before inline html) - -* then we handle inline HTML. At this point we will simply - replace all inline HTML strings with a placeholder and add - the actual HTML to a hash. - -* then inline images (must be done before links) - -* then bracketed links, first regular then reference-style - -* finally we apply strong and emphasis -""" - -import markdown -import re -from urlparse import urlparse, urlunparse -import sys -if sys.version >= "3.0": - from html import entities as htmlentitydefs -else: - import htmlentitydefs - -""" -The actual regular expressions for patterns ------------------------------------------------------------------------------ -""" - -NOBRACKET = r'[^\]\[]*' -BRK = ( r'\[(' - + (NOBRACKET + r'(\[')*6 - + (NOBRACKET+ r'\])*')*6 - + NOBRACKET + r')\]' ) -NOIMG = r'(?|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*)\12)?\)''' -# [text](url) or [text]() - -IMAGE_LINK_RE = r'\!' 
+ BRK + r'\s*\((<.*?>|([^\)]*))\)' -# ![alttxt](http://x.com/) or ![alttxt]() -REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]' # [Google][3] -IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2] -NOT_STRONG_RE = r'( \* )' # stand-alone * or _ -AUTOLINK_RE = r'<((?:f|ht)tps?://[^>]*)>' # -AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # - -HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # <...> -ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # & -LINE_BREAK_RE = r' \n' # two spaces at end of line -LINE_BREAK_2_RE = r' $' # two spaces at end of text - - -def dequote(string): - """Remove quotes from around a string.""" - if ( ( string.startswith('"') and string.endswith('"')) - or (string.startswith("'") and string.endswith("'")) ): - return string[1:-1] - else: - return string - -ATTR_RE = re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123} - -def handleAttributes(text, parent): - """Set values of an element based on attribute definitions ({@id=123}).""" - def attributeCallback(match): - parent.set(match.group(1), match.group(2).replace('\n', ' ')) - return ATTR_RE.sub(attributeCallback, text) - - -""" -The pattern classes ------------------------------------------------------------------------------ -""" - -class Pattern: - """Base class that inline patterns subclass. """ - - def __init__ (self, pattern, markdown_instance=None): - """ - Create an instant of an inline pattern. - - Keyword arguments: - - * pattern: A regular expression that matches a pattern - - """ - self.pattern = pattern - self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern, re.DOTALL) - - # Api for Markdown to pass safe_mode into instance - self.safe_mode = False - if markdown_instance: - self.markdown = markdown_instance - - def getCompiledRegExp (self): - """ Return a compiled regular expression. """ - return self.compiled_re - - def handleMatch(self, m): - """Return a ElementTree element from the given match. - - Subclasses should override this method. 
- - Keyword arguments: - - * m: A re match object containing a match of the pattern. - - """ - pass - - def type(self): - """ Return class name, to define pattern type """ - return self.__class__.__name__ - -BasePattern = Pattern # for backward compatibility - -class SimpleTextPattern (Pattern): - """ Return a simple text of group(2) of a Pattern. """ - def handleMatch(self, m): - text = m.group(2) - if text == markdown.INLINE_PLACEHOLDER_PREFIX: - return None - return text - -class SimpleTagPattern (Pattern): - """ - Return element of type `tag` with a text attribute of group(3) - of a Pattern. - - """ - def __init__ (self, pattern, tag): - Pattern.__init__(self, pattern) - self.tag = tag - - def handleMatch(self, m): - el = markdown.etree.Element(self.tag) - el.text = m.group(3) - return el - - -class SubstituteTagPattern (SimpleTagPattern): - """ Return a eLement of type `tag` with no children. """ - def handleMatch (self, m): - return markdown.etree.Element(self.tag) - - -class BacktickPattern (Pattern): - """ Return a `` element containing the matching text. """ - def __init__ (self, pattern): - Pattern.__init__(self, pattern) - self.tag = "code" - - def handleMatch(self, m): - el = markdown.etree.Element(self.tag) - el.text = markdown.AtomicString(m.group(3).strip()) - return el - - -class DoubleTagPattern (SimpleTagPattern): - """Return a ElementTree element nested in tag2 nested in tag1. - - Useful for strong emphasis etc. - - """ - def handleMatch(self, m): - tag1, tag2 = self.tag.split(",") - el1 = markdown.etree.Element(tag1) - el2 = markdown.etree.SubElement(el1, tag2) - el2.text = m.group(3) - return el1 - - -class HtmlPattern (Pattern): - """ Store raw inline html and return a placeholder. """ - def handleMatch (self, m): - rawhtml = m.group(2) - inline = True - place_holder = self.markdown.htmlStash.store(rawhtml) - return place_holder - - -class LinkPattern (Pattern): - """ Return a link element from the given match. 
""" - def handleMatch(self, m): - el = markdown.etree.Element("a") - el.text = m.group(2) - title = m.group(11) - href = m.group(9) - - if href: - if href[0] == "<": - href = href[1:-1] - el.set("href", self.sanitize_url(href.strip())) - else: - el.set("href", "") - - if title: - title = dequote(title) #.replace('"', """) - el.set("title", title) - return el - - def sanitize_url(self, url): - """ - Sanitize a url against xss attacks in "safe_mode". - - Rather than specifically blacklisting `javascript:alert("XSS")` and all - its aliases (see ), we whitelist known - safe url formats. Most urls contain a network location, however some - are known not to (i.e.: mailto links). Script urls do not contain a - location. Additionally, for `javascript:...`, the scheme would be - "javascript" but some aliases will appear to `urlparse()` to have no - scheme. On top of that relative links (i.e.: "foo/bar.html") have no - scheme. Therefore we must check "path", "parameters", "query" and - "fragment" for any literal colons. We don't check "scheme" for colons - because it *should* never have any and "netloc" must allow the form: - `username:password@host:port`. - - """ - locless_schemes = ['', 'mailto', 'news'] - scheme, netloc, path, params, query, fragment = url = urlparse(url) - safe_url = False - if netloc != '' or scheme in locless_schemes: - safe_url = True - - for part in url[2:]: - if ":" in part: - safe_url = False - - if self.markdown.safeMode and not safe_url: - return '' - else: - return urlunparse(url) - -class ImagePattern(LinkPattern): - """ Return a img element from the given match. 
""" - def handleMatch(self, m): - el = markdown.etree.Element("img") - src_parts = m.group(9).split() - if src_parts: - src = src_parts[0] - if src[0] == "<" and src[-1] == ">": - src = src[1:-1] - el.set('src', self.sanitize_url(src)) - else: - el.set('src', "") - if len(src_parts) > 1: - el.set('title', dequote(" ".join(src_parts[1:]))) - - if markdown.ENABLE_ATTRIBUTES: - truealt = handleAttributes(m.group(2), el) - else: - truealt = m.group(2) - - el.set('alt', truealt) - return el - -class ReferencePattern(LinkPattern): - """ Match to a stored reference and return link element. """ - def handleMatch(self, m): - if m.group(9): - id = m.group(9).lower() - else: - # if we got something like "[Google][]" - # we'll use "google" as the id - id = m.group(2).lower() - - if not id in self.markdown.references: # ignore undefined refs - return None - href, title = self.markdown.references[id] - - text = m.group(2) - return self.makeTag(href, title, text) - - def makeTag(self, href, title, text): - el = markdown.etree.Element('a') - - el.set('href', self.sanitize_url(href)) - if title: - el.set('title', title) - - el.text = text - return el - - -class ImageReferencePattern (ReferencePattern): - """ Match to a stored reference and return img element. """ - def makeTag(self, href, title, text): - el = markdown.etree.Element("img") - el.set("src", self.sanitize_url(href)) - if title: - el.set("title", title) - el.set("alt", text) - return el - - -class AutolinkPattern (Pattern): - """ Return a link Element given an autolink (``). """ - def handleMatch(self, m): - el = markdown.etree.Element("a") - el.set('href', m.group(2)) - el.text = markdown.AtomicString(m.group(2)) - return el - -class AutomailPattern (Pattern): - """ - Return a mailto link Element given an automail link (``). 
- """ - def handleMatch(self, m): - el = markdown.etree.Element('a') - email = m.group(2) - if email.startswith("mailto:"): - email = email[len("mailto:"):] - - def codepoint2name(code): - """Return entity definition by code, or the code if not defined.""" - entity = htmlentitydefs.codepoint2name.get(code) - if entity: - return "%s%s;" % (markdown.AMP_SUBSTITUTE, entity) - else: - return "%s#%d;" % (markdown.AMP_SUBSTITUTE, code) - - letters = [codepoint2name(ord(letter)) for letter in email] - el.text = markdown.AtomicString(''.join(letters)) - - mailto = "mailto:" + email - mailto = "".join([markdown.AMP_SUBSTITUTE + '#%d;' % - ord(letter) for letter in mailto]) - el.set('href', mailto) - return el - diff --git a/website/markdown/odict.py b/website/markdown/odict.py deleted file mode 100644 index bf3ef0718..000000000 --- a/website/markdown/odict.py +++ /dev/null @@ -1,162 +0,0 @@ -class OrderedDict(dict): - """ - A dictionary that keeps its keys in the order in which they're inserted. - - Copied from Django's SortedDict with some modifications. 
- - """ - def __new__(cls, *args, **kwargs): - instance = super(OrderedDict, cls).__new__(cls, *args, **kwargs) - instance.keyOrder = [] - return instance - - def __init__(self, data=None): - if data is None: - data = {} - super(OrderedDict, self).__init__(data) - if isinstance(data, dict): - self.keyOrder = data.keys() - else: - self.keyOrder = [] - for key, value in data: - if key not in self.keyOrder: - self.keyOrder.append(key) - - def __deepcopy__(self, memo): - from copy import deepcopy - return self.__class__([(key, deepcopy(value, memo)) - for key, value in self.iteritems()]) - - def __setitem__(self, key, value): - super(OrderedDict, self).__setitem__(key, value) - if key not in self.keyOrder: - self.keyOrder.append(key) - - def __delitem__(self, key): - super(OrderedDict, self).__delitem__(key) - self.keyOrder.remove(key) - - def __iter__(self): - for k in self.keyOrder: - yield k - - def pop(self, k, *args): - result = super(OrderedDict, self).pop(k, *args) - try: - self.keyOrder.remove(k) - except ValueError: - # Key wasn't in the dictionary in the first place. No problem. 
- pass - return result - - def popitem(self): - result = super(OrderedDict, self).popitem() - self.keyOrder.remove(result[0]) - return result - - def items(self): - return zip(self.keyOrder, self.values()) - - def iteritems(self): - for key in self.keyOrder: - yield key, super(OrderedDict, self).__getitem__(key) - - def keys(self): - return self.keyOrder[:] - - def iterkeys(self): - return iter(self.keyOrder) - - def values(self): - return [super(OrderedDict, self).__getitem__(k) for k in self.keyOrder] - - def itervalues(self): - for key in self.keyOrder: - yield super(OrderedDict, self).__getitem__(key) - - def update(self, dict_): - for k, v in dict_.items(): - self.__setitem__(k, v) - - def setdefault(self, key, default): - if key not in self.keyOrder: - self.keyOrder.append(key) - return super(OrderedDict, self).setdefault(key, default) - - def value_for_index(self, index): - """Return the value of the item at the given zero-based index.""" - return self[self.keyOrder[index]] - - def insert(self, index, key, value): - """Insert the key, value pair before the item with the given index.""" - if key in self.keyOrder: - n = self.keyOrder.index(key) - del self.keyOrder[n] - if n < index: - index -= 1 - self.keyOrder.insert(index, key) - super(OrderedDict, self).__setitem__(key, value) - - def copy(self): - """Return a copy of this object.""" - # This way of initializing the copy means it works for subclasses, too. - obj = self.__class__(self) - obj.keyOrder = self.keyOrder[:] - return obj - - def __repr__(self): - """ - Replace the normal dict.__repr__ with a version that returns the keys - in their sorted order. - """ - return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in self.items()]) - - def clear(self): - super(OrderedDict, self).clear() - self.keyOrder = [] - - def index(self, key): - """ Return the index of a given key. """ - return self.keyOrder.index(key) - - def index_for_location(self, location): - """ Return index or None for a given location. 
""" - if location == '_begin': - i = 0 - elif location == '_end': - i = None - elif location.startswith('<') or location.startswith('>'): - i = self.index(location[1:]) - if location.startswith('>'): - if i >= len(self): - # last item - i = None - else: - i += 1 - else: - raise ValueError('Not a valid location: "%s". Location key ' - 'must start with a ">" or "<".' % location) - return i - - def add(self, key, value, location): - """ Insert by key location. """ - i = self.index_for_location(location) - if i is not None: - self.insert(i, key, value) - else: - self.__setitem__(key, value) - - def link(self, key, location): - """ Change location of an existing item. """ - n = self.keyOrder.index(key) - del self.keyOrder[n] - i = self.index_for_location(location) - try: - if i is not None: - self.keyOrder.insert(i, key) - else: - self.keyOrder.append(key) - except Error: - # restore to prevent data loss and reraise - self.keyOrder.insert(n, key) - raise Error diff --git a/website/markdown/postprocessors.py b/website/markdown/postprocessors.py deleted file mode 100644 index 80227bb90..000000000 --- a/website/markdown/postprocessors.py +++ /dev/null @@ -1,77 +0,0 @@ -""" -POST-PROCESSORS -============================================================================= - -Markdown also allows post-processors, which are similar to preprocessors in -that they need to implement a "run" method. However, they are run after core -processing. - -""" - - -import markdown - -class Processor: - def __init__(self, markdown_instance=None): - if markdown_instance: - self.markdown = markdown_instance - -class Postprocessor(Processor): - """ - Postprocessors are run after the ElementTree it converted back into text. - - Each Postprocessor implements a "run" method that takes a pointer to a - text string, modifies it as necessary and returns a text string. - - Postprocessors must extend markdown.Postprocessor. 
- - """ - - def run(self, text): - """ - Subclasses of Postprocessor should implement a `run` method, which - takes the html document as a single text string and returns a - (possibly modified) string. - - """ - pass - - -class RawHtmlPostprocessor(Postprocessor): - """ Restore raw html to the document. """ - - def run(self, text): - """ Iterate over html stash and restore "safe" html. """ - for i in range(self.markdown.htmlStash.html_counter): - html, safe = self.markdown.htmlStash.rawHtmlBlocks[i] - if self.markdown.safeMode and not safe: - if str(self.markdown.safeMode).lower() == 'escape': - html = self.escape(html) - elif str(self.markdown.safeMode).lower() == 'remove': - html = '' - else: - html = markdown.HTML_REMOVED_TEXT - if safe or not self.markdown.safeMode: - text = text.replace("

%s

" % - (markdown.preprocessors.HTML_PLACEHOLDER % i), - html + "\n") - text = text.replace(markdown.preprocessors.HTML_PLACEHOLDER % i, - html) - return text - - def escape(self, html): - """ Basic html escaping """ - html = html.replace('&', '&') - html = html.replace('<', '<') - html = html.replace('>', '>') - return html.replace('"', '"') - - -class AndSubstitutePostprocessor(Postprocessor): - """ Restore valid entities """ - def __init__(self): - pass - - def run(self, text): - text = text.replace(markdown.AMP_SUBSTITUTE, "&") - return text diff --git a/website/markdown/preprocessors.py b/website/markdown/preprocessors.py deleted file mode 100644 index 712a1e875..000000000 --- a/website/markdown/preprocessors.py +++ /dev/null @@ -1,214 +0,0 @@ - -""" -PRE-PROCESSORS -============================================================================= - -Preprocessors work on source text before we start doing anything too -complicated. -""" - -import re -import markdown - -HTML_PLACEHOLDER_PREFIX = markdown.STX+"wzxhzdk:" -HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + markdown.ETX - -class Processor: - def __init__(self, markdown_instance=None): - if markdown_instance: - self.markdown = markdown_instance - -class Preprocessor (Processor): - """ - Preprocessors are run after the text is broken into lines. - - Each preprocessor implements a "run" method that takes a pointer to a - list of lines of the document, modifies it as necessary and returns - either the same pointer or a pointer to a new list. - - Preprocessors must extend markdown.Preprocessor. - - """ - def run(self, lines): - """ - Each subclass of Preprocessor should override the `run` method, which - takes the document as a list of strings split by newlines and returns - the (possibly modified) list of lines. - - """ - pass - -class HtmlStash: - """ - This class is used for stashing HTML objects that we extract - in the beginning and replace with place-holders. 
- """ - - def __init__ (self): - """ Create a HtmlStash. """ - self.html_counter = 0 # for counting inline html segments - self.rawHtmlBlocks=[] - - def store(self, html, safe=False): - """ - Saves an HTML segment for later reinsertion. Returns a - placeholder string that needs to be inserted into the - document. - - Keyword arguments: - - * html: an html segment - * safe: label an html segment as safe for safemode - - Returns : a placeholder string - - """ - self.rawHtmlBlocks.append((html, safe)) - placeholder = HTML_PLACEHOLDER % self.html_counter - self.html_counter += 1 - return placeholder - - def reset(self): - self.html_counter = 0 - self.rawHtmlBlocks = [] - - -class HtmlBlockPreprocessor(Preprocessor): - """Remove html blocks from the text and store them for later retrieval.""" - - right_tag_patterns = ["", "%s>"] - - def _get_left_tag(self, block): - return block[1:].replace(">", " ", 1).split()[0].lower() - - def _get_right_tag(self, left_tag, block): - for p in self.right_tag_patterns: - tag = p % left_tag - i = block.rfind(tag) - if i > 2: - return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag) - return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block) - - def _equal_tags(self, left_tag, right_tag): - if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc. 
- return True - if ("/" + left_tag) == right_tag: - return True - if (right_tag == "--" and left_tag == "--"): - return True - elif left_tag == right_tag[1:] \ - and right_tag[0] != "<": - return True - else: - return False - - def _is_oneliner(self, tag): - return (tag in ['hr', 'hr/']) - - def run(self, lines): - text = "\n".join(lines) - new_blocks = [] - text = text.split("\n\n") - items = [] - left_tag = '' - right_tag = '' - in_tag = False # flag - - while text: - block = text[0] - if block.startswith("\n"): - block = block[1:] - text = text[1:] - - if block.startswith("\n"): - block = block[1:] - - if not in_tag: - if block.startswith("<"): - left_tag = self._get_left_tag(block) - right_tag, data_index = self._get_right_tag(left_tag, block) - - if data_index < len(block): - text.insert(0, block[data_index:]) - block = block[:data_index] - - if not (markdown.isBlockLevel(left_tag) \ - or block[1] in ["!", "?", "@", "%"]): - new_blocks.append(block) - continue - - if self._is_oneliner(left_tag): - new_blocks.append(block.strip()) - continue - - if block[1] == "!": - # is a comment block - left_tag = "--" - right_tag, data_index = self._get_right_tag(left_tag, block) - # keep checking conditions below and maybe just append - - if block.rstrip().endswith(">") \ - and self._equal_tags(left_tag, right_tag): - new_blocks.append( - self.markdown.htmlStash.store(block.strip())) - continue - else: #if not block[1] == "!": - # if is block level tag and is not complete - - if markdown.isBlockLevel(left_tag) or left_tag == "--" \ - and not block.rstrip().endswith(">"): - items.append(block.strip()) - in_tag = True - else: - new_blocks.append( - self.markdown.htmlStash.store(block.strip())) - - continue - - new_blocks.append(block) - - else: - items.append(block.strip()) - - right_tag, data_index = self._get_right_tag(left_tag, block) - - if self._equal_tags(left_tag, right_tag): - # if find closing tag - in_tag = False - new_blocks.append( - 
self.markdown.htmlStash.store('\n\n'.join(items))) - items = [] - - if items: - new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items))) - new_blocks.append('\n') - - new_text = "\n\n".join(new_blocks) - return new_text.split("\n") - - -class ReferencePreprocessor(Preprocessor): - """ Remove reference definitions from text and store for later use. """ - - RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL) - - def run (self, lines): - new_text = []; - for line in lines: - m = self.RE.match(line) - if m: - id = m.group(2).strip().lower() - t = m.group(4).strip() # potential title - if not t: - self.markdown.references[id] = (m.group(3), t) - elif (len(t) >= 2 - and (t[0] == t[-1] == "\"" - or t[0] == t[-1] == "\'" - or (t[0] == "(" and t[-1] == ")") ) ): - self.markdown.references[id] = (m.group(3), t[1:-1]) - else: - new_text.append(line) - else: - new_text.append(line) - - return new_text #+ "\n" diff --git a/website/markdown/treeprocessors.py b/website/markdown/treeprocessors.py deleted file mode 100644 index 1dc612a95..000000000 --- a/website/markdown/treeprocessors.py +++ /dev/null @@ -1,329 +0,0 @@ -import markdown -import re - -def isString(s): - """ Check if it's string """ - return isinstance(s, unicode) or isinstance(s, str) - -class Processor: - def __init__(self, markdown_instance=None): - if markdown_instance: - self.markdown = markdown_instance - -class Treeprocessor(Processor): - """ - Treeprocessors are run on the ElementTree object before serialization. - - Each Treeprocessor implements a "run" method that takes a pointer to an - ElementTree, modifies it as necessary and returns an ElementTree - object. - - Treeprocessors must extend markdown.Treeprocessor. - - """ - def run(self, root): - """ - Subclasses of Treeprocessor should implement a `run` method, which - takes a root ElementTree. 
This method can return another ElementTree - object, and the existing root ElementTree will be replaced, or it can - modify the current tree and return None. - """ - pass - - -class InlineProcessor(Treeprocessor): - """ - A Treeprocessor that traverses a tree, applying inline patterns. - """ - - def __init__ (self, md): - self.__placeholder_prefix = markdown.INLINE_PLACEHOLDER_PREFIX - self.__placeholder_suffix = markdown.ETX - self.__placeholder_length = 4 + len(self.__placeholder_prefix) \ - + len(self.__placeholder_suffix) - self.__placeholder_re = re.compile(markdown.INLINE_PLACEHOLDER % r'([0-9]{4})') - self.markdown = md - - def __makePlaceholder(self, type): - """ Generate a placeholder """ - id = "%04d" % len(self.stashed_nodes) - hash = markdown.INLINE_PLACEHOLDER % id - return hash, id - - def __findPlaceholder(self, data, index): - """ - Extract id from data string, start from index - - Keyword arguments: - - * data: string - * index: index, from which we start search - - Returns: placeholder id and string index, after the found placeholder. - """ - - m = self.__placeholder_re.search(data, index) - if m: - return m.group(1), m.end() - else: - return None, index + 1 - - def __stashNode(self, node, type): - """ Add node to stash """ - placeholder, id = self.__makePlaceholder(type) - self.stashed_nodes[id] = node - return placeholder - - def __handleInline(self, data, patternIndex=0): - """ - Process string with inline patterns and replace it - with placeholders - - Keyword arguments: - - * data: A line of Markdown text - * patternIndex: The index of the inlinePattern to start with - - Returns: String with placeholders. 
- - """ - if not isinstance(data, markdown.AtomicString): - startIndex = 0 - while patternIndex < len(self.markdown.inlinePatterns): - data, matched, startIndex = self.__applyPattern( - self.markdown.inlinePatterns.value_for_index(patternIndex), - data, patternIndex, startIndex) - if not matched: - patternIndex += 1 - return data - - def __processElementText(self, node, subnode, isText=True): - """ - Process placeholders in Element.text or Element.tail - of Elements popped from self.stashed_nodes. - - Keywords arguments: - - * node: parent node - * subnode: processing node - * isText: bool variable, True - it's text, False - it's tail - - Returns: None - - """ - if isText: - text = subnode.text - subnode.text = None - else: - text = subnode.tail - subnode.tail = None - - childResult = self.__processPlaceholders(text, subnode) - - if not isText and node is not subnode: - pos = node.getchildren().index(subnode) - node.remove(subnode) - else: - pos = 0 - - childResult.reverse() - for newChild in childResult: - node.insert(pos, newChild) - - def __processPlaceholders(self, data, parent): - """ - Process string with placeholders and generate ElementTree tree. - - Keyword arguments: - - * data: string with placeholders instead of ElementTree elements. - * parent: Element, which contains processing inline data - - Returns: list with ElementTree elements with applied inline patterns. 
- """ - def linkText(text): - if text: - if result: - if result[-1].tail: - result[-1].tail += text - else: - result[-1].tail = text - else: - if parent.text: - parent.text += text - else: - parent.text = text - - result = [] - strartIndex = 0 - while data: - index = data.find(self.__placeholder_prefix, strartIndex) - if index != -1: - id, phEndIndex = self.__findPlaceholder(data, index) - - if id in self.stashed_nodes: - node = self.stashed_nodes.get(id) - - if index > 0: - text = data[strartIndex:index] - linkText(text) - - if not isString(node): # it's Element - for child in [node] + node.getchildren(): - if child.tail: - if child.tail.strip(): - self.__processElementText(node, child, False) - if child.text: - if child.text.strip(): - self.__processElementText(child, child) - else: # it's just a string - linkText(node) - strartIndex = phEndIndex - continue - - strartIndex = phEndIndex - result.append(node) - - else: # wrong placeholder - end = index + len(prefix) - linkText(data[strartIndex:end]) - strartIndex = end - else: - text = data[strartIndex:] - linkText(text) - data = "" - - return result - - def __applyPattern(self, pattern, data, patternIndex, startIndex=0): - """ - Check if the line fits the pattern, create the necessary - elements, add it to stashed_nodes. - - Keyword arguments: - - * data: the text to be processed - * pattern: the pattern to be checked - * patternIndex: index of current pattern - * startIndex: string index, from which we starting search - - Returns: String with placeholders instead of ElementTree elements. 
- - """ - match = pattern.getCompiledRegExp().match(data[startIndex:]) - leftData = data[:startIndex] - - if not match: - return data, False, 0 - - node = pattern.handleMatch(match) - - if node is None: - return data, True, len(leftData) + match.span(len(match.groups()))[0] - - if not isString(node): - if not isinstance(node.text, markdown.AtomicString): - # We need to process current node too - for child in [node] + node.getchildren(): - if not isString(node): - if child.text: - child.text = self.__handleInline(child.text, - patternIndex + 1) - if child.tail: - child.tail = self.__handleInline(child.tail, - patternIndex) - - placeholder = self.__stashNode(node, pattern.type()) - - return "%s%s%s%s" % (leftData, - match.group(1), - placeholder, match.groups()[-1]), True, 0 - - def run(self, tree): - """Apply inline patterns to a parsed Markdown tree. - - Iterate over ElementTree, find elements with inline tag, apply inline - patterns and append newly created Elements to tree. If you don't - want process your data with inline paterns, instead of normal string, - use subclass AtomicString: - - node.text = markdown.AtomicString("data won't be processed with inline patterns") - - Arguments: - - * markdownTree: ElementTree object, representing Markdown tree. - - Returns: ElementTree object with applied inline patterns. 
- - """ - self.stashed_nodes = {} - - stack = [tree] - - while stack: - currElement = stack.pop() - insertQueue = [] - for child in currElement.getchildren(): - if child.text and not isinstance(child.text, markdown.AtomicString): - text = child.text - child.text = None - lst = self.__processPlaceholders(self.__handleInline( - text), child) - stack += lst - insertQueue.append((child, lst)) - - if child.getchildren(): - stack.append(child) - - for element, lst in insertQueue: - if element.text: - element.text = \ - markdown.inlinepatterns.handleAttributes(element.text, - element) - i = 0 - for newChild in lst: - # Processing attributes - if newChild.tail: - newChild.tail = \ - markdown.inlinepatterns.handleAttributes(newChild.tail, - element) - if newChild.text: - newChild.text = \ - markdown.inlinepatterns.handleAttributes(newChild.text, - newChild) - element.insert(i, newChild) - i += 1 - return tree - - -class PrettifyTreeprocessor(Treeprocessor): - """ Add linebreaks to the html document. """ - - def _prettifyETree(self, elem): - """ Recursively add linebreaks to ElementTree children. """ - - i = "\n" - if markdown.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: - if (not elem.text or not elem.text.strip()) \ - and len(elem) and markdown.isBlockLevel(elem[0].tag): - elem.text = i - for e in elem: - if markdown.isBlockLevel(e.tag): - self._prettifyETree(e) - if not elem.tail or not elem.tail.strip(): - elem.tail = i - if not elem.tail or not elem.tail.strip(): - elem.tail = i - - def run(self, root): - """ Add linebreaks to ElementTree root object. """ - - self._prettifyETree(root) - # Do
's seperately as they are often in the middle of - # inline content and missed by _prettifyETree. - brs = root.getiterator('br') - for br in brs: - if not br.tail or not br.tail.strip(): - br.tail = '\n' - else: - br.tail = '\n%s' % br.tail diff --git a/website/templates/overview.html b/website/templates/overview.html deleted file mode 100644 index aae2718bc..000000000 --- a/website/templates/overview.html +++ /dev/null @@ -1,9 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Tornado Web Server Overview{% end %} - -{% block headertitle %}

overview

{% end %} - -{% block body %} - {% raw markdown("overview.txt", toc=True) %} -{% end %} diff --git a/website/templates/overview.txt b/website/templates/overview.txt deleted file mode 100644 index 6e206c57b..000000000 --- a/website/templates/overview.txt +++ /dev/null @@ -1,1042 +0,0 @@ -Overview --------- -[FriendFeed](http://friendfeed.com/)'s web server is a relatively simple, -non-blocking web server written in Python. The FriendFeed application is -written using a web framework that looks a bit like -[web.py](http://webpy.org/) or Google's -[webapp](http://code.google.com/appengine/docs/python/tools/webapp/), -but with additional tools and optimizations to take advantage of the -non-blocking web server and tools. - -[Tornado](http://github.com/facebook/tornado) is an open source -version of this web server and some of the tools we use most often at -FriendFeed. The framework is distinct from most mainstream web server -frameworks (and certainly most Python frameworks) because it is -non-blocking and reasonably fast. Because it is non-blocking -and uses [epoll](http://www.kernel.org/doc/man-pages/online/pages/man4/epoll.4.html), it can handle 1000s of simultaneous standing connections, -which means the framework is ideal for real-time web services. We built the -web server specifically to handle FriendFeed's real-time features — -every active user of FriendFeed maintains an open connection to the -FriendFeed servers. (For more information on scaling servers to support -thousands of clients, see -[The C10K problem](http://www.kegel.com/c10k.html).) 
- -Here is the canonical "Hello, world" example app: - - import tornado.ioloop - import tornado.web - - class MainHandler(tornado.web.RequestHandler): - def get(self): - self.write("Hello, world") - - application = tornado.web.Application([ - (r"/", MainHandler), - ]) - - if __name__ == "__main__": - application.listen(8888) - tornado.ioloop.IOLoop.instance().start() - -See [Tornado walkthrough](#tornado-walkthrough) below for a detailed -walkthrough of the `tornado.web` package. - -We attempted to clean up the code base to reduce interdependencies between -modules, so you should (theoretically) be able to use any of the modules -independently in your project without using the whole package. - - -Download and install --------------------- -
- -

Manual installation: Download tornado-1.2.1.tar.gz

-
tar xvzf tornado-1.2.1.tar.gz
-cd tornado-1.2.1
-python setup.py build
-sudo python setup.py install
-

The Tornado source code is hosted on GitHub. On Python 2.6+, it is also possible to simply add the tornado directory to your PYTHONPATH instead of building with setup.py, since the standard library includes epoll support.

- -

Prerequisites

-

Tornado has been tested on Python 2.5, 2.6, and 2.7. To use all of the features of Tornado, you need to have PycURL (version 7.18.2 or higher) and (for Python 2.5 only) simplejson installed (Python 2.6 includes JSON support in the standard library so simplejson is not needed). Complete installation instructions for Mac OS X and Ubuntu are included below for convenience.

-

Mac OS X 10.6 (Python 2.6+)

-
sudo easy_install setuptools pycurl
- -

Ubuntu Linux (Python 2.6+)

-
sudo apt-get install python-pycurl
- -

Ubuntu Linux (Python 2.5)

-
sudo apt-get install python-dev python-pycurl python-simplejson
- - -Module index ------------- -The most important module is [`web`](http://github.com/facebook/tornado/blob/master/tornado/web.py), which is the web framework -that includes most of the meat of the Tornado package. The other modules -are tools that make `web` more useful. See -[Tornado walkthrough](#tornado-walkthrough) below for a detailed -walkthrough of the `web` package. - -### Main modules - * [`web`](http://github.com/facebook/tornado/blob/master/tornado/web.py) - The web framework on which FriendFeed is built. `web` incorporates most of the important features of Tornado - * [`escape`](http://github.com/facebook/tornado/blob/master/tornado/escape.py) - XHTML, JSON, and URL encoding/decoding methods - * [`database`](http://github.com/facebook/tornado/blob/master/tornado/database.py) - A simple wrapper around `MySQLdb` to make MySQL easier to use - * [`template`](http://github.com/facebook/tornado/blob/master/tornado/template.py) - A Python-based web templating language - * [`httpclient`](http://github.com/facebook/tornado/blob/master/tornado/httpclient.py) - A non-blocking HTTP client designed to work with `web` and `httpserver` - * [`auth`](http://github.com/facebook/tornado/blob/master/tornado/auth.py) - Implementation of third party authentication and authorization schemes (Google OpenID/OAuth, Facebook Platform, Yahoo BBAuth, FriendFeed OpenID/OAuth, Twitter OAuth) - * [`locale`](http://github.com/facebook/tornado/blob/master/tornado/locale.py) - Localization/translation support - * [`options`](http://github.com/facebook/tornado/blob/master/tornado/options.py) - Command line and config file parsing, optimized for server environments - -### Low-level modules - * [`httpserver`](http://github.com/facebook/tornado/blob/master/tornado/httpserver.py) - A very simple HTTP server on which `web` is built - * [`iostream`](http://github.com/facebook/tornado/blob/master/tornado/iostream.py) - A simple wrapper around non-blocking sockets to aid common reading
and writing patterns - * [`ioloop`](http://github.com/facebook/tornado/blob/master/tornado/ioloop.py) - Core I/O loop - - -Tornado walkthrough -------------------- - -### Request handlers and request arguments - -A Tornado web application maps URLs or URL patterns to subclasses of -`tornado.web.RequestHandler`. Those classes define `get()` or `post()` -methods to handle HTTP `GET` or `POST` requests to that URL. - -This code maps the root URL `/` to `MainHandler` and the URL pattern -`/story/([0-9]+)` to `StoryHandler`. Regular expression groups are passed -as arguments to the `RequestHandler` methods: - - class MainHandler(tornado.web.RequestHandler): - def get(self): - self.write("You requested the main page") - - class StoryHandler(tornado.web.RequestHandler): - def get(self, story_id): - self.write("You requested the story " + story_id) - - application = tornado.web.Application([ - (r"/", MainHandler), - (r"/story/([0-9]+)", StoryHandler), - ]) - -You can get query string arguments and parse `POST` bodies with the -`get_argument()` method: - - class MainHandler(tornado.web.RequestHandler): - def get(self): - self.write('
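<html><body><form action="/" method="post">' - '<input type="text" name="message">' - '<input type="submit" value="Submit">' - '</form></body></html>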
') - - def post(self): - self.set_header("Content-Type", "text/plain") - self.write("You wrote " + self.get_argument("message")) - -Uploaded files are available in `self.request.files`, which maps names -(the name of the HTML `<input type="file">` element) to a list of -files. Each file is a dictionary of the form `{"filename":..., -"content_type":..., "body":...}`. - -If you want to send an error response to the client, e.g., 403 Forbidden, -you can just raise a `tornado.web.HTTPError` exception: - - if not self.user_is_logged_in(): - raise tornado.web.HTTPError(403) - -The request handler can access the object representing the current request -with `self.request`. The `HTTPRequest` object includes a number of useful -attributes, including: - - * `arguments` - all of the `GET` and `POST` arguments - * `files` - all of the uploaded files (via `multipart/form-data` POST requests) - * `path` - the request path (everything before the `?`) - * `headers` - the request headers - -See the class definition for `HTTPRequest` in `httpserver` for a complete list -of attributes. - -### Overriding RequestHandler methods - -In addition to `get()`/`post()`/etc, certain other methods in `RequestHandler` -are designed to be overridden by subclasses when necessary. On every request, -the following sequence of calls takes place: - - 1. A new RequestHandler object is created on each request - 2. `initialize()` is called with keyword arguments from the `Application` - configuration. (the `initialize` method is new in Tornado 1.1; in older - versions subclasses would override `__init__` instead). `initialize` - should typically just save the arguments passed into member variables; - it may not produce any output or call methods like `send_error`. - 3. `prepare()` is called. This is most useful in a base class shared - by all of your handler subclasses, as `prepare` is called no matter - which HTTP method is used.
`prepare` may produce output; if it calls - `finish` (or `send_error`, etc), processing stops here. - 4. One of the HTTP methods is called: `get()`, `post()`, `put()`, etc. - If the URL regular expression contains capturing groups, they are - passed as arguments to this method. - -Here is an example demonstrating the `initialize()` method: - - class ProfileHandler(RequestHandler): - def initialize(self, database): - self.database = database - - def get(self, username): - ... - - app = Application([ - (r'/user/(.*)', ProfileHandler, dict(database=database)), - ]) - -Other methods designed for overriding include: - - * `get_error_html(self, status_code, exception=None, **kwargs)` - returns - HTML (as a string) for use on error pages. - * `get_current_user(self)` - see - [User Authentication](#user-authentication) below - * `get_user_locale(self)` - returns `locale` object to use for the current - user - * `get_login_url(self)` - returns login url to be used by the - `@authenticated` decorator (default is in `Application` settings) - * `get_template_path(self)` - returns location of template files (default is - in `Application` settings) - -### Redirection - -There are two main ways you can redirect requests in Tornado: -`self.redirect` and with the `RedirectHandler`. - -You can use `self.redirect` within a -`RequestHandler` method (like `get`) to redirect users elsewhere. -There is also an optional -parameter `permanent` which you can use to indicate -that the redirection is considered permanent. - -This triggers a `301 Moved Permanently` -HTTP status, which is useful for e.g. -redirecting to a canonical URL for a page in an SEO-friendly manner. - -The default value of `permanent` is `False`, which is apt for things like redirecting -users on successful POST requests. - - self.redirect('/some-canonical-page', permanent=True) - -`RedirectHandler` is available for your use when you initialize `Application`. 
- -For example, notice how we redirect to a longer download URL on this website: - - application = tornado.wsgi.WSGIApplication([ - (r"/([a-z]*)", ContentHandler), - (r"/static/tornado-0.2.tar.gz", tornado.web.RedirectHandler, - dict(url="http://github.com/downloads/facebook/tornado/tornado-0.2.tar.gz")), - ], **settings) - -The default `RedirectHandler` status code is `301 Moved Permanently`, but to use -`302 Found` instead, set `permanent` to `False`. - - application = tornado.wsgi.WSGIApplication([ - (r"/foo", tornado.web.RedirectHandler, {"url":"/bar", "permanent":False}), - ], **settings) - -Note that the default value of `permanent` is different in `self.redirect` than in `RedirectHandler`. -This should make some sense if you consider that `self.redirect` is used in your methods -and is probably invoked by logic involving environment, authentication, or form submission, -but `RedirectHandler` patterns are going to fire 100% of the time they match the request URL. - -### Templates - -You can use any template language supported by Python, but Tornado ships -with its own templating language that is a lot faster and more flexible -than many of the most popular templating systems out there. See the -[`template`](http://github.com/facebook/tornado/blob/master/tornado/template.py) module documentation for complete documentation. - -A Tornado template is just HTML (or any other text-based format) with -Python control sequences and expressions embedded within the markup: - - - - {{ title }} - - -
    - {% for item in items %} -
  • {{ escape(item) }}
  • - {% end %} -
- - - -If you saved this template as "template.html" and put it in the same -directory as your Python file, you could render this template with: - - class MainHandler(tornado.web.RequestHandler): - def get(self): - items = ["Item 1", "Item 2", "Item 3"] - self.render("template.html", title="My title", items=items) - -Tornado templates support *control statements* and *expressions*. Control -statements are surrounded by `{%` and `%}`, e.g., `{% if len(items) > 2 %}`. -Expressions are surrounded by `{{` and `}}`, e.g., `{{ items[0] }}`. - -Control statements more or less map exactly to Python statements. We support -`if`, `for`, `while`, and `try`, all of which are terminated with `{% end %}`. -We also support *template inheritance* using the `extends` and `block` -statements, which are described in detail in the documentation for the -[`template` module](http://github.com/facebook/tornado/blob/master/tornado/template.py). - -Expressions can be any Python expression, including function calls. -Template code is executed in a namespace that includes the following objects -and functions (Note that this list applies to templates rendered using -`RequestHandler.render` and `render_string`. If you're using the `template` -module directly outside of a `RequestHandler` many of these entries are -not present).
- - * `escape`: alias for `tornado.escape.xhtml_escape` - * `xhtml_escape`: alias for `tornado.escape.xhtml_escape` - * `url_escape`: alias for `tornado.escape.url_escape` - * `json_encode`: alias for `tornado.escape.json_encode` - * `squeeze`: alias for `tornado.escape.squeeze` - * `linkify`: alias for `tornado.escape.linkify` - * `datetime`: the Python `datetime` module - * `handler`: the current `RequestHandler` object - * `request`: alias for `handler.request` - * `current_user`: alias for `handler.current_user` - * `locale`: alias for `handler.locale` - * `_`: alias for `handler.locale.translate` - * `static_url`: alias for `handler.static_url` - * `xsrf_form_html`: alias for `handler.xsrf_form_html` - * `reverse_url`: alias for `Application.reverse_url` - * All entries from the `ui_methods` and `ui_modules` `Application` settings - * Any keyword arguments passed to `render` or `render_string` - -When you are building a real application, you are going to want to use -all of the features of Tornado templates, especially template inheritance. -Read all about those features in the [`template` module](http://github.com/facebook/tornado/blob/master/tornado/template.py) -section (some features, including `UIModules` are implemented in the -`web` module) - -Under the hood, Tornado templates are translated directly to Python. -The expressions you include in your template are copied verbatim into -a Python function representing your template. We don't try to prevent -anything in the template language; we created it explicitly to provide -the flexibility that other, stricter templating systems prevent. -Consequently, if you write random stuff inside of your template expressions, -you will get random Python errors when you execute the template. - -All template output is escaped by default, using the -`tornado.escape.xhtml_escape` function. 
This behavior can be changed globally -by passing `autoescape=None` to the `Application` or `TemplateLoader` -constructors, for a template file with the `{% autoescape None %}` -directive, or for a single expression by replacing `{{ ... }}` with -`{% raw ...%}`. Additionally, in each of these places the name of an -alternative escaping function may be used instead of `None`. - - -### Cookies and secure cookies - -You can set cookies in the user's browser with the `set_cookie` method: - - class MainHandler(tornado.web.RequestHandler): - def get(self): - if not self.get_cookie("mycookie"): - self.set_cookie("mycookie", "myvalue") - self.write("Your cookie was not set yet!") - else: - self.write("Your cookie was set!") - -Cookies are easily forged by malicious clients. If you need to set cookies -to, e.g., save the user ID of the currently logged in user, you need to -sign your cookies to prevent forgery. Tornado supports this out of the -box with the `set_secure_cookie` and `get_secure_cookie` methods. To use -these methods, you need to specify a secret key named `cookie_secret` when -you create your application. You can pass in application settings as keyword -arguments to your application: - - application = tornado.web.Application([ - (r"/", MainHandler), - ], cookie_secret="61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=") - -Signed cookies contain the encoded value of the cookie in addition to a -timestamp and an [HMAC](http://en.wikipedia.org/wiki/HMAC) signature. If the -cookie is old or if the signature doesn't match, `get_secure_cookie` will -return `None` just as if the cookie isn't set. 
The secure version of the -example above: - - class MainHandler(tornado.web.RequestHandler): - def get(self): - if not self.get_secure_cookie("mycookie"): - self.set_secure_cookie("mycookie", "myvalue") - self.write("Your cookie was not set yet!") - else: - self.write("Your cookie was set!") - - -### User authentication - -The currently authenticated user is available in every request handler -as `self.current_user`, and in every template as `current_user`. By -default, `current_user` is `None`. - -To implement user authentication in your application, you need to -override the `get_current_user()` method in your request handlers to -determine the current user based on, e.g., the value of a cookie. -Here is an example that lets users log into the application simply -by specifying a nickname, which is then saved in a cookie: - - class BaseHandler(tornado.web.RequestHandler): - def get_current_user(self): - return self.get_secure_cookie("user") - - class MainHandler(BaseHandler): - def get(self): - if not self.current_user: - self.redirect("/login") - return - name = tornado.escape.xhtml_escape(self.current_user) - self.write("Hello, " + name) - - class LoginHandler(BaseHandler): - def get(self): - self.write('
' - 'Name: ' - '' - '
') - - def post(self): - self.set_secure_cookie("user", self.get_argument("name")) - self.redirect("/") - - application = tornado.web.Application([ - (r"/", MainHandler), - (r"/login", LoginHandler), - ], cookie_secret="61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=") - -You can require that the user be logged in using the -[Python decorator](http://www.python.org/dev/peps/pep-0318/) -`tornado.web.authenticated`. If a request goes to a method with this -decorator, and the user is not logged in, they will be redirected to -`login_url` (another application setting). The example above could -be rewritten: - - class MainHandler(BaseHandler): - @tornado.web.authenticated - def get(self): - name = tornado.escape.xhtml_escape(self.current_user) - self.write("Hello, " + name) - - settings = { - "cookie_secret": "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", - "login_url": "/login", - } - application = tornado.web.Application([ - (r"/", MainHandler), - (r"/login", LoginHandler), - ], **settings) - -If you decorate `post()` methods with the `authenticated` decorator, and -the user is not logged in, the server will send a `403` response. - -Tornado comes with built-in support for third-party authentication schemes -like Google OAuth. See the [`auth` module](http://github.com/facebook/tornado/blob/master/tornado/auth.py) for more details. Check -out the Tornado Blog example application for a complete example that -uses authentication (and stores user data in a MySQL database). - - -### Cross-site request forgery protection - -[Cross-site request forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery), or XSRF, is a common problem for personalized web applications. See the -[Wikipedia article](http://en.wikipedia.org/wiki/Cross-site_request_forgery) -for more information on how XSRF works. 
- -The generally accepted solution to prevent XSRF is to cookie every user -with an unpredictable value and include that value as an additional -argument with every form submission on your site. If the cookie and the -value in the form submission do not match, then the request is likely -forged. - -Tornado comes with built-in XSRF protection. To include it in your site, -include the application setting `xsrf_cookies`: - - settings = { - "cookie_secret": "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", - "login_url": "/login", - "xsrf_cookies": True, - } - application = tornado.web.Application([ - (r"/", MainHandler), - (r"/login", LoginHandler), - ], **settings) - -If `xsrf_cookies` is set, the Tornado web application will set the -`_xsrf` cookie for all users and reject all `POST`, `PUT`, and -`DELETE` requests that do not contain a correct `_xsrf` value. If you -turn this setting on, you need to instrument all forms that submit via -`POST` to contain this field. You can do this with the special -function `xsrf_form_html()`, available in all templates: - -
- {{ xsrf_form_html() }} - - -
- -If you submit AJAX `POST` requests, you will also need to instrument your -JavaScript to include the `_xsrf` value with each request. This is the -[jQuery](http://jquery.com/) function we use at FriendFeed for AJAX `POST` -requests that automatically adds the `_xsrf` value to all requests: - - function getCookie(name) { - var r = document.cookie.match("\\b" + name + "=([^;]*)\\b"); - return r ? r[1] : undefined; - } - - jQuery.postJSON = function(url, args, callback) { - args._xsrf = getCookie("_xsrf"); - $.ajax({url: url, data: $.param(args), dataType: "text", type: "POST", - success: function(response) { - callback(eval("(" + response + ")")); - }}); - }; - -For `PUT` and `DELETE` requests (as well as `POST` requests that do not -use form-encoded arguments), the XSRF token may also be passed via -an HTTP header named `X-XSRFToken`. - -If you need to customize XSRF behavior on a per-handler basis, you can -override `RequestHandler.check_xsrf_cookie()`. For example, if you have -an API whose authentication does not use cookies, you may want to disable -XSRF protection by making `check_xsrf_cookie()` do nothing. However, if -you support both cookie and non-cookie-based authentication, it is important -that XSRF protection be used whenever the current request is authenticated -with a cookie. 
- - -### Static files and aggressive file caching - -You can serve static files from Tornado by specifying the `static_path` -setting in your application: - - settings = { - "static_path": os.path.join(os.path.dirname(__file__), "static"), - "cookie_secret": "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", - "login_url": "/login", - "xsrf_cookies": True, - } - application = tornado.web.Application([ - (r"/", MainHandler), - (r"/login", LoginHandler), - (r"/(apple-touch-icon\.png)", tornado.web.StaticFileHandler, dict(path=settings['static_path'])), - ], **settings) - -This setting will automatically make all requests that start with `/static/` -serve from that static directory, e.g., [http://localhost:8888/static/foo.png](http://localhost:8888/static/foo.png) -will serve the file `foo.png` from the specified static directory. We -also automatically serve `/robots.txt` and `/favicon.ico` from the static -directory (even though they don't start with the `/static/` prefix). - -In the above settings, we have explicitly configured Tornado to serve `apple-touch-icon.png` -“from” the root with the `StaticFileHandler`, though it is physically in the static file directory. -(The capturing group in that regular expression is necessary to tell -`StaticFileHandler` the requested filename; capturing groups are passed -to handlers as method arguments.) -You could do the same thing to serve e.g. `sitemap.xml` from the site root. -Of course, you can also avoid faking a root `apple-touch-icon.png` -by using the appropriate `` tag in your HTML. - -To improve performance, it is generally a good idea for browsers to -cache static resources aggressively so browsers won't send unnecessary -`If-Modified-Since` or `Etag` requests that might block the rendering of -the page. Tornado supports this out of the box with *static content -versioning*. 
- -To use this feature, use the `static_url()` method in your templates rather -than typing the URL of the static file directly in your HTML: - - - - FriendFeed - {{ _("Home") }} - - -
- - - -The `static_url()` function will translate that relative path to a URI -that looks like `/static/images/logo.png?v=aae54`. The `v` argument is -a hash of the content in `logo.png`, and its presence makes the Tornado -server send cache headers to the user's browser that will make the browser -cache the content indefinitely. - -Since the `v` argument is based on the content of the file, if you update -a file and restart your server, it will start sending a new `v` value, -so the user's browser will automatically fetch the new file. If the file's -contents don't change, the browser will continue to use a locally cached -copy without ever checking for updates on the server, significantly -improving rendering performance. - -In production, you probably want to serve static files from a more -optimized static file server like [nginx](http://nginx.net/). You can -configure most any web server to support these caching semantics. Here -is the nginx configuration we use at FriendFeed: - - location /static/ { - root /var/friendfeed/static; - if ($query_string) { - expires max; - } - } - - -### Localization - -The locale of the current user (whether they are logged in or not) is -always available as `self.locale` in the request handler and as `locale` -in templates. The name of the locale (e.g., `en_US`) is available as -`locale.name`, and you can translate strings with the `locale.translate` -method. Templates also have the global function call `_()` available -for string translation. The translate function has two forms: - - _("Translate this string") - -which translates the string directly based on the current locale, and - - _("A person liked this", "%(num)d people liked this", len(people)) % {"num": len(people)} - -which translates a string that can be singular or plural based on the value -of the third argument. 
In the example above, a translation of the first -string will be returned if `len(people)` is `1`, or a translation of the -second string will be returned otherwise. - -The most common pattern for translations is to use Python named placeholders -for variables (the `%(num)d` in the example above) since placeholders can -move around on translation. - -Here is a properly localized template: - - - - FriendFeed - {{ _("Sign in") }} - - -
-
{{ _("Username") }}
-
{{ _("Password") }}
-
- {{ xsrf_form_html() }} -
- - - -By default, we detect the user's locale using the `Accept-Language` header -sent by the user's browser. We choose `en_US` if we can't find an appropriate -`Accept-Language` value. If you let users set their locale as a preference, -you can override this default locale selection by overriding `get_user_locale` -in your request handler: - - class BaseHandler(tornado.web.RequestHandler): - def get_current_user(self): - user_id = self.get_secure_cookie("user") - if not user_id: return None - return self.backend.get_user_by_id(user_id) - - def get_user_locale(self): - if "locale" not in self.current_user.prefs: - # Use the Accept-Language header - return None - return self.current_user.prefs["locale"] - -If `get_user_locale` returns `None`, we fall back on the `Accept-Language` -header. - -You can load all the translations for your application using the -`tornado.locale.load_translations` method. It takes in the name of the -directory which should contain CSV files named after the locales whose -translations they contain, e.g., `es_GT.csv` or `fr_CA.csv`. The method -loads all the translations from those CSV files and infers the list of -supported locales based on the presence of each CSV file. You typically -call this method once in the `main()` method of your server: - - def main(): - tornado.locale.load_translations( - os.path.join(os.path.dirname(__file__), "translations")) - start_server() - -You can get the list of supported locales in your application with -`tornado.locale.get_supported_locales()`. The user's locale is chosen to -be the closest match based on the supported locales. For example, if the -user's locale is `es_GT`, and the `es` locale is supported, `self.locale` -will be `es` for that request. We fall back on `en_US` if no close match -can be found. - -See the [`locale` module](http://github.com/facebook/tornado/blob/master/tornado/locale.py) documentation for detailed information -on the CSV format and other localization methods. 
- - -### UI modules - -Tornado supports *UI modules* to make it easy to support standard, reusable -UI widgets across your application. UI modules are like special functional -calls to render components of your page, and they can come packaged with -their own CSS and JavaScript. - -For example, if you are implementing a blog, and you want to have -blog entries appear on both the blog home page and on each blog entry page, -you can make an `Entry` module to render them on both pages. First, create -a Python module for your UI modules, e.g., `uimodules.py`: - - class Entry(tornado.web.UIModule): - def render(self, entry, show_comments=False): - return self.render_string( - "module-entry.html", entry=entry, show_comments=show_comments) - -Tell Tornado to use `uimodules.py` using the `ui_modules` setting in your -application: - - class HomeHandler(tornado.web.RequestHandler): - def get(self): - entries = self.db.query("SELECT * FROM entries ORDER BY date DESC") - self.render("home.html", entries=entries) - - class EntryHandler(tornado.web.RequestHandler): - def get(self, entry_id): - entry = self.db.get("SELECT * FROM entries WHERE id = %s", entry_id) - if not entry: raise tornado.web.HTTPError(404) - self.render("entry.html", entry=entry) - - settings = { - "ui_modules": uimodules, - } - application = tornado.web.Application([ - (r"/", HomeHandler), - (r"/entry/([0-9]+)", EntryHandler), - ], **settings) - -Within `home.html`, you reference the `Entry` module rather than printing -the HTML directly: - - {% for entry in entries %} - {% module Entry(entry) %} - {% end %} - -Within `entry.html`, you reference the `Entry` module with the -`show_comments` argument to show the expanded form of the entry: - - {% module Entry(entry, show_comments=True) %} - -Modules can include custom CSS and JavaScript functions by overriding -the `embedded_css`, `embedded_javascript`, `javascript_files`, or -`css_files` methods: - - class Entry(tornado.web.UIModule): - def 
embedded_css(self): - return ".entry { margin-bottom: 1em; }" - - def render(self, entry, show_comments=False): - return self.render_string( - "module-entry.html", show_comments=show_comments) - -Module CSS and JavaScript will be included once no matter how many times -a module is used on a page. CSS is always included in the `` of the -page, and JavaScript is always included just before the `` tag -at the end of the page. - -When additional Python code is not required, a template file itself may -be used as a module. For example, the preceding example could be -rewritten to put the following in `module-entry.html`: - - {{ set_resources(embedded_css=".entry { margin-bottom: 1em; }") }} - - -This revised template module would be invoked with - - {% module Template("module-entry.html", show_comments=True) %} - -The `set_resources` function is only available in templates invoked via -`{% module Template(...) %}`. Unlike the `{% include ... %}` directive, -template modules have a distinct namespace from their containing template - -they can only see the global template namespace and their own keyword -arguments. - - -### Non-blocking, asynchronous requests - -When a request handler is executed, the request is automatically finished. -Since Tornado uses a non-blocking I/O style, you can override this default -behavior if you want a request to remain open after the main request handler -method returns using the `tornado.web.asynchronous` decorator. 
- -When you use this decorator, it is your responsibility to call -`self.finish()` to finish the HTTP request, or the user's browser -will simply hang: - - class MainHandler(tornado.web.RequestHandler): - @tornado.web.asynchronous - def get(self): - self.write("Hello, world") - self.finish() - -Here is a real example that makes a call to the FriendFeed API using -Tornado's built-in asynchronous HTTP client: - - class MainHandler(tornado.web.RequestHandler): - @tornado.web.asynchronous - def get(self): - http = tornado.httpclient.AsyncHTTPClient() - http.fetch("http://friendfeed-api.com/v2/feed/bret", - callback=self.on_response) - - def on_response(self, response): - if response.error: raise tornado.web.HTTPError(500) - json = tornado.escape.json_decode(response.body) - self.write("Fetched " + str(len(json["entries"])) + " entries " - "from the FriendFeed API") - self.finish() - -When `get()` returns, the request has not finished. When the HTTP client -eventually calls `on_response()`, the request is still open, and the response -is finally flushed to the client with the call to `self.finish()`. - -For a more advanced asynchronous example, take a look at the `chat` example -application, which implements an AJAX chat room using -[long polling](http://en.wikipedia.org/wiki/Push_technology#Long_polling). -Users of long polling may want to override `on_connection_close()` to clean -up after the client closes the connection (but see that method's docstring -for caveats). - - -### Asynchronous HTTP clients - -Tornado includes two non-blocking HTTP client implementations: -`SimpleAsyncHTTPClient` and `CurlAsyncHTTPClient`. The simple client -has no external dependencies because it is implemented directly on top -of Tornado's `IOLoop`. 
The Curl client requires that `libcurl` and -`pycurl` be installed (and a recent version of each is highly -recommended to avoid bugs in older versions' asynchronous interfaces), -but is more likely to be compatible with sites that exercise -little-used parts of the HTTP specification. - -Each of these clients is available in its own module -(`tornado.simple_httpclient` and `tornado.curl_httpclient`), as well as -via a configurable alias in `tornado.httpclient`. `SimpleAsyncHTTPClient` -is the default, but to use a different implementation call the -`AsyncHTTPClient.configure` method at startup: - - AsyncHTTPClient.configure('tornado.curl_httpclient.CurlAsyncHTTPClient') - - -### Third party authentication - -Tornado's `auth` module implements the authentication and authorization -protocols for a number of the most popular sites on the web, including -Google/Gmail, Facebook, Twitter, Yahoo, and FriendFeed. The module includes -methods to log users in via these sites and, where applicable, methods to -authorize access to the service so you can, e.g., download a user's address -book or publish a Twitter message on their behalf. - -Here is an example handler that uses Google for authentication, saving -the Google credentials in a cookie for later access: - - class GoogleHandler(tornado.web.RequestHandler, tornado.auth.GoogleMixin): - @tornado.web.asynchronous - def get(self): - if self.get_argument("openid.mode", None): - self.get_authenticated_user(self._on_auth) - return - self.authenticate_redirect() - - def _on_auth(self, user): - if not user: - self.authenticate_redirect() - return - # Save the user with, e.g., set_secure_cookie() - -See the `auth` module documentation for more details. - - -### Debug mode and automatic reloading - -If you pass `debug=True` to the `Application` constructor, the app will be run -in debug mode. In this mode, templates will not be cached and the app will -watch for changes to its source files and reload itself when anything changes. 
-This reduces the need to manually restart the server during development. -However, certain failures (such as syntax errors at import time) can still -take the server down in a way that debug mode cannot currently recover from. - -Debug mode is not compatible with `HTTPServer`'s multi-process mode. You -must not give `HTTPServer.start` an argument greater than 1 if you are using -debug mode. - -The automatic reloading feature of debug mode is available as a -standalone module in `tornado.autoreload`, and is optionally used by -the test runner in `tornado.testing.main`. - - -Performance ------------ -Web application performance is generally bound by architecture, not frontend -performance. That said, Tornado is pretty fast relative to most popular -Python web frameworks. - -We ran a few remedial load tests on a simple "Hello, world" application -in each of the most popular Python web frameworks -([Django](http://www.djangoproject.com/), [web.py](http://webpy.org/), and -[CherryPy](http://www.cherrypy.org/)) to get the baseline performance of -each relative to Tornado. We used Apache/mod_wsgi for Django and web.py -and ran CherryPy as a standalone server, which was our impression of how -each framework is typically run in production environments. We ran 4 -single-threaded Tornado frontends behind an [nginx](http://nginx.net/) -reverse proxy, which is how we recommend running Tornado in production -(our load test machine had four cores, and we recommend 1 frontend per -core). - -We load tested each with Apache Benchmark (`ab`) on a separate machine -with the command - - ab -n 100000 -c 25 http://10.0.1.x/ - -The results (requests per second) on a 2.4GHz AMD Opteron processor with -4 cores: - -
- -In our tests, Tornado consistently had 4X the throughput of the next fastest -framework, and even a single standalone Tornado frontend got 33% more -throughput even though it only used one of the four cores. - -Not very scientific, but at a high level, it should give you a sense that we -have cared about performance as we built Tornado, and it shouldn't add too -much latency to your apps relative to most Python web development frameworks. - - -Running Tornado in production ------------------------------ -At FriendFeed, we use [nginx](http://nginx.net/) as a load balancer -and static file server. We run multiple instances of the Tornado web -server on multiple frontend machines. We typically run one Tornado frontend -per core on the machine (sometimes more depending on utilization). - -When running behind a load balancer like nginx, it is recommended to pass -`xheaders=True` to the `HTTPServer` constructor. This will tell Tornado -to use headers like `X-Real-IP` to get the user's IP address instead of -attributing all traffic to the balancer's IP address. - -This is a barebones nginx config file that is structurally similar to the -one we use at FriendFeed. 
It assumes nginx and the Tornado servers -are running on the same machine, and the four Tornado servers -are running on ports 8000 - 8003: - - user nginx; - worker_processes 1; - - error_log /var/log/nginx/error.log; - pid /var/run/nginx.pid; - - events { - worker_connections 1024; - use epoll; - } - - http { - # Enumerate all the Tornado servers here - upstream frontends { - server 127.0.0.1:8000; - server 127.0.0.1:8001; - server 127.0.0.1:8002; - server 127.0.0.1:8003; - } - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - access_log /var/log/nginx/access.log; - - keepalive_timeout 65; - proxy_read_timeout 200; - sendfile on; - tcp_nopush on; - tcp_nodelay on; - gzip on; - gzip_min_length 1000; - gzip_proxied any; - gzip_types text/plain text/html text/css text/xml - application/x-javascript application/xml - application/atom+xml text/javascript; - - # Only retry if there was a communication error, not a timeout - # on the Tornado server (to avoid propagating "queries of death" - # to all frontends) - proxy_next_upstream error; - - server { - listen 80; - - # Allow file uploads - client_max_body_size 50M; - - location ^~ /static/ { - root /var/www; - if ($query_string) { - expires max; - } - } - location = /favicon.ico { - rewrite (.*) /static/favicon.ico; - } - location = /robots.txt { - rewrite (.*) /static/robots.txt; - } - - location / { - proxy_pass_header Server; - proxy_set_header Host $http_host; - proxy_redirect false; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Scheme $scheme; - proxy_pass http://frontends; - } - } - } - - -WSGI and Google AppEngine -------------------------- -Tornado comes with limited support for [WSGI](http://wsgi.org/). However, -since WSGI does not support non-blocking requests, you cannot use any -of the asynchronous/non-blocking features of Tornado in your application -if you choose to use WSGI instead of Tornado's HTTP server. 
Some of the -features that are not available in WSGI applications: -`@tornado.web.asynchronous`, the `httpclient` module, and the `auth` module. - -You can create a valid WSGI application from your Tornado request handlers -by using `WSGIApplication` in the `wsgi` module instead of using -`tornado.web.Application`. Here is an example that uses the built-in WSGI -`CGIHandler` to make a valid -[Google AppEngine](http://code.google.com/appengine/) application: - - import tornado.web - import tornado.wsgi - import wsgiref.handlers - - class MainHandler(tornado.web.RequestHandler): - def get(self): - self.write("Hello, world") - - if __name__ == "__main__": - application = tornado.wsgi.WSGIApplication([ - (r"/", MainHandler), - ]) - wsgiref.handlers.CGIHandler().run(application) - -See the `appengine` example application for a full-featured AppEngine -app built on Tornado. - - -Caveats and support -------------------- -Because FriendFeed and other large users of Tornado run [behind -nginx](#running-tornado-in-production) or Apache proxies, Tornado's HTTP -server currently does not attempt to handle multi-line headers and some -types of malformed input. - -You can discuss Tornado and report bugs on [the Tornado developer mailing list](http://groups.google.com/group/python-tornado). diff --git a/website/website.py b/website/website.py index 32ec68493..380820871 100644 --- a/website/website.py +++ b/website/website.py @@ -14,7 +14,6 @@ # License for the specific language governing permissions and limitations # under the License. 
-import markdown import os import os.path import time @@ -25,23 +24,10 @@ import wsgiref.handlers class ContentHandler(tornado.web.RequestHandler): def get(self, path="index"): - paths = ("overview", "index") + paths = ("index",) if path not in paths: raise tornado.web.HTTPError(404) - self.render(path + ".html", markdown=self.markdown) - - def markdown(self, path, toc=False): - if not hasattr(ContentHandler, "_md") or self.settings.get("debug"): - ContentHandler._md = {} - if path not in ContentHandler._md: - full_path = os.path.join(self.settings["template_path"], path) - f = open(full_path, "r") - contents = f.read().decode("utf-8") - f.close() - if toc: contents = u"[TOC]\n\n" + contents - md = markdown.Markdown(extensions=["toc"] if toc else []) - ContentHandler._md[path] = md.convert(contents).encode("utf-8") - return ContentHandler._md[path] + self.render(path + ".html") settings = { @@ -52,16 +38,13 @@ settings = { application = tornado.wsgi.WSGIApplication([ (r"/", ContentHandler), (r"/(index)", ContentHandler), - (r"/documentation/(overview)", ContentHandler), (r"/static/tornado-0.1.tar.gz", tornado.web.RedirectHandler, dict(url="http://github.com/downloads/facebook/tornado/tornado-0.1.tar.gz")), (r"/static/tornado-0.2.tar.gz", tornado.web.RedirectHandler, dict(url="http://github.com/downloads/facebook/tornado/tornado-0.2.tar.gz")), (r"/documentation/?", tornado.web.RedirectHandler, - dict(url="/documentation/overview")), - (r"/documentation/reference/?", tornado.web.RedirectHandler, - dict(url="/documentation/reference/index.html")), + dict(url="/documentation/index.html")), ], **settings)

Automatic installation: Tornado is listed in PyPI and can be installed with pip or easy_install. If you do not already have libcurl installed you may need to install it separately; see the prerequisites section below. Note that the source distribution includes demo applications that are not present when Tornado is installed using pip or easy_install