From: Bret Taylor Date: Thu, 10 Sep 2009 07:50:51 +0000 (-0700) Subject: Move Tornado project to Github X-Git-Tag: v1.0.0~141 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=2afa97349330d1a224a7e4fdce0dcdf760222a8b;p=thirdparty%2Ftornado.git Move Tornado project to Github --- diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..c7a51e409 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include demos *.py *.yaml *.html *.css *.png *.js *.xml *.sql README +include tornado/epoll.c diff --git a/README b/README index 90c586f70..d50402224 100644 --- a/README +++ b/README @@ -1,17 +1,12 @@ Tornado ======= Tornado is an open source version of the scalable, non-blocking web server -and and tools that power FriendFeed. The FriendFeed application is written -using a web framework that looks a bit like web.py or Google's webapp, but -with additional tools and optimizations to take advantage of the underlying -non-blocking infrastructure. - -Documentation and downloads are available at http://www.tornadoweb.org/ +and and tools that power FriendFeed. Documentation and downloads are +available at http://www.tornadoweb.org/ Tornado is licensed under the Apache Licence, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0.html). - Installation ============ To install: @@ -30,4 +25,3 @@ On Mac OS X, you can install the packages with: On Ubuntu Linux, you can install the packages with: sudo apt-get install python-pycurl python-simplejson - diff --git a/demos/appengine/README b/demos/appengine/README new file mode 100644 index 000000000..e4aead670 --- /dev/null +++ b/demos/appengine/README @@ -0,0 +1,48 @@ +Running the Tornado AppEngine example +===================================== +This example is designed to run in Google AppEngine, so there are a couple +of steps to get it running. You can download the Google AppEngine Python +development environment at http://code.google.com/appengine/downloads.html. + +1. 
Link or copy the tornado code directory into this directory: + + ln -s ../../tornado tornado + + AppEngine doesn't use the Python modules installed on this machine. + You need to have the 'tornado' module copied or linked for AppEngine + to find it. + +3. Install and run dev_appserver + + If you don't already have the App Engine SDK, download it from + http://code.google.com/appengine/downloads.html + + To start the tornado demo, run the dev server on this directory: + + dev_appserver.py . + +4. Visit http://localhost:8080/ in your browser + + If you sign in as an administrator, you will be able to create and + edit blog posts. If you sign in as anybody else, you will only see + the existing blog posts. + + +If you want to deploy the blog in production: + +1. Register a new appengine application and put its id in app.yaml + + First register a new application at http://appengine.google.com/. + Then edit app.yaml in this directory and change the "application" + setting from "tornado-appenginge" to your new application id. + +2. Deploy to App Engine + + If you registered an application id, you can now upload your new + Tornado blog by running this command: + + appcfg update . + + After that, visit application_id.appspot.com, where application_id + is the application you registered. 
+ diff --git a/demos/appengine/app.yaml b/demos/appengine/app.yaml new file mode 100644 index 000000000..2d00c586d --- /dev/null +++ b/demos/appengine/app.yaml @@ -0,0 +1,11 @@ +application: tornado-appengine +version: 1 +runtime: python +api_version: 1 + +handlers: +- url: /static/ + static_dir: static + +- url: /.* + script: blog.py diff --git a/demos/appengine/blog.py b/demos/appengine/blog.py new file mode 100644 index 000000000..2695e77bf --- /dev/null +++ b/demos/appengine/blog.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import functools +import markdown +import os.path +import re +import tornado.web +import tornado.wsgi +import unicodedata +import wsgiref.handlers + +from google.appengine.api import users +from google.appengine.ext import db + + +class Entry(db.Model): + """A single blog entry.""" + author = db.UserProperty() + title = db.StringProperty(required=True) + slug = db.StringProperty(required=True) + markdown = db.TextProperty(required=True) + html = db.TextProperty(required=True) + published = db.DateTimeProperty(auto_now_add=True) + updated = db.DateTimeProperty(auto_now=True) + + +def administrator(method): + """Decorate with this method to restrict to site admins.""" + @functools.wraps(method) + def wrapper(self, *args, **kwargs): + if not self.current_user: + if self.request.method == "GET": + self.redirect(self.get_login_url()) + return + raise web.HTTPError(403) + elif not self.current_user.administrator: + if self.request.method == "GET": + self.redirect("/") + return + raise web.HTTPError(403) + else: + return method(self, *args, **kwargs) + return wrapper + + +class BaseHandler(tornado.web.RequestHandler): + """Implements Google Accounts authentication methods.""" + def get_current_user(self): + user = users.get_current_user() + if user: user.administrator = users.is_current_user_admin() + return user + + def get_login_url(self): + return users.create_login_url(self.request.uri) + + def render_string(self, template_name, **kwargs): + # Let the templates access the users module to generate login URLs + return tornado.web.RequestHandler.render_string( + self, template_name, users=users, **kwargs) + + +class HomeHandler(BaseHandler): + def get(self): + entries = db.Query(Entry).order('-published').fetch(limit=5) + if not entries: + if not self.current_user or self.current_user.administrator: + self.redirect("/compose") + return + self.render("home.html", entries=entries) + + +class EntryHandler(BaseHandler): + def get(self, slug): + entry = 
db.Query(Entry).filter("slug =", slug).get() + if not entry: raise tornado.web.HTTPError(404) + self.render("entry.html", entry=entry) + + +class ArchiveHandler(BaseHandler): + def get(self): + entries = db.Query(Entry).order('-published') + self.render("archive.html", entries=entries) + + +class FeedHandler(BaseHandler): + def get(self): + entries = db.Query(Entry).order('-published').fetch(limit=10) + self.set_header("Content-Type", "application/atom+xml") + self.render("feed.xml", entries=entries) + + +class ComposeHandler(BaseHandler): + @administrator + def get(self): + key = self.get_argument("key", None) + entry = Entry.get(key) if key else None + self.render("compose.html", entry=entry) + + @administrator + def post(self): + key = self.get_argument("key", None) + if key: + entry = Entry.get(key) + entry.title = self.get_argument("title") + entry.markdown = self.get_argument("markdown") + entry.html = markdown.markdown(self.get_argument("markdown")) + else: + title = self.get_argument("title") + slug = unicodedata.normalize("NFKD", title).encode( + "ascii", "ignore") + slug = re.sub(r"[^\w]+", " ", slug) + slug = "-".join(slug.lower().strip().split()) + if not slug: slug = "entry" + while True: + existing = db.Query(Entry).filter("slug =", slug).get() + if not existing or str(existing.key()) == key: + break + slug += "-2" + entry = Entry( + author=self.current_user, + title=title, + slug=slug, + markdown=self.get_argument("markdown"), + html=markdown.markdown(self.get_argument("markdown")), + ) + entry.put() + self.redirect("/entry/" + entry.slug) + + +class EntryModule(tornado.web.UIModule): + def render(self, entry): + return self.render_string("modules/entry.html", entry=entry) + + +settings = { + "blog_title": u"Tornado Blog", + "template_path": os.path.join(os.path.dirname(__file__), "templates"), + "ui_modules": {"Entry": EntryModule}, + "xsrf_cookies": True, +} +application = tornado.wsgi.WSGIApplication([ + (r"/", HomeHandler), + (r"/archive", 
ArchiveHandler), + (r"/feed", FeedHandler), + (r"/entry/([^/]+)", EntryHandler), + (r"/compose", ComposeHandler), +], **settings) + + +def main(): + wsgiref.handlers.CGIHandler().run(application) + + +if __name__ == "__main__": + main() diff --git a/demos/appengine/markdown.py b/demos/appengine/markdown.py new file mode 100644 index 000000000..59ba731bf --- /dev/null +++ b/demos/appengine/markdown.py @@ -0,0 +1,1877 @@ +#!/usr/bin/env python +# Copyright (c) 2007-2008 ActiveState Corp. +# License: MIT (http://www.opensource.org/licenses/mit-license.php) + +r"""A fast and complete Python implementation of Markdown. + +[from http://daringfireball.net/projects/markdown/] +> Markdown is a text-to-HTML filter; it translates an easy-to-read / +> easy-to-write structured text format into HTML. Markdown's text +> format is most similar to that of plain text email, and supports +> features such as headers, *emphasis*, code blocks, blockquotes, and +> links. +> +> Markdown's syntax is designed not as a generic markup language, but +> specifically to serve as a front-end to (X)HTML. You can use span-level +> HTML tags anywhere in a Markdown document, and you can use block level +> HTML tags (like
and as well). + +Module usage: + + >>> import markdown2 + >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` + u'

boo!

\n' + + >>> markdowner = Markdown() + >>> markdowner.convert("*boo!*") + u'

boo!

\n' + >>> markdowner.convert("**boom!**") + u'

boom!

\n' + +This implementation of Markdown implements the full "core" syntax plus a +number of extras (e.g., code syntax coloring, footnotes) as described on +. +""" + +cmdln_desc = """A fast and complete Python implementation of Markdown, a +text-to-HTML conversion tool for web writers. +""" + +# Dev Notes: +# - There is already a Python markdown processor +# (http://www.freewisdom.org/projects/python-markdown/). +# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm +# not yet sure if there implications with this. Compare 'pydoc sre' +# and 'perldoc perlre'. + +__version_info__ = (1, 0, 1, 14) # first three nums match Markdown.pl +__version__ = '1.0.1.14' +__author__ = "Trent Mick" + +import os +import sys +from pprint import pprint +import re +import logging +try: + from hashlib import md5 +except ImportError: + from md5 import md5 +import optparse +from random import random +import codecs + + + +#---- Python version compat + +if sys.version_info[:2] < (2,4): + from sets import Set as set + def reversed(sequence): + for i in sequence[::-1]: + yield i + def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): + return unicode(s, encoding, errors) +else: + def _unicode_decode(s, encoding, errors='strict'): + return s.decode(encoding, errors) + + +#---- globals + +DEBUG = False +log = logging.getLogger("markdown") + +DEFAULT_TAB_WIDTH = 4 + +# Table of hash values for escaped characters: +def _escape_hash(s): + # Lame attempt to avoid possible collision with someone actually + # using the MD5 hexdigest of one of these chars in there text. + # Other ideas: random.random(), uuid.uuid() + #return md5(s).hexdigest() # Markdown.pl effectively does this. 
+ return 'md5-'+md5(s).hexdigest() +g_escape_table = dict([(ch, _escape_hash(ch)) + for ch in '\\`*_{}[]()>#+-.!']) + + + +#---- exceptions + +class MarkdownError(Exception): + pass + + + +#---- public api + +def markdown_path(path, encoding="utf-8", + html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None, + use_file_vars=False): + text = codecs.open(path, 'r', encoding).read() + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns, + use_file_vars=use_file_vars).convert(text) + +def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None, + use_file_vars=False): + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns, + use_file_vars=use_file_vars).convert(text) + +class Markdown(object): + # The dict of "extras" to enable in processing -- a mapping of + # extra name to argument for the extra. Most extras do not have an + # argument, in which case the value is None. + # + # This can be set via (a) subclassing and (b) the constructor + # "extras" argument. 
+ extras = None + + urls = None + titles = None + html_blocks = None + html_spans = None + html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py + + # Used to track when we're inside an ordered or unordered list + # (see _ProcessListItems() for details): + list_level = 0 + + _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) + + def __init__(self, html4tags=False, tab_width=4, safe_mode=None, + extras=None, link_patterns=None, use_file_vars=False): + if html4tags: + self.empty_element_suffix = ">" + else: + self.empty_element_suffix = " />" + self.tab_width = tab_width + + # For compatibility with earlier markdown2.py and with + # markdown.py's safe_mode being a boolean, + # safe_mode == True -> "replace" + if safe_mode is True: + self.safe_mode = "replace" + else: + self.safe_mode = safe_mode + + if self.extras is None: + self.extras = {} + elif not isinstance(self.extras, dict): + self.extras = dict([(e, None) for e in self.extras]) + if extras: + if not isinstance(extras, dict): + extras = dict([(e, None) for e in extras]) + self.extras.update(extras) + assert isinstance(self.extras, dict) + self._instance_extras = self.extras.copy() + self.link_patterns = link_patterns + self.use_file_vars = use_file_vars + self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) + + def reset(self): + self.urls = {} + self.titles = {} + self.html_blocks = {} + self.html_spans = {} + self.list_level = 0 + self.extras = self._instance_extras.copy() + if "footnotes" in self.extras: + self.footnotes = {} + self.footnote_ids = [] + + def convert(self, text): + """Convert the given text.""" + # Main function. The order in which other subs are called here is + # essential. Link and image substitutions need to happen before + # _EscapeSpecialChars(), so that any *'s or _'s in the + # and tags get encoded. + + # Clear the global hashes. 
If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + self.reset() + + if not isinstance(text, unicode): + #TODO: perhaps shouldn't presume UTF-8 for string input? + text = unicode(text, 'utf-8') + + if self.use_file_vars: + # Look for emacs-style file variable hints. + emacs_vars = self._get_emacs_vars(text) + if "markdown-extras" in emacs_vars: + splitter = re.compile("[ ,]+") + for e in splitter.split(emacs_vars["markdown-extras"]): + if '=' in e: + ename, earg = e.split('=', 1) + try: + earg = int(earg) + except ValueError: + pass + else: + ename, earg = e, None + self.extras[ename] = earg + + # Standardize line endings: + text = re.sub("\r\n|\r", "\n", text) + + # Make sure $text ends with a couple of newlines: + text += "\n\n" + + # Convert all tabs to spaces. + text = self._detab(text) + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + text = self._ws_only_line_re.sub("", text) + + if self.safe_mode: + text = self._hash_html_spans(text) + + # Turn block-level HTML blocks into hash entries + text = self._hash_html_blocks(text, raw=True) + + # Strip link definitions, store in hashes. 
+ if "footnotes" in self.extras: + # Must do footnotes first because an unlucky footnote defn + # looks like a link defn: + # [^4]: this "looks like a link defn" + text = self._strip_footnote_definitions(text) + text = self._strip_link_definitions(text) + + text = self._run_block_gamut(text) + + if "footnotes" in self.extras: + text = self._add_footnotes(text) + + text = self._unescape_special_chars(text) + + if self.safe_mode: + text = self._unhash_html_spans(text) + + text += "\n" + return text + + _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) + # This regular expression is intended to match blocks like this: + # PREFIX Local Variables: SUFFIX + # PREFIX mode: Tcl SUFFIX + # PREFIX End: SUFFIX + # Some notes: + # - "[ \t]" is used instead of "\s" to specifically exclude newlines + # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does + # not like anything other than Unix-style line terminators. + _emacs_local_vars_pat = re.compile(r"""^ + (?P(?:[^\r\n|\n|\r])*?) + [\ \t]*Local\ Variables:[\ \t]* + (?P.*?)(?:\r\n|\n|\r) + (?P.*?\1End:) + """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE) + + def _get_emacs_vars(self, text): + """Return a dictionary of emacs-style local variables. + + Parsing is done loosely according to this spec (and according to + some in-practice deviations from this): + http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables + """ + emacs_vars = {} + SIZE = pow(2, 13) # 8kB + + # Search near the start for a '-*-'-style one-liner of variables. 
+ head = text[:SIZE] + if "-*-" in head: + match = self._emacs_oneliner_vars_pat.search(head) + if match: + emacs_vars_str = match.group(1) + assert '\n' not in emacs_vars_str + emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';') + if s.strip()] + if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]: + # While not in the spec, this form is allowed by emacs: + # -*- Tcl -*- + # where the implied "variable" is "mode". This form + # is only allowed if there are no other variables. + emacs_vars["mode"] = emacs_var_strs[0].strip() + else: + for emacs_var_str in emacs_var_strs: + try: + variable, value = emacs_var_str.strip().split(':', 1) + except ValueError: + log.debug("emacs variables error: malformed -*- " + "line: %r", emacs_var_str) + continue + # Lowercase the variable name because Emacs allows "Mode" + # or "mode" or "MoDe", etc. + emacs_vars[variable.lower()] = value.strip() + + tail = text[-SIZE:] + if "Local Variables" in tail: + match = self._emacs_local_vars_pat.search(tail) + if match: + prefix = match.group("prefix") + suffix = match.group("suffix") + lines = match.group("content").splitlines(0) + #print "prefix=%r, suffix=%r, content=%r, lines: %s"\ + # % (prefix, suffix, match.group("content"), lines) + + # Validate the Local Variables block: proper prefix and suffix + # usage. + for i, line in enumerate(lines): + if not line.startswith(prefix): + log.debug("emacs variables error: line '%s' " + "does not use proper prefix '%s'" + % (line, prefix)) + return {} + # Don't validate suffix on last line. Emacs doesn't care, + # neither should we. + if i != len(lines)-1 and not line.endswith(suffix): + log.debug("emacs variables error: line '%s' " + "does not use proper suffix '%s'" + % (line, suffix)) + return {} + + # Parse out one emacs var per line. 
+ continued_for = None + for line in lines[:-1]: # no var on the last line ("PREFIX End:") + if prefix: line = line[len(prefix):] # strip prefix + if suffix: line = line[:-len(suffix)] # strip suffix + line = line.strip() + if continued_for: + variable = continued_for + if line.endswith('\\'): + line = line[:-1].rstrip() + else: + continued_for = None + emacs_vars[variable] += ' ' + line + else: + try: + variable, value = line.split(':', 1) + except ValueError: + log.debug("local variables error: missing colon " + "in local variables entry: '%s'" % line) + continue + # Do NOT lowercase the variable name, because Emacs only + # allows "mode" (and not "Mode", "MoDe", etc.) in this block. + value = value.strip() + if value.endswith('\\'): + value = value[:-1].rstrip() + continued_for = variable + else: + continued_for = None + emacs_vars[variable] = value + + # Unquote values. + for var, val in emacs_vars.items(): + if len(val) > 1 and (val.startswith('"') and val.endswith('"') + or val.startswith('"') and val.endswith('"')): + emacs_vars[var] = val[1:-1] + + return emacs_vars + + # Cribbed from a post by Bart Lateur: + # + _detab_re = re.compile(r'(.*?)\t', re.M) + def _detab_sub(self, match): + g1 = match.group(1) + return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) + def _detab(self, text): + r"""Remove (leading?) tabs from a file. + + >>> m = Markdown() + >>> m._detab("\tfoo") + ' foo' + >>> m._detab(" \tfoo") + ' foo' + >>> m._detab("\t foo") + ' foo' + >>> m._detab(" foo") + ' foo' + >>> m._detab(" foo\n\tbar\tblam") + ' foo\n bar blam' + """ + if '\t' not in text: + return text + return self._detab_re.subn(self._detab_sub, text)[0] + + _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' + _strict_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? 
# any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_a, + re.X | re.M) + + _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' + _liberal_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_b, + re.X | re.M) + + def _hash_html_block_sub(self, match, raw=False): + html = match.group(1) + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + return "\n\n" + key + "\n\n" + + def _hash_html_blocks(self, text, raw=False): + """Hashify HTML blocks + + We only want to do this for block-level HTML tags, such as headers, + lists, and tables. That's because we still want to wrap

s around + "paragraphs" that are wrapped in non-block-level tags, such as anchors, + phrase emphasis, and spans. The list of tags we're looking for is + hard-coded. + + @param raw {boolean} indicates if these are raw HTML blocks in + the original source. It makes a difference in "safe" mode. + """ + if '<' not in text: + return text + + # Pass `raw` value into our calls to self._hash_html_block_sub. + hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw) + + # First, look for nested blocks, e.g.: + #

+ #
+ # tags for inner block must be indented. + #
+ #
+ # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `
` and stop at the first `
`. + text = self._strict_tag_block_re.sub(hash_html_block_sub, text) + + # Now match more liberally, simply from `\n` to `\n` + text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) + + # Special case just for
. It was easier to make a special + # case than to make the other regex more complicated. + if "", start_idx) + 3 + except ValueError, ex: + break + + # Start position for next comment block search. + start = end_idx + + # Validate whitespace before comment. + if start_idx: + # - Up to `tab_width - 1` spaces before start_idx. + for i in range(self.tab_width - 1): + if text[start_idx - 1] != ' ': + break + start_idx -= 1 + if start_idx == 0: + break + # - Must be preceded by 2 newlines or hit the start of + # the document. + if start_idx == 0: + pass + elif start_idx == 1 and text[0] == '\n': + start_idx = 0 # to match minute detail of Markdown.pl regex + elif text[start_idx-2:start_idx] == '\n\n': + pass + else: + break + + # Validate whitespace after comment. + # - Any number of spaces and tabs. + while end_idx < len(text): + if text[end_idx] not in ' \t': + break + end_idx += 1 + # - Must be following by 2 newlines or hit end of text. + if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): + continue + + # Escape and hash (must match `_hash_html_block_sub`). + html = text[start_idx:end_idx] + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:] + + if "xml" in self.extras: + # Treat XML processing instructions and namespaced one-liner + # tags as if they were block HTML tags. E.g., if standalone + # (i.e. are their own paragraph), the following do not get + # wrapped in a

tag: + # + # + # + _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width) + text = _xml_oneliner_re.sub(hash_html_block_sub, text) + + return text + + def _strip_link_definitions(self, text): + # Strips link definitions from text, stores the URLs and titles in + # hash references. + less_than_tab = self.tab_width - 1 + + # Link defs are in the form: + # [id]: url "optional title" + _link_def_re = re.compile(r""" + ^[ ]{0,%d}\[(.+)\]: # id = \1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = \2 + [ \t]* + (?: + \n? # maybe one newline + [ \t]* + (?<=\s) # lookbehind for whitespace + ['"(] + ([^\n]*) # title = \3 + ['")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + """ % less_than_tab, re.X | re.M | re.U) + return _link_def_re.sub(self._extract_link_def_sub, text) + + def _extract_link_def_sub(self, match): + id, url, title = match.groups() + key = id.lower() # Link IDs are case-insensitive + self.urls[key] = self._encode_amps_and_angles(url) + if title: + self.titles[key] = title.replace('"', '"') + return "" + + def _extract_footnote_def_sub(self, match): + id, text = match.groups() + text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() + normed_id = re.sub(r'\W', '-', id) + # Ensure footnote text ends with a couple newlines (for some + # block gamut matches). + self.footnotes[normed_id] = text + "\n\n" + return "" + + def _strip_footnote_definitions(self, text): + """A footnote definition looks like this: + + [^note-id]: Text of the note. + + May include one or more indented paragraphs. + + Where, + - The 'note-id' can be pretty much anything, though typically it + is the number of the footnote. + - The first paragraph may start on the next line, like so: + + [^note-id]: + Text of the note. + """ + less_than_tab = self.tab_width - 1 + footnote_def_re = re.compile(r''' + ^[ ]{0,%d}\[\^(.+)\]: # id = \1 + [ \t]* + ( # footnote text = \2 + # First line need not start with the spaces. 
+ (?:\s*.*\n+) + (?: + (?:[ ]{%d} | \t) # Subsequent lines must be indented. + .*\n+ + )* + ) + # Lookahead for non-space at line-start, or end of doc. + (?:(?=^[ ]{0,%d}\S)|\Z) + ''' % (less_than_tab, self.tab_width, self.tab_width), + re.X | re.M) + return footnote_def_re.sub(self._extract_footnote_def_sub, text) + + + _hr_res = [ + re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), + ] + + def _run_block_gamut(self, text): + # These are all the transformations that form block-level + # tags like paragraphs, headers, and list items. + + text = self._do_headers(text) + + # Do Horizontal Rules: + hr = "\n tags around block-level tags. + text = self._hash_html_blocks(text) + + text = self._form_paragraphs(text) + + return text + + def _pyshell_block_sub(self, match): + lines = match.group(0).splitlines(0) + _dedentlines(lines) + indent = ' ' * self.tab_width + s = ('\n' # separate from possible cuddled paragraph + + indent + ('\n'+indent).join(lines) + + '\n\n') + return s + + def _prepare_pyshell_blocks(self, text): + """Ensure that Python interactive shell sessions are put in + code blocks -- even if not properly indented. + """ + if ">>>" not in text: + return text + + less_than_tab = self.tab_width - 1 + _pyshell_block_re = re.compile(r""" + ^([ ]{0,%d})>>>[ ].*\n # first line + ^(\1.*\S+.*\n)* # any number of subsequent lines + ^\n # ends with a blank line + """ % less_than_tab, re.M | re.X) + + return _pyshell_block_re.sub(self._pyshell_block_sub, text) + + def _run_span_gamut(self, text): + # These are all the transformations that occur *within* block-level + # tags like paragraphs, headers, and list items. + + text = self._do_code_spans(text) + + text = self._escape_special_chars(text) + + # Process anchor and image tags. 
+ text = self._do_links(text) + + # Make links out of things like `` + # Must come after _do_links(), because you can use < and > + # delimiters in inline links like [this](). + text = self._do_auto_links(text) + + if "link-patterns" in self.extras: + text = self._do_link_patterns(text) + + text = self._encode_amps_and_angles(text) + + text = self._do_italics_and_bold(text) + + # Do hard breaks: + text = re.sub(r" {2,}\n", " + | + # auto-link (e.g., ) + <\w+[^>]*> + | + # comment + | + <\?.*?\?> # processing instruction + ) + """, re.X) + + def _escape_special_chars(self, text): + # Python markdown note: the HTML tokenization here differs from + # that in Markdown.pl, hence the behaviour for subtle cases can + # differ (I believe the tokenizer here does a better job because + # it isn't susceptible to unmatched '<' and '>' in HTML tags). + # Note, however, that '>' is not allowed in an auto-link URL + # here. + escaped = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup: + # Within tags/HTML-comments/auto-links, encode * and _ + # so they don't conflict with their use in Markdown for + # italics and strong. We're replacing each such + # character with its corresponding MD5 checksum value; + # this is likely overkill, but it should prevent us from + # colliding with the escape values by accident. + escaped.append(token.replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + else: + escaped.append(self._encode_backslash_escapes(token)) + is_html_markup = not is_html_markup + return ''.join(escaped) + + def _hash_html_spans(self, text): + # Used for safe_mode. 
+ + def _is_auto_link(s): + if ':' in s and self._auto_link_re.match(s): + return True + elif '@' in s and self._auto_email_link_re.match(s): + return True + return False + + tokens = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup and not _is_auto_link(token): + sanitized = self._sanitize_html(token) + key = _hash_text(sanitized) + self.html_spans[key] = sanitized + tokens.append(key) + else: + tokens.append(token) + is_html_markup = not is_html_markup + return ''.join(tokens) + + def _unhash_html_spans(self, text): + for key, sanitized in self.html_spans.items(): + text = text.replace(key, sanitized) + return text + + def _sanitize_html(self, s): + if self.safe_mode == "replace": + return self.html_removed_text + elif self.safe_mode == "escape": + replacements = [ + ('&', '&'), + ('<', '<'), + ('>', '>'), + ] + for before, after in replacements: + s = s.replace(before, after) + return s + else: + raise MarkdownError("invalid value for 'safe_mode': %r (must be " + "'escape' or 'replace')" % self.safe_mode) + + _tail_of_inline_link_re = re.compile(r''' + # Match tail of: [text](/url/) or [text](/url/ "title") + \( # literal paren + [ \t]* + (?P # \1 + <.*?> + | + .*? + ) + [ \t]* + ( # \2 + (['"]) # quote char = \3 + (?P.*?) + \3 # matching quote + )? # title is optional + \) + ''', re.X | re.S) + _tail_of_reference_link_re = re.compile(r''' + # Match tail of: [text][id] + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + \[ + (?P<id>.*?) + \] + ''', re.X | re.S) + + def _do_links(self, text): + """Turn Markdown link shortcuts into XHTML <a> and <img> tags. + + This is a combination of Markdown.pl's _DoAnchors() and + _DoImages(). They are done together because that simplified the + approach. It was necessary to use a different approach than + Markdown.pl because of the lack of atomic matching support in + Python's regex engine used in $g_nested_brackets. 
+ """ + MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24 + + # `anchor_allowed_pos` is used to support img links inside + # anchors, but not anchors inside anchors. An anchor's start + # pos must be `>= anchor_allowed_pos`. + anchor_allowed_pos = 0 + + curr_pos = 0 + while True: # Handle the next link. + # The next '[' is the start of: + # - an inline anchor: [text](url "title") + # - a reference anchor: [text][id] + # - an inline img: ![text](url "title") + # - a reference img: ![text][id] + # - a footnote ref: [^id] + # (Only if 'footnotes' extra enabled) + # - a footnote defn: [^id]: ... + # (Only if 'footnotes' extra enabled) These have already + # been stripped in _strip_footnote_definitions() so no + # need to watch for them. + # - a link definition: [id]: url "title" + # These have already been stripped in + # _strip_link_definitions() so no need to watch for them. + # - not markup: [...anything else... + try: + start_idx = text.index('[', curr_pos) + except ValueError: + break + text_length = len(text) + + # Find the matching closing ']'. + # Markdown.pl allows *matching* brackets in link text so we + # will here too. Markdown.pl *doesn't* currently allow + # matching brackets in img alt text -- we'll differ in that + # regard. + bracket_depth = 0 + for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, + text_length)): + ch = text[p] + if ch == ']': + bracket_depth -= 1 + if bracket_depth < 0: + break + elif ch == '[': + bracket_depth += 1 + else: + # Closing bracket not found within sentinel length. + # This isn't markup. + curr_pos = start_idx + 1 + continue + link_text = text[start_idx+1:p] + + # Possibly a footnote ref? 
+ if "footnotes" in self.extras and link_text.startswith("^"): + normed_id = re.sub(r'\W', '-', link_text[1:]) + if normed_id in self.footnotes: + self.footnote_ids.append(normed_id) + result = '<sup class="footnote-ref" id="fnref-%s">' \ + '<a href="#fn-%s">%s</a></sup>' \ + % (normed_id, normed_id, len(self.footnote_ids)) + text = text[:start_idx] + result + text[p+1:] + else: + # This id isn't defined, leave the markup alone. + curr_pos = p+1 + continue + + # Now determine what this is by the remainder. + p += 1 + if p == text_length: + return text + + # Inline anchor or img? + if text[p] == '(': # attempt at perf improvement + match = self._tail_of_inline_link_re.match(text, p) + if match: + # Handle an inline anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + + url, title = match.group("url"), match.group("title") + if url and url[0] == '<': + url = url[1:-1] # '<url>' -> 'url' + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + if title: + title_str = ' title="%s"' \ + % title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) \ + .replace('"', '"') + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url, link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + continue + + # Reference anchor or img? 
+ else: + match = self._tail_of_reference_link_re.match(text, p) + if match: + # Handle a reference-style anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + link_id = match.group("id").lower() + if not link_id: + link_id = link_text.lower() # for links like [this][] + if link_id in self.urls: + url = self.urls[link_id] + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title = self.titles.get(link_id) + if title: + title = title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title_str = ' title="%s"' % title + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url, link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result = '<a href="%s"%s>%s</a>' \ + % (url, title_str, link_text) + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + else: + # This id isn't defined, leave the markup alone. + curr_pos = match.end() + continue + + # Otherwise, it isn't markup. 
+ curr_pos = start_idx + 1 + + return text + + + _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) + def _setext_h_sub(self, match): + n = {"=": 1, "-": 2}[match.group(2)[0]] + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + return "<h%d>%s</h%d>\n\n" \ + % (n, self._run_span_gamut(match.group(1)), n) + + _atx_h_re = re.compile(r''' + ^(\#{1,6}) # \1 = string of #'s + [ \t]* + (.+?) # \2 = Header text + [ \t]* + (?<!\\) # ensure not an escaped trailing '#' + \#* # optional closing #'s (not counted) + \n+ + ''', re.X | re.M) + def _atx_h_sub(self, match): + n = len(match.group(1)) + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + return "<h%d>%s</h%d>\n\n" \ + % (n, self._run_span_gamut(match.group(2)), n) + + def _do_headers(self, text): + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + text = self._setext_h_re.sub(self._setext_h_sub, text) + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + text = self._atx_h_re.sub(self._atx_h_sub, text) + + return text + + + _marker_ul_chars = '*+-' + _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars + _marker_ul = '(?:[%s])' % _marker_ul_chars + _marker_ol = r'(?:\d+\.)' + + def _list_sub(self, match): + lst = match.group(1) + lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol" + result = self._process_list_items(lst) + if self.list_level: + return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type) + else: + return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type) + + def _do_lists(self, text): + # Form HTML ordered (numbered) and unordered (bulleted) lists. 
+
+        for marker_pat in (self._marker_ul, self._marker_ol):
+            # Re-usable pattern to match any entire ul or ol list:
+            less_than_tab = self.tab_width - 1
+            whole_list = r'''
+                (                   # \1 = whole list
+                  (                 # \2
+                    [ ]{0,%d}
+                    (%s)            # \3 = first list item marker
+                    [ \t]+
+                  )
+                  (?:.+?)
+                  (                 # \4
+                      \Z
+                    |
+                      \n{2,}
+                      (?=\S)
+                      (?!           # Negative lookahead for another list item marker
+                        [ \t]*
+                        %s[ \t]+
+                      )
+                  )
+                )
+            ''' % (less_than_tab, marker_pat, marker_pat)
+
+            # We use a different prefix before nested lists than top-level lists.
+            # See extended comment in _process_list_items().
+            #
+            # Note: There's a bit of duplication here. My original implementation
+            # created a scalar regex pattern as the conditional result of the test on
+            # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
+            # substitution once, using the scalar as the pattern. This worked,
+            # everywhere except when running under MT on my hosting account at Pair
+            # Networks. There, this caused all rebuilds to be killed by the reaper (or
+            # perhaps they crashed, but that seems incredibly unlikely given that the
+            # same script on the same server ran fine *except* under MT). I've spent
+            # more time trying to figure out why this is happening than I'd like to
+            # admit. My only guess, backed up by the fact that this workaround works,
+            # is that Perl optimizes the substitution when it can figure out that the
+            # pattern will never change, and when this optimization isn't on, we run
+            # afoul of the reaper. Thus, the slightly redundant code that uses two
+            # static s/// patterns rather than one conditional pattern.
+
+            if self.list_level:
+                sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S)
+                text = sub_list_re.sub(self._list_sub, text)
+            else:
+                list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list,
+                                     re.X | re.M | re.S)
+                text = list_re.sub(self._list_sub, text)
+
+        return text
+
+    _list_item_re = re.compile(r'''
+        (\n)?
# leading line = \1 + (^[ \t]*) # leading whitespace = \2 + (%s) [ \t]+ # list marker = \3 + ((?:.+?) # list item text = \4 + (\n{1,2})) # eols = \5 + (?= \n* (\Z | \2 (%s) [ \t]+)) + ''' % (_marker_any, _marker_any), + re.M | re.X | re.S) + + _last_li_endswith_two_eols = False + def _list_item_sub(self, match): + item = match.group(4) + leading_line = match.group(1) + leading_space = match.group(2) + if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: + item = self._run_block_gamut(self._outdent(item)) + else: + # Recursion for sub-lists: + item = self._do_lists(self._outdent(item)) + if item.endswith('\n'): + item = item[:-1] + item = self._run_span_gamut(item) + self._last_li_endswith_two_eols = (len(match.group(5)) == 2) + return "<li>%s</li>\n" % item + + def _process_list_items(self, list_str): + # Process the contents of a single ordered or unordered list, + # splitting it into individual list items. + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". 
+ self.list_level += 1 + self._last_li_endswith_two_eols = False + list_str = list_str.rstrip('\n') + '\n' + list_str = self._list_item_re.sub(self._list_item_sub, list_str) + self.list_level -= 1 + return list_str + + def _get_pygments_lexer(self, lexer_name): + try: + from pygments import lexers, util + except ImportError: + return None + try: + return lexers.get_lexer_by_name(lexer_name) + except util.ClassNotFound: + return None + + def _color_with_pygments(self, codeblock, lexer, **formatter_opts): + import pygments + import pygments.formatters + + class HtmlCodeFormatter(pygments.formatters.HtmlFormatter): + def _wrap_code(self, inner): + """A function for use in a Pygments Formatter which + wraps in <code> tags. + """ + yield 0, "<code>" + for tup in inner: + yield tup + yield 0, "</code>" + + def wrap(self, source, outfile): + """Return the source with a code, pre, and div.""" + return self._wrap_div(self._wrap_pre(self._wrap_code(source))) + + formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts) + return pygments.highlight(codeblock, lexer, formatter) + + def _code_block_sub(self, match): + codeblock = match.group(1) + codeblock = self._outdent(codeblock) + codeblock = self._detab(codeblock) + codeblock = codeblock.lstrip('\n') # trim leading newlines + codeblock = codeblock.rstrip() # trim trailing whitespace + + if "code-color" in self.extras and codeblock.startswith(":::"): + lexer_name, rest = codeblock.split('\n', 1) + lexer_name = lexer_name[3:].strip() + lexer = self._get_pygments_lexer(lexer_name) + codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
+            if lexer:
+                formatter_opts = self.extras['code-color'] or {}
+                colored = self._color_with_pygments(codeblock, lexer,
+                                                    **formatter_opts)
+                return "\n\n%s\n\n" % colored
+
+        codeblock = self._encode_code(codeblock)
+        return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock
+
+    def _do_code_blocks(self, text):
+        """Process Markdown `<pre><code>` blocks."""
+        code_block_re = re.compile(r'''
+            (?:\n\n|\A)
+            ( # $1 = the code block -- one or more lines, starting with a space/tab
+              (?:
+                (?:[ ]{%d} | \t)  # Lines must start with a tab or a tab-width of spaces
+                .*\n+
+              )+
+            )
+            ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+            ''' % (self.tab_width, self.tab_width),
+            re.M | re.X)
+
+        return code_block_re.sub(self._code_block_sub, text)
+
+
+    # Rules for a code span:
+    # - backslash escapes are not interpreted in a code span
+    # - to include one or a run of more backticks the delimiters must
+    #   be a longer run of backticks
+    # - cannot start or end a code span with a backtick; pad with a
+    #   space and that space will be removed in the emitted HTML
+    # See `test/tm-cases/escapes.text` for a number of edge-case
+    # examples.
+    _code_span_re = re.compile(r'''
+            (?<!\\)
+            (`+)        # \1 = Opening run of `
+            (?!`)       # See Note A test/tm-cases/escapes.text
+            (.+?)       # \2 = The code block
+            (?<!`)
+            \1          # Matching closer
+            (?!`)
+        ''', re.X | re.S)
+
+    def _code_span_sub(self, match):
+        c = match.group(2).strip(" \t")
+        c = self._encode_code(c)
+        return "<code>%s</code>" % c
+
+    def _do_code_spans(self, text):
+        #   *   Backtick quotes are used for <code></code> spans.
+        #
+        #   *   You can use multiple backticks as the delimiters if you want to
+        #       include literal backticks in the code span. So, this input:
+        #
+        #         Just type ``foo `bar` baz`` at the prompt.
+        #
+        #       Will translate to:
+        #
+        #         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
+        #
+        #       There's no arbitrary limit to the number of backticks you
+        #       can use as delimiters.
If you need three consecutive backticks + # in your code, use four for delimiters, etc. + # + # * You can use spaces to get literal backticks at the edges: + # + # ... type `` `bar` `` ... + # + # Turns to: + # + # ... type <code>`bar`</code> ... + return self._code_span_re.sub(self._code_span_sub, text) + + def _encode_code(self, text): + """Encode/escape certain characters inside Markdown code runs. + The point is that in code, these characters are literals, + and lose their special Markdown meanings. + """ + replacements = [ + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + ('&', '&'), + # Do the angle bracket song and dance: + ('<', '<'), + ('>', '>'), + # Now, escape characters that are magic in Markdown: + ('*', g_escape_table['*']), + ('_', g_escape_table['_']), + ('{', g_escape_table['{']), + ('}', g_escape_table['}']), + ('[', g_escape_table['[']), + (']', g_escape_table[']']), + ('\\', g_escape_table['\\']), + ] + for before, after in replacements: + text = text.replace(before, after) + return text + + _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) + _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) + _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) + _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) + def _do_italics_and_bold(self, text): + # <strong> must go first: + if "code-friendly" in self.extras: + text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text) + text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) + else: + text = self._strong_re.sub(r"<strong>\2</strong>", text) + text = self._em_re.sub(r"<em>\2</em>", text) + return text + + + _block_quote_re = re.compile(r''' + ( # Wrap whole match in \1 + ( + ^[ \t]*>[ \t]? 
# '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + ''', re.M | re.X) + _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); + + _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) + def _dedent_two_spaces_sub(self, match): + return re.sub(r'(?m)^ ', '', match.group(1)) + + def _block_quote_sub(self, match): + bq = match.group(1) + bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting + bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines + bq = self._run_block_gamut(bq) # recurse + + bq = re.sub('(?m)^', ' ', bq) + # These leading spaces screw with <pre> content, so we need to fix that: + bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) + + return "<blockquote>\n%s\n</blockquote>\n\n" % bq + + def _do_block_quotes(self, text): + if '>' not in text: + return text + return self._block_quote_re.sub(self._block_quote_sub, text) + + def _form_paragraphs(self, text): + # Strip leading and trailing lines: + text = text.strip('\n') + + # Wrap <p> tags. + grafs = re.split(r"\n{2,}", text) + for i, graf in enumerate(grafs): + if graf in self.html_blocks: + # Unhashify HTML blocks + grafs[i] = self.html_blocks[graf] + else: + # Wrap <p> tags. 
+ graf = self._run_span_gamut(graf) + grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>" + + return "\n\n".join(grafs) + + def _add_footnotes(self, text): + if self.footnotes: + footer = [ + '<div class="footnotes">', + '<hr' + self.empty_element_suffix, + '<ol>', + ] + for i, id in enumerate(self.footnote_ids): + if i != 0: + footer.append('') + footer.append('<li id="fn-%s">' % id) + footer.append(self._run_block_gamut(self.footnotes[id])) + backlink = ('<a href="#fnref-%s" ' + 'class="footnoteBackLink" ' + 'title="Jump back to footnote %d in the text.">' + '↩</a>' % (id, i+1)) + if footer[-1].endswith("</p>"): + footer[-1] = footer[-1][:-len("</p>")] \ + + ' ' + backlink + "</p>" + else: + footer.append("\n<p>%s</p>" % backlink) + footer.append('</li>') + footer.append('</ol>') + footer.append('</div>') + return text + '\n\n' + '\n'.join(footer) + else: + return text + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') + _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) + _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I) + + def _encode_amps_and_angles(self, text): + # Smart processing for ampersands and angle brackets that need + # to be encoded. + text = self._ampersand_re.sub('&', text) + + # Encode naked <'s + text = self._naked_lt_re.sub('<', text) + + # Encode naked >'s + # Note: Other markdown implementations (e.g. Markdown.pl, PHP + # Markdown) don't do this. + text = self._naked_gt_re.sub('>', text) + return text + + def _encode_backslash_escapes(self, text): + for ch, escape in g_escape_table.items(): + text = text.replace("\\"+ch, escape) + return text + + _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) + def _auto_link_sub(self, match): + g1 = match.group(1) + return '<a href="%s">%s</a>' % (g1, g1) + + _auto_email_link_re = re.compile(r""" + < + (?:mailto:)? 
+ ( + [-.\w]+ + \@ + [-\w]+(\.[-\w]+)*\.[a-z]+ + ) + > + """, re.I | re.X | re.U) + def _auto_email_link_sub(self, match): + return self._encode_email_address( + self._unescape_special_chars(match.group(1))) + + def _do_auto_links(self, text): + text = self._auto_link_re.sub(self._auto_link_sub, text) + text = self._auto_email_link_re.sub(self._auto_email_link_sub, text) + return text + + def _encode_email_address(self, addr): + # Input: an email address, e.g. "foo@example.com" + # + # Output: the email address as a mailto link, with each character + # of the address encoded as either a decimal or hex entity, in + # the hopes of foiling most address harvesting spam bots. E.g.: + # + # <a href="mailto:foo@e + # xample.com">foo + # @example.com</a> + # + # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk + # mailing list: <http://tinyurl.com/yu7ue> + chars = [_xml_encode_email_char_at_random(ch) + for ch in "mailto:" + addr] + # Strip the mailto: from the visible part. + addr = '<a href="%s">%s</a>' \ + % (''.join(chars), ''.join(chars[7:])) + return addr + + def _do_link_patterns(self, text): + """Caveat emptor: there isn't much guarding against link + patterns being formed inside other standard Markdown links, e.g. + inside a [link def][like this]. + + Dev Notes: *Could* consider prefixing regexes with a negative + lookbehind assertion to attempt to guard against this. 
+ """ + link_from_hash = {} + for regex, repl in self.link_patterns: + replacements = [] + for match in regex.finditer(text): + if hasattr(repl, "__call__"): + href = repl(match) + else: + href = match.expand(repl) + replacements.append((match.span(), href)) + for (start, end), href in reversed(replacements): + escaped_href = ( + href.replace('"', '"') # b/c of attr quote + # To avoid markdown <em> and <strong>: + .replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) + hash = md5(link).hexdigest() + link_from_hash[hash] = link + text = text[:start] + hash + text[end:] + for hash, link in link_from_hash.items(): + text = text.replace(hash, link) + return text + + def _unescape_special_chars(self, text): + # Swap back in all the special characters we've hidden. + for ch, hash in g_escape_table.items(): + text = text.replace(hash, ch) + return text + + def _outdent(self, text): + # Remove one level of line-leading tabs or spaces + return self._outdent_re.sub('', text) + + +class MarkdownWithExtras(Markdown): + """A markdowner class that enables most extras: + + - footnotes + - code-color (only has effect if 'pygments' Python module on path) + + These are not included: + - pyshell (specific to Python-related documenting) + - code-friendly (because it *disables* part of the syntax) + - link-patterns (because you need to specify some actual + link-patterns anyway) + """ + extras = ["footnotes", "code-color"] + + +#---- internal support functions + +# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 +def _curry(*args, **kwargs): + function, args = args[0], args[1:] + def result(*rest, **kwrest): + combined = kwargs.copy() + combined.update(kwrest) + return function(*args + rest, **combined) + return result + +# Recipe: regex_from_encoded_pattern (1.0) +def _regex_from_encoded_pattern(s): + """'foo' -> re.compile(re.escape('foo')) + '/foo/' -> re.compile('foo') + '/foo/i' 
-> re.compile('foo', re.I) + """ + if s.startswith('/') and s.rfind('/') != 0: + # Parse it: /PATTERN/FLAGS + idx = s.rfind('/') + pattern, flags_str = s[1:idx], s[idx+1:] + flag_from_char = { + "i": re.IGNORECASE, + "l": re.LOCALE, + "s": re.DOTALL, + "m": re.MULTILINE, + "u": re.UNICODE, + } + flags = 0 + for char in flags_str: + try: + flags |= flag_from_char[char] + except KeyError: + raise ValueError("unsupported regex flag: '%s' in '%s' " + "(must be one of '%s')" + % (char, s, ''.join(flag_from_char.keys()))) + return re.compile(s[1:idx], flags) + else: # not an encoded regex + return re.compile(re.escape(s)) + +# Recipe: dedent (0.1.2) +def _dedentlines(lines, tabsize=8, skip_first_line=False): + """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines + + "lines" is a list of lines to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. + + Same as dedent() except operates on a sequence of lines. Note: the + lines list is modified **in-place**. 
+ """ + DEBUG = False + if DEBUG: + print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ + % (tabsize, skip_first_line) + indents = [] + margin = None + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + indent = 0 + for ch in line: + if ch == ' ': + indent += 1 + elif ch == '\t': + indent += tabsize - (indent % tabsize) + elif ch in '\r\n': + continue # skip all-whitespace lines + else: + break + else: + continue # skip all-whitespace lines + if DEBUG: print "dedent: indent=%d: %r" % (indent, line) + if margin is None: + margin = indent + else: + margin = min(margin, indent) + if DEBUG: print "dedent: margin=%r" % margin + + if margin is not None and margin > 0: + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + removed = 0 + for j, ch in enumerate(line): + if ch == ' ': + removed += 1 + elif ch == '\t': + removed += tabsize - (removed % tabsize) + elif ch in '\r\n': + if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line + lines[i] = lines[i][j:] + break + else: + raise ValueError("unexpected non-whitespace char %r in " + "line %r while removing %d-space margin" + % (ch, line, margin)) + if DEBUG: + print "dedent: %r: %r -> removed %d/%d"\ + % (line, ch, removed, margin) + if removed == margin: + lines[i] = lines[i][j+1:] + break + elif removed > margin: + lines[i] = ' '*(removed-margin) + lines[i][j+1:] + break + else: + if removed: + lines[i] = lines[i][removed:] + return lines + +def _dedent(text, tabsize=8, skip_first_line=False): + """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text + + "text" is the text to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. 
+ + textwrap.dedent(s), but don't expand tabs to spaces + """ + lines = text.splitlines(1) + _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line) + return ''.join(lines) + + +class _memoized(object): + """Decorator that caches a function's return value each time it is called. + If called later with the same arguments, the cached value is returned, and + not re-evaluated. + + http://wiki.python.org/moin/PythonDecoratorLibrary + """ + def __init__(self, func): + self.func = func + self.cache = {} + def __call__(self, *args): + try: + return self.cache[args] + except KeyError: + self.cache[args] = value = self.func(*args) + return value + except TypeError: + # uncachable -- for instance, passing a list as an argument. + # Better to not cache than to blow up entirely. + return self.func(*args) + def __repr__(self): + """Return the function's docstring.""" + return self.func.__doc__ + + +def _xml_oneliner_re_from_tab_width(tab_width): + """Standalone XML processing instruction regex.""" + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,%d} + (?: + <\?\w+\b\s+.*?\?> # XML processing instruction + | + <\w+:\w+\b\s+.*?/> # namespaced single tag + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width) + +def _hr_tag_re_from_tab_width(tab_width): + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in \1 + [ ]{0,%d} + <(hr) # start tag = \2 + \b # word break + ([^<>])*? 
# + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) + + +def _xml_encode_email_char_at_random(ch): + r = random() + # Roughly 10% raw, 45% hex, 45% dec. + # '@' *must* be encoded. I [John Gruber] insist. + # Issue 26: '_' must be encoded. + if r > 0.9 and ch not in "@_": + return ch + elif r < 0.45: + # The [1:] is to drop leading '0': 0x63 -> x63 + return '&#%s;' % hex(ord(ch))[1:] + else: + return '&#%s;' % ord(ch) + +def _hash_text(text): + return 'md5:'+md5(text.encode("utf-8")).hexdigest() + + +#---- mainline + +class _NoReflowFormatter(optparse.IndentedHelpFormatter): + """An optparse formatter that does NOT reflow the description.""" + def format_description(self, description): + return description or "" + +def _test(): + import doctest + doctest.testmod() + +def main(argv=None): + if argv is None: + argv = sys.argv + if not logging.root.handlers: + logging.basicConfig() + + usage = "usage: %prog [PATHS...]" + version = "%prog "+__version__ + parser = optparse.OptionParser(prog="markdown2", usage=usage, + version=version, description=cmdln_desc, + formatter=_NoReflowFormatter()) + parser.add_option("-v", "--verbose", dest="log_level", + action="store_const", const=logging.DEBUG, + help="more verbose output") + parser.add_option("--encoding", + help="specify encoding of text content") + parser.add_option("--html4tags", action="store_true", default=False, + help="use HTML 4 style for empty element tags") + parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", + help="sanitize literal HTML: 'escape' escapes " + "HTML meta chars, 'replace' replaces with an " + "[HTML_REMOVED] note") + parser.add_option("-x", "--extras", action="append", + help="Turn on specific extra features (not part of " + "the core Markdown spec). 
Supported values: " + "'code-friendly' disables _/__ for emphasis; " + "'code-color' adds code-block syntax coloring; " + "'link-patterns' adds auto-linking based on patterns; " + "'footnotes' adds the footnotes syntax;" + "'xml' passes one-liner processing instructions and namespaced XML tags;" + "'pyshell' to put unindented Python interactive shell sessions in a <code> block.") + parser.add_option("--use-file-vars", + help="Look for and use Emacs-style 'markdown-extras' " + "file var to turn on extras. See " + "<http://code.google.com/p/python-markdown2/wiki/Extras>.") + parser.add_option("--link-patterns-file", + help="path to a link pattern file") + parser.add_option("--self-test", action="store_true", + help="run internal self-tests (some doctests)") + parser.add_option("--compare", action="store_true", + help="run against Markdown.pl as well (for testing)") + parser.set_defaults(log_level=logging.INFO, compare=False, + encoding="utf-8", safe_mode=None, use_file_vars=False) + opts, paths = parser.parse_args() + log.setLevel(opts.log_level) + + if opts.self_test: + return _test() + + if opts.extras: + extras = {} + for s in opts.extras: + splitter = re.compile("[,;: ]+") + for e in splitter.split(s): + if '=' in e: + ename, earg = e.split('=', 1) + try: + earg = int(earg) + except ValueError: + pass + else: + ename, earg = e, None + extras[ename] = earg + else: + extras = None + + if opts.link_patterns_file: + link_patterns = [] + f = open(opts.link_patterns_file) + try: + for i, line in enumerate(f.readlines()): + if not line.strip(): continue + if line.lstrip().startswith("#"): continue + try: + pat, href = line.rstrip().rsplit(None, 1) + except ValueError: + raise MarkdownError("%s:%d: invalid link pattern line: %r" + % (opts.link_patterns_file, i+1, line)) + link_patterns.append( + (_regex_from_encoded_pattern(pat), href)) + finally: + f.close() + else: + link_patterns = None + + from os.path import join, dirname, abspath, exists + markdown_pl = 
join(dirname(dirname(abspath(__file__))), "test", + "Markdown.pl") + for path in paths: + if opts.compare: + print "==== Markdown.pl ====" + perl_cmd = 'perl %s "%s"' % (markdown_pl, path) + o = os.popen(perl_cmd) + perl_html = o.read() + o.close() + sys.stdout.write(perl_html) + print "==== markdown2.py ====" + html = markdown_path(path, encoding=opts.encoding, + html4tags=opts.html4tags, + safe_mode=opts.safe_mode, + extras=extras, link_patterns=link_patterns, + use_file_vars=opts.use_file_vars) + sys.stdout.write( + html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + if opts.compare: + test_dir = join(dirname(dirname(abspath(__file__))), "test") + if exists(join(test_dir, "test_markdown2.py")): + sys.path.insert(0, test_dir) + from test_markdown2 import norm_html_from_html + norm_html = norm_html_from_html(html) + norm_perl_html = norm_html_from_html(perl_html) + else: + norm_html = html + norm_perl_html = perl_html + print "==== match? %r ====" % (norm_perl_html == norm_html) + + +if __name__ == "__main__": + sys.exit( main(sys.argv) ) + diff --git a/demos/appengine/static/blog.css b/demos/appengine/static/blog.css new file mode 100644 index 000000000..8902ec1f2 --- /dev/null +++ b/demos/appengine/static/blog.css @@ -0,0 +1,153 @@ +/* + * Copyright 2009 Facebook + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +body { + background: white; + color: black; + margin: 15px; + margin-top: 0; +} + +body, +input, +textarea { + font-family: Georgia, serif; + font-size: 12pt; +} + +table { + border-collapse: collapse; + border: 0; +} + +td { + border: 0; + padding: 0; +} + +h1, +h2, +h3, +h4 { + font-family: "Helvetica Nue", Helvetica, Arial, sans-serif; + margin: 0; +} + +h1 { + font-size: 20pt; +} + +pre, +code { + font-family: monospace; + color: #060; +} + +pre { + margin-left: 1em; + padding-left: 1em; + border-left: 1px solid silver; + line-height: 14pt; +} + +a, +a code { + color: #00c; +} + +#body { + max-width: 800px; + margin: auto; +} + +#header { + background-color: #3b5998; + padding: 5px; + padding-left: 10px; + padding-right: 10px; + margin-bottom: 1em; +} + +#header, +#header a { + color: white; +} + +#header h1 a { + text-decoration: none; +} + +#footer, +#content { + margin-left: 10px; + margin-right: 10px; +} + +#footer { + margin-top: 3em; +} + +.entry h1 a { + color: black; + text-decoration: none; +} + +.entry { + margin-bottom: 2em; +} + +.entry .date { + margin-top: 3px; +} + +.entry p { + margin: 0; + margin-bottom: 1em; +} + +.entry .body { + margin-top: 1em; + line-height: 16pt; +} + +.compose td { + vertical-align: middle; + padding-bottom: 5px; +} + +.compose td.field { + padding-right: 10px; +} + +.compose .title, +.compose .submit { + font-family: "Helvetica Nue", Helvetica, Arial, sans-serif; + font-weight: bold; +} + +.compose .title { + font-size: 20pt; +} + +.compose .title, +.compose .markdown { + width: 100%; +} + +.compose .markdown { + height: 500px; + line-height: 16pt; +} diff --git a/demos/appengine/templates/archive.html b/demos/appengine/templates/archive.html new file mode 100644 index 000000000..9f1699793 --- /dev/null +++ b/demos/appengine/templates/archive.html @@ -0,0 +1,31 @@ +{% extends "base.html" %} + +{% block head %} + <style type="text/css"> + ul.archive { + list-style-type: none; + margin: 0; + padding: 0; + } + + 
ul.archive li { + margin-bottom: 1em; + } + + ul.archive .title { + font-family: "Helvetica Nue", Helvetica, Arial, sans-serif; + font-size: 14pt; + } + </style> +{% end %} + +{% block body %} + <ul class="archive"> + {% for entry in entries %} + <li> + <div class="title"><a href="/entry/{{ entry.slug }}">{{ escape(entry.title) }}</a></div> + <div class="date">{{ locale.format_date(entry.published, full_format=True, shorter=True) }}</div> + </li> + {% end %} + </ul> +{% end %} diff --git a/demos/appengine/templates/base.html b/demos/appengine/templates/base.html new file mode 100644 index 000000000..15cbf5404 --- /dev/null +++ b/demos/appengine/templates/base.html @@ -0,0 +1,29 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> + <title>{{ escape(handler.settings["blog_title"]) }} + + + {% block head %}{% end %} + + +

+ +
{% block body %}{% end %}
+
+ {% block bottom %}{% end %} + + diff --git a/demos/appengine/templates/compose.html b/demos/appengine/templates/compose.html new file mode 100644 index 000000000..5ad548307 --- /dev/null +++ b/demos/appengine/templates/compose.html @@ -0,0 +1,42 @@ +{% extends "base.html" %} + +{% block body %} +
+
+
+
+
{{ _("Syntax documentation") }}
+ +  {{ _("Cancel") }} +
+ {% if entry %} + + {% end %} + {{ xsrf_form_html() }} + +{% end %} + +{% block bottom %} + + +{% end %} + diff --git a/demos/appengine/templates/entry.html b/demos/appengine/templates/entry.html new file mode 100644 index 000000000..43c835dea --- /dev/null +++ b/demos/appengine/templates/entry.html @@ -0,0 +1,5 @@ +{% extends "base.html" %} + +{% block body %} + {{ modules.Entry(entry) }} +{% end %} diff --git a/demos/appengine/templates/feed.xml b/demos/appengine/templates/feed.xml new file mode 100644 index 000000000..98a929802 --- /dev/null +++ b/demos/appengine/templates/feed.xml @@ -0,0 +1,26 @@ + + + {% set date_format = "%Y-%m-%dT%H:%M:%SZ" %} + {{ escape(handler.settings["blog_title"]) }} + {% if len(entries) > 0 %} + {{ max(e.updated for e in entries).strftime(date_format) }} + {% else %} + {{ datetime.datetime.utcnow().strftime(date_format) }} + {% end %} + http://{{ request.host }}/ + + + {{ escape(handler.settings["blog_title"]) }} + {% for entry in entries %} + + http://{{ request.host }}/entry/{{ entry.slug }} + {{ escape(entry.title) }} + + {{ entry.updated.strftime(date_format) }} + {{ entry.published.strftime(date_format) }} + +
{{ entry.html }}
+
+
+ {% end %} +
diff --git a/demos/appengine/templates/home.html b/demos/appengine/templates/home.html new file mode 100644 index 000000000..dd069a97f --- /dev/null +++ b/demos/appengine/templates/home.html @@ -0,0 +1,8 @@ +{% extends "base.html" %} + +{% block body %} + {% for entry in entries %} + {{ modules.Entry(entry) }} + {% end %} +
{{ _("Archive") }}
+{% end %} diff --git a/demos/appengine/templates/modules/entry.html b/demos/appengine/templates/modules/entry.html new file mode 100644 index 000000000..06237657c --- /dev/null +++ b/demos/appengine/templates/modules/entry.html @@ -0,0 +1,8 @@ +
+

{{ escape(entry.title) }}

+
{{ locale.format_date(entry.published, full_format=True, shorter=True) }}
+
{{ entry.html }}
+ {% if current_user and current_user.administrator %} + + {% end %} +
diff --git a/demos/auth/authdemo.py b/demos/auth/authdemo.py new file mode 100755 index 000000000..2a309f384 --- /dev/null +++ b/demos/auth/authdemo.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tornado.auth +import tornado.escape +import tornado.httpserver +import tornado.ioloop +import tornado.options +import tornado.web + +from tornado.options import define, options + +define("port", default=8888, help="run on the given port", type=int) + + +class Application(tornado.web.Application): + def __init__(self): + handlers = [ + (r"/", MainHandler), + (r"/auth/login", AuthHandler), + ] + settings = dict( + cookie_secret="32oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + login_url="/auth/login", + google_consumer_key="www.tornadoweb.org", + google_consumer_secret="ZcyJGvEEFn82+h9/PWgBeB0E", + ) + tornado.web.Application.__init__(self, handlers, **settings) + + +class BaseHandler(tornado.web.RequestHandler): + def get_current_user(self): + user_json = self.get_secure_cookie("user") + if not user_json: return None + return tornado.escape.json_decode(user_json) + + +class MainHandler(BaseHandler): + @tornado.web.authenticated + def get(self): + name = tornado.escape.xhtml_escape(self.current_user["name"]) + self.write("Hello, " + name) + + +class AuthHandler(BaseHandler, tornado.auth.GoogleMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("openid.mode", None): + 
self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authenticate_redirect() + + def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "Google auth failed") + self.set_secure_cookie("user", tornado.escape.json_encode(user)) + self.redirect("/") + + +def main(): + tornado.options.parse_command_line() + http_server = tornado.httpserver.HTTPServer(Application()) + http_server.listen(options.port) + tornado.ioloop.IOLoop.instance().start() + + +if __name__ == "__main__": + main() diff --git a/demos/blog/README b/demos/blog/README new file mode 100644 index 000000000..a033e7a11 --- /dev/null +++ b/demos/blog/README @@ -0,0 +1,57 @@ +Running the Tornado Blog example app +==================================== +This demo is a simple blogging engine that uses MySQL to store posts and +Google Accounts for author authentication. Since it depends on MySQL, you +need to set up MySQL and the database schema for the demo to run. + +1. Install prerequisites and build tornado + + See http://www.tornadoweb.org/ for installation instructions. If you can + run the "helloworld" example application, your environment is set up + correctly. + +2. Install MySQL if needed + + Consult the documentation for your platform. Under Ubuntu Linux you + can run "apt-get install mysql". Under OS X you can download the + MySQL PKG file from http://dev.mysql.com/downloads/mysql/ + +3. Connect to MySQL and create a database and user for the blog. + + Connect to MySQL as a user that can create databases and users: + mysql -u root + + Create a database named "blog": + mysql> CREATE DATABASE blog; + + Allow the "blog" user to connect with the password "blog": + mysql> GRANT ALL PRIVILEGES ON blog.* TO 'blog'@'localhost' IDENTIFIED BY 'blog'; + +4. Create the tables in your new database. 
+ + You can use the provided schema.sql file by running this command: + mysql --user=blog --password=blog --database=blog < schema.sql + + You can run the above command again later if you want to delete the + contents of the blog and start over after testing. + +5. Run the blog example + + With the default user, password, and database you can just run: + ./blog.py + + If you've changed anything, you can alter the default MySQL settings + with arguments on the command line, e.g.: + ./blog.py --mysql_user=casey --mysql_password=happiness --mysql_database=foodblog + +6. Visit your new blog + + Open http://localhost:8888/ in your web browser. You will be redirected to + a Google account sign-in page because the blog uses Google accounts for + authentication. + + Currently the first user to connect will automatically be given the + ability to create and edit posts. + + Once you've created one blog post, subsequent users will not be + prompted to sign in. diff --git a/demos/blog/blog.py b/demos/blog/blog.py new file mode 100755 index 000000000..808a9afc5 --- /dev/null +++ b/demos/blog/blog.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import markdown +import os.path +import re +import tornado.auth +import tornado.database +import tornado.httpserver +import tornado.ioloop +import tornado.options +import tornado.web +import unicodedata + +from tornado.options import define, options + +define("port", default=8888, help="run on the given port", type=int) +define("mysql_host", default="127.0.0.1:3306", help="blog database host") +define("mysql_database", default="blog", help="blog database name") +define("mysql_user", default="blog", help="blog database user") +define("mysql_password", default="blog", help="blog database password") + + +class Application(tornado.web.Application): + def __init__(self): + handlers = [ + (r"/", HomeHandler), + (r"/archive", ArchiveHandler), + (r"/feed", FeedHandler), + (r"/entry/([^/]+)", EntryHandler), + (r"/compose", ComposeHandler), + (r"/auth/login", AuthLoginHandler), + (r"/auth/logout", AuthLogoutHandler), + ] + settings = dict( + blog_title=u"Tornado Blog", + template_path=os.path.join(os.path.dirname(__file__), "templates"), + static_path=os.path.join(os.path.dirname(__file__), "static"), + ui_modules={"Entry": EntryModule}, + xsrf_cookies=True, + cookie_secret="11oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + login_url="/auth/login", + ) + tornado.web.Application.__init__(self, handlers, **settings) + + # Have one global connection to the blog DB across all handlers + self.db = tornado.database.Connection( + host=options.mysql_host, database=options.mysql_database, + user=options.mysql_user, password=options.mysql_password) + + +class BaseHandler(tornado.web.RequestHandler): + @property + def db(self): + return self.application.db + + def get_current_user(self): + user_id = self.get_secure_cookie("user") + if not user_id: return None + return self.db.get("SELECT * FROM authors WHERE id = %s", int(user_id)) + + +class HomeHandler(BaseHandler): + def get(self): + entries = self.db.query("SELECT * FROM entries ORDER BY published " + "DESC LIMIT 5") + if not 
entries: + self.redirect("/compose") + return + self.render("home.html", entries=entries) + + +class EntryHandler(BaseHandler): + def get(self, slug): + entry = self.db.get("SELECT * FROM entries WHERE slug = %s", slug) + if not entry: raise tornado.web.HTTPError(404) + self.render("entry.html", entry=entry) + + +class ArchiveHandler(BaseHandler): + def get(self): + entries = self.db.query("SELECT * FROM entries ORDER BY published " + "DESC") + self.render("archive.html", entries=entries) + + +class FeedHandler(BaseHandler): + def get(self): + entries = self.db.query("SELECT * FROM entries ORDER BY published " + "DESC LIMIT 10") + self.set_header("Content-Type", "application/atom+xml") + self.render("feed.xml", entries=entries) + + +class ComposeHandler(BaseHandler): + @tornado.web.authenticated + def get(self): + id = self.get_argument("id", None) + entry = None + if id: + entry = self.db.get("SELECT * FROM entries WHERE id = %s", int(id)) + self.render("compose.html", entry=entry) + + @tornado.web.authenticated + def post(self): + id = self.get_argument("id", None) + title = self.get_argument("title") + text = self.get_argument("markdown") + html = markdown.markdown(text) + if id: + entry = self.db.get("SELECT * FROM entries WHERE id = %s", int(id)) + if not entry: raise tornado.web.HTTPError(404) + slug = entry.slug + self.db.execute( + "UPDATE entries SET title = %s, markdown = %s, html = %s " + "WHERE id = %s", title, text, html, int(id)) + else: + slug = unicodedata.normalize("NFKD", title).encode( + "ascii", "ignore") + slug = re.sub(r"[^\w]+", " ", slug) + slug = "-".join(slug.lower().strip().split()) + if not slug: slug = "entry" + while True: + e = self.db.get("SELECT * FROM entries WHERE slug = %s", slug) + if not e: break + slug += "-2" + self.db.execute( + "INSERT INTO entries (author_id,title,slug,markdown,html," + "published) VALUES (%s,%s,%s,%s,%s,UTC_TIMESTAMP())", + self.current_user.id, title, slug, text, html) + self.redirect("/entry/" + slug) + 
+ +class AuthLoginHandler(BaseHandler, tornado.auth.GoogleMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("openid.mode", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authenticate_redirect() + + def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "Google auth failed") + author = self.db.get("SELECT * FROM authors WHERE email = %s", + user["email"]) + if not author: + # Auto-create first author + any_author = self.db.get("SELECT * FROM authors LIMIT 1") + if not any_author: + author_id = self.db.execute( + "INSERT INTO authors (email,name) VALUES (%s,%s)", + user["email"], user["name"]) + else: + self.redirect("/") + return + else: + author_id = author["id"] + self.set_secure_cookie("user", str(author_id)) + self.redirect(self.get_argument("next", "/")) + + +class AuthLogoutHandler(BaseHandler): + def get(self): + self.clear_cookie("user") + self.redirect(self.get_argument("next", "/")) + + +class EntryModule(tornado.web.UIModule): + def render(self, entry): + return self.render_string("modules/entry.html", entry=entry) + + +def main(): + tornado.options.parse_command_line() + http_server = tornado.httpserver.HTTPServer(Application()) + http_server.listen(options.port) + tornado.ioloop.IOLoop.instance().start() + + +if __name__ == "__main__": + main() diff --git a/demos/blog/markdown.py b/demos/blog/markdown.py new file mode 100644 index 000000000..59ba731bf --- /dev/null +++ b/demos/blog/markdown.py @@ -0,0 +1,1877 @@ +#!/usr/bin/env python +# Copyright (c) 2007-2008 ActiveState Corp. +# License: MIT (http://www.opensource.org/licenses/mit-license.php) + +r"""A fast and complete Python implementation of Markdown. + +[from http://daringfireball.net/projects/markdown/] +> Markdown is a text-to-HTML filter; it translates an easy-to-read / +> easy-to-write structured text format into HTML. 
Markdown's text +> format is most similar to that of plain text email, and supports +> features such as headers, *emphasis*, code blocks, blockquotes, and +> links. +> +> Markdown's syntax is designed not as a generic markup language, but +> specifically to serve as a front-end to (X)HTML. You can use span-level +> HTML tags anywhere in a Markdown document, and you can use block level +> HTML tags (like
and
as well). + +Module usage: + + >>> import markdown2 + >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` + u'

boo!

\n' + + >>> markdowner = Markdown() + >>> markdowner.convert("*boo!*") + u'

boo!

\n' + >>> markdowner.convert("**boom!**") + u'

boom!

\n' + +This implementation of Markdown implements the full "core" syntax plus a +number of extras (e.g., code syntax coloring, footnotes) as described on +. +""" + +cmdln_desc = """A fast and complete Python implementation of Markdown, a +text-to-HTML conversion tool for web writers. +""" + +# Dev Notes: +# - There is already a Python markdown processor +# (http://www.freewisdom.org/projects/python-markdown/). +# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm +# not yet sure if there implications with this. Compare 'pydoc sre' +# and 'perldoc perlre'. + +__version_info__ = (1, 0, 1, 14) # first three nums match Markdown.pl +__version__ = '1.0.1.14' +__author__ = "Trent Mick" + +import os +import sys +from pprint import pprint +import re +import logging +try: + from hashlib import md5 +except ImportError: + from md5 import md5 +import optparse +from random import random +import codecs + + + +#---- Python version compat + +if sys.version_info[:2] < (2,4): + from sets import Set as set + def reversed(sequence): + for i in sequence[::-1]: + yield i + def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): + return unicode(s, encoding, errors) +else: + def _unicode_decode(s, encoding, errors='strict'): + return s.decode(encoding, errors) + + +#---- globals + +DEBUG = False +log = logging.getLogger("markdown") + +DEFAULT_TAB_WIDTH = 4 + +# Table of hash values for escaped characters: +def _escape_hash(s): + # Lame attempt to avoid possible collision with someone actually + # using the MD5 hexdigest of one of these chars in there text. + # Other ideas: random.random(), uuid.uuid() + #return md5(s).hexdigest() # Markdown.pl effectively does this. 
+ return 'md5-'+md5(s).hexdigest() +g_escape_table = dict([(ch, _escape_hash(ch)) + for ch in '\\`*_{}[]()>#+-.!']) + + + +#---- exceptions + +class MarkdownError(Exception): + pass + + + +#---- public api + +def markdown_path(path, encoding="utf-8", + html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None, + use_file_vars=False): + text = codecs.open(path, 'r', encoding).read() + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns, + use_file_vars=use_file_vars).convert(text) + +def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None, + use_file_vars=False): + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns, + use_file_vars=use_file_vars).convert(text) + +class Markdown(object): + # The dict of "extras" to enable in processing -- a mapping of + # extra name to argument for the extra. Most extras do not have an + # argument, in which case the value is None. + # + # This can be set via (a) subclassing and (b) the constructor + # "extras" argument. 
+ extras = None + + urls = None + titles = None + html_blocks = None + html_spans = None + html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py + + # Used to track when we're inside an ordered or unordered list + # (see _ProcessListItems() for details): + list_level = 0 + + _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) + + def __init__(self, html4tags=False, tab_width=4, safe_mode=None, + extras=None, link_patterns=None, use_file_vars=False): + if html4tags: + self.empty_element_suffix = ">" + else: + self.empty_element_suffix = " />" + self.tab_width = tab_width + + # For compatibility with earlier markdown2.py and with + # markdown.py's safe_mode being a boolean, + # safe_mode == True -> "replace" + if safe_mode is True: + self.safe_mode = "replace" + else: + self.safe_mode = safe_mode + + if self.extras is None: + self.extras = {} + elif not isinstance(self.extras, dict): + self.extras = dict([(e, None) for e in self.extras]) + if extras: + if not isinstance(extras, dict): + extras = dict([(e, None) for e in extras]) + self.extras.update(extras) + assert isinstance(self.extras, dict) + self._instance_extras = self.extras.copy() + self.link_patterns = link_patterns + self.use_file_vars = use_file_vars + self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) + + def reset(self): + self.urls = {} + self.titles = {} + self.html_blocks = {} + self.html_spans = {} + self.list_level = 0 + self.extras = self._instance_extras.copy() + if "footnotes" in self.extras: + self.footnotes = {} + self.footnote_ids = [] + + def convert(self, text): + """Convert the given text.""" + # Main function. The order in which other subs are called here is + # essential. Link and image substitutions need to happen before + # _EscapeSpecialChars(), so that any *'s or _'s in the + # and tags get encoded. + + # Clear the global hashes. 
If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + self.reset() + + if not isinstance(text, unicode): + #TODO: perhaps shouldn't presume UTF-8 for string input? + text = unicode(text, 'utf-8') + + if self.use_file_vars: + # Look for emacs-style file variable hints. + emacs_vars = self._get_emacs_vars(text) + if "markdown-extras" in emacs_vars: + splitter = re.compile("[ ,]+") + for e in splitter.split(emacs_vars["markdown-extras"]): + if '=' in e: + ename, earg = e.split('=', 1) + try: + earg = int(earg) + except ValueError: + pass + else: + ename, earg = e, None + self.extras[ename] = earg + + # Standardize line endings: + text = re.sub("\r\n|\r", "\n", text) + + # Make sure $text ends with a couple of newlines: + text += "\n\n" + + # Convert all tabs to spaces. + text = self._detab(text) + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + text = self._ws_only_line_re.sub("", text) + + if self.safe_mode: + text = self._hash_html_spans(text) + + # Turn block-level HTML blocks into hash entries + text = self._hash_html_blocks(text, raw=True) + + # Strip link definitions, store in hashes. 
+ if "footnotes" in self.extras: + # Must do footnotes first because an unlucky footnote defn + # looks like a link defn: + # [^4]: this "looks like a link defn" + text = self._strip_footnote_definitions(text) + text = self._strip_link_definitions(text) + + text = self._run_block_gamut(text) + + if "footnotes" in self.extras: + text = self._add_footnotes(text) + + text = self._unescape_special_chars(text) + + if self.safe_mode: + text = self._unhash_html_spans(text) + + text += "\n" + return text + + _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) + # This regular expression is intended to match blocks like this: + # PREFIX Local Variables: SUFFIX + # PREFIX mode: Tcl SUFFIX + # PREFIX End: SUFFIX + # Some notes: + # - "[ \t]" is used instead of "\s" to specifically exclude newlines + # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does + # not like anything other than Unix-style line terminators. + _emacs_local_vars_pat = re.compile(r"""^ + (?P(?:[^\r\n|\n|\r])*?) + [\ \t]*Local\ Variables:[\ \t]* + (?P.*?)(?:\r\n|\n|\r) + (?P.*?\1End:) + """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE) + + def _get_emacs_vars(self, text): + """Return a dictionary of emacs-style local variables. + + Parsing is done loosely according to this spec (and according to + some in-practice deviations from this): + http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables + """ + emacs_vars = {} + SIZE = pow(2, 13) # 8kB + + # Search near the start for a '-*-'-style one-liner of variables. 
+ head = text[:SIZE] + if "-*-" in head: + match = self._emacs_oneliner_vars_pat.search(head) + if match: + emacs_vars_str = match.group(1) + assert '\n' not in emacs_vars_str + emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';') + if s.strip()] + if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]: + # While not in the spec, this form is allowed by emacs: + # -*- Tcl -*- + # where the implied "variable" is "mode". This form + # is only allowed if there are no other variables. + emacs_vars["mode"] = emacs_var_strs[0].strip() + else: + for emacs_var_str in emacs_var_strs: + try: + variable, value = emacs_var_str.strip().split(':', 1) + except ValueError: + log.debug("emacs variables error: malformed -*- " + "line: %r", emacs_var_str) + continue + # Lowercase the variable name because Emacs allows "Mode" + # or "mode" or "MoDe", etc. + emacs_vars[variable.lower()] = value.strip() + + tail = text[-SIZE:] + if "Local Variables" in tail: + match = self._emacs_local_vars_pat.search(tail) + if match: + prefix = match.group("prefix") + suffix = match.group("suffix") + lines = match.group("content").splitlines(0) + #print "prefix=%r, suffix=%r, content=%r, lines: %s"\ + # % (prefix, suffix, match.group("content"), lines) + + # Validate the Local Variables block: proper prefix and suffix + # usage. + for i, line in enumerate(lines): + if not line.startswith(prefix): + log.debug("emacs variables error: line '%s' " + "does not use proper prefix '%s'" + % (line, prefix)) + return {} + # Don't validate suffix on last line. Emacs doesn't care, + # neither should we. + if i != len(lines)-1 and not line.endswith(suffix): + log.debug("emacs variables error: line '%s' " + "does not use proper suffix '%s'" + % (line, suffix)) + return {} + + # Parse out one emacs var per line. 
+ continued_for = None + for line in lines[:-1]: # no var on the last line ("PREFIX End:") + if prefix: line = line[len(prefix):] # strip prefix + if suffix: line = line[:-len(suffix)] # strip suffix + line = line.strip() + if continued_for: + variable = continued_for + if line.endswith('\\'): + line = line[:-1].rstrip() + else: + continued_for = None + emacs_vars[variable] += ' ' + line + else: + try: + variable, value = line.split(':', 1) + except ValueError: + log.debug("local variables error: missing colon " + "in local variables entry: '%s'" % line) + continue + # Do NOT lowercase the variable name, because Emacs only + # allows "mode" (and not "Mode", "MoDe", etc.) in this block. + value = value.strip() + if value.endswith('\\'): + value = value[:-1].rstrip() + continued_for = variable + else: + continued_for = None + emacs_vars[variable] = value + + # Unquote values. + for var, val in emacs_vars.items(): + if len(val) > 1 and (val.startswith('"') and val.endswith('"') + or val.startswith('"') and val.endswith('"')): + emacs_vars[var] = val[1:-1] + + return emacs_vars + + # Cribbed from a post by Bart Lateur: + # + _detab_re = re.compile(r'(.*?)\t', re.M) + def _detab_sub(self, match): + g1 = match.group(1) + return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) + def _detab(self, text): + r"""Remove (leading?) tabs from a file. + + >>> m = Markdown() + >>> m._detab("\tfoo") + ' foo' + >>> m._detab(" \tfoo") + ' foo' + >>> m._detab("\t foo") + ' foo' + >>> m._detab(" foo") + ' foo' + >>> m._detab(" foo\n\tbar\tblam") + ' foo\n bar blam' + """ + if '\t' not in text: + return text + return self._detab_re.subn(self._detab_sub, text)[0] + + _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' + _strict_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? 
# any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_a, + re.X | re.M) + + _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' + _liberal_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_b, + re.X | re.M) + + def _hash_html_block_sub(self, match, raw=False): + html = match.group(1) + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + return "\n\n" + key + "\n\n" + + def _hash_html_blocks(self, text, raw=False): + """Hashify HTML blocks + + We only want to do this for block-level HTML tags, such as headers, + lists, and tables. That's because we still want to wrap

s around + "paragraphs" that are wrapped in non-block-level tags, such as anchors, + phrase emphasis, and spans. The list of tags we're looking for is + hard-coded. + + @param raw {boolean} indicates if these are raw HTML blocks in + the original source. It makes a difference in "safe" mode. + """ + if '<' not in text: + return text + + # Pass `raw` value into our calls to self._hash_html_block_sub. + hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw) + + # First, look for nested blocks, e.g.: + #

+ #
+ # tags for inner block must be indented. + #
+ #
+ # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `
` and stop at the first `
`. + text = self._strict_tag_block_re.sub(hash_html_block_sub, text) + + # Now match more liberally, simply from `\n` to `\n` + text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) + + # Special case just for
. It was easier to make a special + # case than to make the other regex more complicated. + if "", start_idx) + 3 + except ValueError, ex: + break + + # Start position for next comment block search. + start = end_idx + + # Validate whitespace before comment. + if start_idx: + # - Up to `tab_width - 1` spaces before start_idx. + for i in range(self.tab_width - 1): + if text[start_idx - 1] != ' ': + break + start_idx -= 1 + if start_idx == 0: + break + # - Must be preceded by 2 newlines or hit the start of + # the document. + if start_idx == 0: + pass + elif start_idx == 1 and text[0] == '\n': + start_idx = 0 # to match minute detail of Markdown.pl regex + elif text[start_idx-2:start_idx] == '\n\n': + pass + else: + break + + # Validate whitespace after comment. + # - Any number of spaces and tabs. + while end_idx < len(text): + if text[end_idx] not in ' \t': + break + end_idx += 1 + # - Must be following by 2 newlines or hit end of text. + if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): + continue + + # Escape and hash (must match `_hash_html_block_sub`). + html = text[start_idx:end_idx] + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:] + + if "xml" in self.extras: + # Treat XML processing instructions and namespaced one-liner + # tags as if they were block HTML tags. E.g., if standalone + # (i.e. are their own paragraph), the following do not get + # wrapped in a

tag: + # + # + # + _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width) + text = _xml_oneliner_re.sub(hash_html_block_sub, text) + + return text + + def _strip_link_definitions(self, text): + # Strips link definitions from text, stores the URLs and titles in + # hash references. + less_than_tab = self.tab_width - 1 + + # Link defs are in the form: + # [id]: url "optional title" + _link_def_re = re.compile(r""" + ^[ ]{0,%d}\[(.+)\]: # id = \1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = \2 + [ \t]* + (?: + \n? # maybe one newline + [ \t]* + (?<=\s) # lookbehind for whitespace + ['"(] + ([^\n]*) # title = \3 + ['")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + """ % less_than_tab, re.X | re.M | re.U) + return _link_def_re.sub(self._extract_link_def_sub, text) + + def _extract_link_def_sub(self, match): + id, url, title = match.groups() + key = id.lower() # Link IDs are case-insensitive + self.urls[key] = self._encode_amps_and_angles(url) + if title: + self.titles[key] = title.replace('"', '"') + return "" + + def _extract_footnote_def_sub(self, match): + id, text = match.groups() + text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() + normed_id = re.sub(r'\W', '-', id) + # Ensure footnote text ends with a couple newlines (for some + # block gamut matches). + self.footnotes[normed_id] = text + "\n\n" + return "" + + def _strip_footnote_definitions(self, text): + """A footnote definition looks like this: + + [^note-id]: Text of the note. + + May include one or more indented paragraphs. + + Where, + - The 'note-id' can be pretty much anything, though typically it + is the number of the footnote. + - The first paragraph may start on the next line, like so: + + [^note-id]: + Text of the note. + """ + less_than_tab = self.tab_width - 1 + footnote_def_re = re.compile(r''' + ^[ ]{0,%d}\[\^(.+)\]: # id = \1 + [ \t]* + ( # footnote text = \2 + # First line need not start with the spaces. 
+ (?:\s*.*\n+) + (?: + (?:[ ]{%d} | \t) # Subsequent lines must be indented. + .*\n+ + )* + ) + # Lookahead for non-space at line-start, or end of doc. + (?:(?=^[ ]{0,%d}\S)|\Z) + ''' % (less_than_tab, self.tab_width, self.tab_width), + re.X | re.M) + return footnote_def_re.sub(self._extract_footnote_def_sub, text) + + + _hr_res = [ + re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), + ] + + def _run_block_gamut(self, text): + # These are all the transformations that form block-level + # tags like paragraphs, headers, and list items. + + text = self._do_headers(text) + + # Do Horizontal Rules: + hr = "\n tags around block-level tags. + text = self._hash_html_blocks(text) + + text = self._form_paragraphs(text) + + return text + + def _pyshell_block_sub(self, match): + lines = match.group(0).splitlines(0) + _dedentlines(lines) + indent = ' ' * self.tab_width + s = ('\n' # separate from possible cuddled paragraph + + indent + ('\n'+indent).join(lines) + + '\n\n') + return s + + def _prepare_pyshell_blocks(self, text): + """Ensure that Python interactive shell sessions are put in + code blocks -- even if not properly indented. + """ + if ">>>" not in text: + return text + + less_than_tab = self.tab_width - 1 + _pyshell_block_re = re.compile(r""" + ^([ ]{0,%d})>>>[ ].*\n # first line + ^(\1.*\S+.*\n)* # any number of subsequent lines + ^\n # ends with a blank line + """ % less_than_tab, re.M | re.X) + + return _pyshell_block_re.sub(self._pyshell_block_sub, text) + + def _run_span_gamut(self, text): + # These are all the transformations that occur *within* block-level + # tags like paragraphs, headers, and list items. + + text = self._do_code_spans(text) + + text = self._escape_special_chars(text) + + # Process anchor and image tags. 
+ text = self._do_links(text) + + # Make links out of things like `` + # Must come after _do_links(), because you can use < and > + # delimiters in inline links like [this](). + text = self._do_auto_links(text) + + if "link-patterns" in self.extras: + text = self._do_link_patterns(text) + + text = self._encode_amps_and_angles(text) + + text = self._do_italics_and_bold(text) + + # Do hard breaks: + text = re.sub(r" {2,}\n", " + | + # auto-link (e.g., ) + <\w+[^>]*> + | + # comment + | + <\?.*?\?> # processing instruction + ) + """, re.X) + + def _escape_special_chars(self, text): + # Python markdown note: the HTML tokenization here differs from + # that in Markdown.pl, hence the behaviour for subtle cases can + # differ (I believe the tokenizer here does a better job because + # it isn't susceptible to unmatched '<' and '>' in HTML tags). + # Note, however, that '>' is not allowed in an auto-link URL + # here. + escaped = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup: + # Within tags/HTML-comments/auto-links, encode * and _ + # so they don't conflict with their use in Markdown for + # italics and strong. We're replacing each such + # character with its corresponding MD5 checksum value; + # this is likely overkill, but it should prevent us from + # colliding with the escape values by accident. + escaped.append(token.replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + else: + escaped.append(self._encode_backslash_escapes(token)) + is_html_markup = not is_html_markup + return ''.join(escaped) + + def _hash_html_spans(self, text): + # Used for safe_mode. 
+ + def _is_auto_link(s): + if ':' in s and self._auto_link_re.match(s): + return True + elif '@' in s and self._auto_email_link_re.match(s): + return True + return False + + tokens = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup and not _is_auto_link(token): + sanitized = self._sanitize_html(token) + key = _hash_text(sanitized) + self.html_spans[key] = sanitized + tokens.append(key) + else: + tokens.append(token) + is_html_markup = not is_html_markup + return ''.join(tokens) + + def _unhash_html_spans(self, text): + for key, sanitized in self.html_spans.items(): + text = text.replace(key, sanitized) + return text + + def _sanitize_html(self, s): + if self.safe_mode == "replace": + return self.html_removed_text + elif self.safe_mode == "escape": + replacements = [ + ('&', '&'), + ('<', '<'), + ('>', '>'), + ] + for before, after in replacements: + s = s.replace(before, after) + return s + else: + raise MarkdownError("invalid value for 'safe_mode': %r (must be " + "'escape' or 'replace')" % self.safe_mode) + + _tail_of_inline_link_re = re.compile(r''' + # Match tail of: [text](/url/) or [text](/url/ "title") + \( # literal paren + [ \t]* + (?P # \1 + <.*?> + | + .*? + ) + [ \t]* + ( # \2 + (['"]) # quote char = \3 + (?P.*?) + \3 # matching quote + )? # title is optional + \) + ''', re.X | re.S) + _tail_of_reference_link_re = re.compile(r''' + # Match tail of: [text][id] + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + \[ + (?P<id>.*?) + \] + ''', re.X | re.S) + + def _do_links(self, text): + """Turn Markdown link shortcuts into XHTML <a> and <img> tags. + + This is a combination of Markdown.pl's _DoAnchors() and + _DoImages(). They are done together because that simplified the + approach. It was necessary to use a different approach than + Markdown.pl because of the lack of atomic matching support in + Python's regex engine used in $g_nested_brackets. 
+ """ + MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24 + + # `anchor_allowed_pos` is used to support img links inside + # anchors, but not anchors inside anchors. An anchor's start + # pos must be `>= anchor_allowed_pos`. + anchor_allowed_pos = 0 + + curr_pos = 0 + while True: # Handle the next link. + # The next '[' is the start of: + # - an inline anchor: [text](url "title") + # - a reference anchor: [text][id] + # - an inline img: ![text](url "title") + # - a reference img: ![text][id] + # - a footnote ref: [^id] + # (Only if 'footnotes' extra enabled) + # - a footnote defn: [^id]: ... + # (Only if 'footnotes' extra enabled) These have already + # been stripped in _strip_footnote_definitions() so no + # need to watch for them. + # - a link definition: [id]: url "title" + # These have already been stripped in + # _strip_link_definitions() so no need to watch for them. + # - not markup: [...anything else... + try: + start_idx = text.index('[', curr_pos) + except ValueError: + break + text_length = len(text) + + # Find the matching closing ']'. + # Markdown.pl allows *matching* brackets in link text so we + # will here too. Markdown.pl *doesn't* currently allow + # matching brackets in img alt text -- we'll differ in that + # regard. + bracket_depth = 0 + for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, + text_length)): + ch = text[p] + if ch == ']': + bracket_depth -= 1 + if bracket_depth < 0: + break + elif ch == '[': + bracket_depth += 1 + else: + # Closing bracket not found within sentinel length. + # This isn't markup. + curr_pos = start_idx + 1 + continue + link_text = text[start_idx+1:p] + + # Possibly a footnote ref? 
+ if "footnotes" in self.extras and link_text.startswith("^"): + normed_id = re.sub(r'\W', '-', link_text[1:]) + if normed_id in self.footnotes: + self.footnote_ids.append(normed_id) + result = '<sup class="footnote-ref" id="fnref-%s">' \ + '<a href="#fn-%s">%s</a></sup>' \ + % (normed_id, normed_id, len(self.footnote_ids)) + text = text[:start_idx] + result + text[p+1:] + else: + # This id isn't defined, leave the markup alone. + curr_pos = p+1 + continue + + # Now determine what this is by the remainder. + p += 1 + if p == text_length: + return text + + # Inline anchor or img? + if text[p] == '(': # attempt at perf improvement + match = self._tail_of_inline_link_re.match(text, p) + if match: + # Handle an inline anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + + url, title = match.group("url"), match.group("title") + if url and url[0] == '<': + url = url[1:-1] # '<url>' -> 'url' + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + if title: + title_str = ' title="%s"' \ + % title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) \ + .replace('"', '"') + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url, link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + continue + + # Reference anchor or img? 
+ else: + match = self._tail_of_reference_link_re.match(text, p) + if match: + # Handle a reference-style anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + link_id = match.group("id").lower() + if not link_id: + link_id = link_text.lower() # for links like [this][] + if link_id in self.urls: + url = self.urls[link_id] + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title = self.titles.get(link_id) + if title: + title = title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title_str = ' title="%s"' % title + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url, link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result = '<a href="%s"%s>%s</a>' \ + % (url, title_str, link_text) + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + else: + # This id isn't defined, leave the markup alone. + curr_pos = match.end() + continue + + # Otherwise, it isn't markup. 
+ curr_pos = start_idx + 1 + + return text + + + _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) + def _setext_h_sub(self, match): + n = {"=": 1, "-": 2}[match.group(2)[0]] + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + return "<h%d>%s</h%d>\n\n" \ + % (n, self._run_span_gamut(match.group(1)), n) + + _atx_h_re = re.compile(r''' + ^(\#{1,6}) # \1 = string of #'s + [ \t]* + (.+?) # \2 = Header text + [ \t]* + (?<!\\) # ensure not an escaped trailing '#' + \#* # optional closing #'s (not counted) + \n+ + ''', re.X | re.M) + def _atx_h_sub(self, match): + n = len(match.group(1)) + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + return "<h%d>%s</h%d>\n\n" \ + % (n, self._run_span_gamut(match.group(2)), n) + + def _do_headers(self, text): + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + text = self._setext_h_re.sub(self._setext_h_sub, text) + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + text = self._atx_h_re.sub(self._atx_h_sub, text) + + return text + + + _marker_ul_chars = '*+-' + _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars + _marker_ul = '(?:[%s])' % _marker_ul_chars + _marker_ol = r'(?:\d+\.)' + + def _list_sub(self, match): + lst = match.group(1) + lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol" + result = self._process_list_items(lst) + if self.list_level: + return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type) + else: + return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type) + + def _do_lists(self, text): + # Form HTML ordered (numbered) and unordered (bulleted) lists. 
+ + for marker_pat in (self._marker_ul, self._marker_ol): + # Re-usable pattern to match any entire ul or ol list: + less_than_tab = self.tab_width - 1 + whole_list = r''' + ( # \1 = whole list + ( # \2 + [ ]{0,%d} + (%s) # \3 = first list item marker + [ \t]+ + ) + (?:.+?) + ( # \4 + \Z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + %s[ \t]+ + ) + ) + ) + ''' % (less_than_tab, marker_pat, marker_pat) + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _process_list_items(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code to that uses two + # static s/// patterns rather than one conditional pattern. + + if self.list_level: + sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S) + text = sub_list_re.sub(self._list_sub, text) + else: + list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, + re.X | re.M | re.S) + text = list_re.sub(self._list_sub, text) + + return text + + _list_item_re = re.compile(r''' + (\n)? 
# leading line = \1 + (^[ \t]*) # leading whitespace = \2 + (%s) [ \t]+ # list marker = \3 + ((?:.+?) # list item text = \4 + (\n{1,2})) # eols = \5 + (?= \n* (\Z | \2 (%s) [ \t]+)) + ''' % (_marker_any, _marker_any), + re.M | re.X | re.S) + + _last_li_endswith_two_eols = False + def _list_item_sub(self, match): + item = match.group(4) + leading_line = match.group(1) + leading_space = match.group(2) + if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: + item = self._run_block_gamut(self._outdent(item)) + else: + # Recursion for sub-lists: + item = self._do_lists(self._outdent(item)) + if item.endswith('\n'): + item = item[:-1] + item = self._run_span_gamut(item) + self._last_li_endswith_two_eols = (len(match.group(5)) == 2) + return "<li>%s</li>\n" % item + + def _process_list_items(self, list_str): + # Process the contents of a single ordered or unordered list, + # splitting it into individual list items. + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". 
+ self.list_level += 1 + self._last_li_endswith_two_eols = False + list_str = list_str.rstrip('\n') + '\n' + list_str = self._list_item_re.sub(self._list_item_sub, list_str) + self.list_level -= 1 + return list_str + + def _get_pygments_lexer(self, lexer_name): + try: + from pygments import lexers, util + except ImportError: + return None + try: + return lexers.get_lexer_by_name(lexer_name) + except util.ClassNotFound: + return None + + def _color_with_pygments(self, codeblock, lexer, **formatter_opts): + import pygments + import pygments.formatters + + class HtmlCodeFormatter(pygments.formatters.HtmlFormatter): + def _wrap_code(self, inner): + """A function for use in a Pygments Formatter which + wraps in <code> tags. + """ + yield 0, "<code>" + for tup in inner: + yield tup + yield 0, "</code>" + + def wrap(self, source, outfile): + """Return the source with a code, pre, and div.""" + return self._wrap_div(self._wrap_pre(self._wrap_code(source))) + + formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts) + return pygments.highlight(codeblock, lexer, formatter) + + def _code_block_sub(self, match): + codeblock = match.group(1) + codeblock = self._outdent(codeblock) + codeblock = self._detab(codeblock) + codeblock = codeblock.lstrip('\n') # trim leading newlines + codeblock = codeblock.rstrip() # trim trailing whitespace + + if "code-color" in self.extras and codeblock.startswith(":::"): + lexer_name, rest = codeblock.split('\n', 1) + lexer_name = lexer_name[3:].strip() + lexer = self._get_pygments_lexer(lexer_name) + codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
+ if lexer: + formatter_opts = self.extras['code-color'] or {} + colored = self._color_with_pygments(codeblock, lexer, + **formatter_opts) + return "\n\n%s\n\n" % colored + + codeblock = self._encode_code(codeblock) + return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock + + def _do_code_blocks(self, text): + """Process Markdown `<pre><code>` blocks.""" + code_block_re = re.compile(r''' + (?:\n\n|\A) + ( # $1 = the code block -- one or more lines, starting with a space/tab + (?: + (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + ''' % (self.tab_width, self.tab_width), + re.M | re.X) + + return code_block_re.sub(self._code_block_sub, text) + + + # Rules for a code span: + # - backslash escapes are not interpreted in a code span + # - to include one or or a run of more backticks the delimiters must + # be a longer run of backticks + # - cannot start or end a code span with a backtick; pad with a + # space and that space will be removed in the emitted HTML + # See `test/tm-cases/escapes.text` for a number of edge-case + # examples. + _code_span_re = re.compile(r''' + (?<!\\) + (`+) # \1 = Opening run of ` + (?!`) # See Note A test/tm-cases/escapes.text + (.+?) # \2 = The code block + (?<!`) + \1 # Matching closer + (?!`) + ''', re.X | re.S) + + def _code_span_sub(self, match): + c = match.group(2).strip(" \t") + c = self._encode_code(c) + return "<code>%s</code>" % c + + def _do_code_spans(self, text): + # * Backtick quotes are used for <code></code> spans. + # + # * You can use multiple backticks as the delimiters if you want to + # include literal backticks in the code span. So, this input: + # + # Just type ``foo `bar` baz`` at the prompt. + # + # Will translate to: + # + # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> + # + # There's no arbitrary limit to the number of backticks you + # can use as delimters. 
If you need three consecutive backticks + # in your code, use four for delimiters, etc. + # + # * You can use spaces to get literal backticks at the edges: + # + # ... type `` `bar` `` ... + # + # Turns to: + # + # ... type <code>`bar`</code> ... + return self._code_span_re.sub(self._code_span_sub, text) + + def _encode_code(self, text): + """Encode/escape certain characters inside Markdown code runs. + The point is that in code, these characters are literals, + and lose their special Markdown meanings. + """ + replacements = [ + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + ('&', '&'), + # Do the angle bracket song and dance: + ('<', '<'), + ('>', '>'), + # Now, escape characters that are magic in Markdown: + ('*', g_escape_table['*']), + ('_', g_escape_table['_']), + ('{', g_escape_table['{']), + ('}', g_escape_table['}']), + ('[', g_escape_table['[']), + (']', g_escape_table[']']), + ('\\', g_escape_table['\\']), + ] + for before, after in replacements: + text = text.replace(before, after) + return text + + _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) + _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) + _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) + _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) + def _do_italics_and_bold(self, text): + # <strong> must go first: + if "code-friendly" in self.extras: + text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text) + text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) + else: + text = self._strong_re.sub(r"<strong>\2</strong>", text) + text = self._em_re.sub(r"<em>\2</em>", text) + return text + + + _block_quote_re = re.compile(r''' + ( # Wrap whole match in \1 + ( + ^[ \t]*>[ \t]? 
# '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + ''', re.M | re.X) + _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); + + _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) + def _dedent_two_spaces_sub(self, match): + return re.sub(r'(?m)^ ', '', match.group(1)) + + def _block_quote_sub(self, match): + bq = match.group(1) + bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting + bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines + bq = self._run_block_gamut(bq) # recurse + + bq = re.sub('(?m)^', ' ', bq) + # These leading spaces screw with <pre> content, so we need to fix that: + bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) + + return "<blockquote>\n%s\n</blockquote>\n\n" % bq + + def _do_block_quotes(self, text): + if '>' not in text: + return text + return self._block_quote_re.sub(self._block_quote_sub, text) + + def _form_paragraphs(self, text): + # Strip leading and trailing lines: + text = text.strip('\n') + + # Wrap <p> tags. + grafs = re.split(r"\n{2,}", text) + for i, graf in enumerate(grafs): + if graf in self.html_blocks: + # Unhashify HTML blocks + grafs[i] = self.html_blocks[graf] + else: + # Wrap <p> tags. 
+ graf = self._run_span_gamut(graf) + grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>" + + return "\n\n".join(grafs) + + def _add_footnotes(self, text): + if self.footnotes: + footer = [ + '<div class="footnotes">', + '<hr' + self.empty_element_suffix, + '<ol>', + ] + for i, id in enumerate(self.footnote_ids): + if i != 0: + footer.append('') + footer.append('<li id="fn-%s">' % id) + footer.append(self._run_block_gamut(self.footnotes[id])) + backlink = ('<a href="#fnref-%s" ' + 'class="footnoteBackLink" ' + 'title="Jump back to footnote %d in the text.">' + '↩</a>' % (id, i+1)) + if footer[-1].endswith("</p>"): + footer[-1] = footer[-1][:-len("</p>")] \ + + ' ' + backlink + "</p>" + else: + footer.append("\n<p>%s</p>" % backlink) + footer.append('</li>') + footer.append('</ol>') + footer.append('</div>') + return text + '\n\n' + '\n'.join(footer) + else: + return text + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') + _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) + _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I) + + def _encode_amps_and_angles(self, text): + # Smart processing for ampersands and angle brackets that need + # to be encoded. + text = self._ampersand_re.sub('&', text) + + # Encode naked <'s + text = self._naked_lt_re.sub('<', text) + + # Encode naked >'s + # Note: Other markdown implementations (e.g. Markdown.pl, PHP + # Markdown) don't do this. + text = self._naked_gt_re.sub('>', text) + return text + + def _encode_backslash_escapes(self, text): + for ch, escape in g_escape_table.items(): + text = text.replace("\\"+ch, escape) + return text + + _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) + def _auto_link_sub(self, match): + g1 = match.group(1) + return '<a href="%s">%s</a>' % (g1, g1) + + _auto_email_link_re = re.compile(r""" + < + (?:mailto:)? 
+ ( + [-.\w]+ + \@ + [-\w]+(\.[-\w]+)*\.[a-z]+ + ) + > + """, re.I | re.X | re.U) + def _auto_email_link_sub(self, match): + return self._encode_email_address( + self._unescape_special_chars(match.group(1))) + + def _do_auto_links(self, text): + text = self._auto_link_re.sub(self._auto_link_sub, text) + text = self._auto_email_link_re.sub(self._auto_email_link_sub, text) + return text + + def _encode_email_address(self, addr): + # Input: an email address, e.g. "foo@example.com" + # + # Output: the email address as a mailto link, with each character + # of the address encoded as either a decimal or hex entity, in + # the hopes of foiling most address harvesting spam bots. E.g.: + # + # <a href="mailto:foo@e + # xample.com">foo + # @example.com</a> + # + # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk + # mailing list: <http://tinyurl.com/yu7ue> + chars = [_xml_encode_email_char_at_random(ch) + for ch in "mailto:" + addr] + # Strip the mailto: from the visible part. + addr = '<a href="%s">%s</a>' \ + % (''.join(chars), ''.join(chars[7:])) + return addr + + def _do_link_patterns(self, text): + """Caveat emptor: there isn't much guarding against link + patterns being formed inside other standard Markdown links, e.g. + inside a [link def][like this]. + + Dev Notes: *Could* consider prefixing regexes with a negative + lookbehind assertion to attempt to guard against this. 
+ """ + link_from_hash = {} + for regex, repl in self.link_patterns: + replacements = [] + for match in regex.finditer(text): + if hasattr(repl, "__call__"): + href = repl(match) + else: + href = match.expand(repl) + replacements.append((match.span(), href)) + for (start, end), href in reversed(replacements): + escaped_href = ( + href.replace('"', '"') # b/c of attr quote + # To avoid markdown <em> and <strong>: + .replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) + hash = md5(link).hexdigest() + link_from_hash[hash] = link + text = text[:start] + hash + text[end:] + for hash, link in link_from_hash.items(): + text = text.replace(hash, link) + return text + + def _unescape_special_chars(self, text): + # Swap back in all the special characters we've hidden. + for ch, hash in g_escape_table.items(): + text = text.replace(hash, ch) + return text + + def _outdent(self, text): + # Remove one level of line-leading tabs or spaces + return self._outdent_re.sub('', text) + + +class MarkdownWithExtras(Markdown): + """A markdowner class that enables most extras: + + - footnotes + - code-color (only has effect if 'pygments' Python module on path) + + These are not included: + - pyshell (specific to Python-related documenting) + - code-friendly (because it *disables* part of the syntax) + - link-patterns (because you need to specify some actual + link-patterns anyway) + """ + extras = ["footnotes", "code-color"] + + +#---- internal support functions + +# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 +def _curry(*args, **kwargs): + function, args = args[0], args[1:] + def result(*rest, **kwrest): + combined = kwargs.copy() + combined.update(kwrest) + return function(*args + rest, **combined) + return result + +# Recipe: regex_from_encoded_pattern (1.0) +def _regex_from_encoded_pattern(s): + """'foo' -> re.compile(re.escape('foo')) + '/foo/' -> re.compile('foo') + '/foo/i' 
-> re.compile('foo', re.I) + """ + if s.startswith('/') and s.rfind('/') != 0: + # Parse it: /PATTERN/FLAGS + idx = s.rfind('/') + pattern, flags_str = s[1:idx], s[idx+1:] + flag_from_char = { + "i": re.IGNORECASE, + "l": re.LOCALE, + "s": re.DOTALL, + "m": re.MULTILINE, + "u": re.UNICODE, + } + flags = 0 + for char in flags_str: + try: + flags |= flag_from_char[char] + except KeyError: + raise ValueError("unsupported regex flag: '%s' in '%s' " + "(must be one of '%s')" + % (char, s, ''.join(flag_from_char.keys()))) + return re.compile(s[1:idx], flags) + else: # not an encoded regex + return re.compile(re.escape(s)) + +# Recipe: dedent (0.1.2) +def _dedentlines(lines, tabsize=8, skip_first_line=False): + """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines + + "lines" is a list of lines to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. + + Same as dedent() except operates on a sequence of lines. Note: the + lines list is modified **in-place**. 
+ """ + DEBUG = False + if DEBUG: + print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ + % (tabsize, skip_first_line) + indents = [] + margin = None + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + indent = 0 + for ch in line: + if ch == ' ': + indent += 1 + elif ch == '\t': + indent += tabsize - (indent % tabsize) + elif ch in '\r\n': + continue # skip all-whitespace lines + else: + break + else: + continue # skip all-whitespace lines + if DEBUG: print "dedent: indent=%d: %r" % (indent, line) + if margin is None: + margin = indent + else: + margin = min(margin, indent) + if DEBUG: print "dedent: margin=%r" % margin + + if margin is not None and margin > 0: + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + removed = 0 + for j, ch in enumerate(line): + if ch == ' ': + removed += 1 + elif ch == '\t': + removed += tabsize - (removed % tabsize) + elif ch in '\r\n': + if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line + lines[i] = lines[i][j:] + break + else: + raise ValueError("unexpected non-whitespace char %r in " + "line %r while removing %d-space margin" + % (ch, line, margin)) + if DEBUG: + print "dedent: %r: %r -> removed %d/%d"\ + % (line, ch, removed, margin) + if removed == margin: + lines[i] = lines[i][j+1:] + break + elif removed > margin: + lines[i] = ' '*(removed-margin) + lines[i][j+1:] + break + else: + if removed: + lines[i] = lines[i][removed:] + return lines + +def _dedent(text, tabsize=8, skip_first_line=False): + """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text + + "text" is the text to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. 
+ + textwrap.dedent(s), but don't expand tabs to spaces + """ + lines = text.splitlines(1) + _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line) + return ''.join(lines) + + +class _memoized(object): + """Decorator that caches a function's return value each time it is called. + If called later with the same arguments, the cached value is returned, and + not re-evaluated. + + http://wiki.python.org/moin/PythonDecoratorLibrary + """ + def __init__(self, func): + self.func = func + self.cache = {} + def __call__(self, *args): + try: + return self.cache[args] + except KeyError: + self.cache[args] = value = self.func(*args) + return value + except TypeError: + # uncachable -- for instance, passing a list as an argument. + # Better to not cache than to blow up entirely. + return self.func(*args) + def __repr__(self): + """Return the function's docstring.""" + return self.func.__doc__ + + +def _xml_oneliner_re_from_tab_width(tab_width): + """Standalone XML processing instruction regex.""" + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,%d} + (?: + <\?\w+\b\s+.*?\?> # XML processing instruction + | + <\w+:\w+\b\s+.*?/> # namespaced single tag + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width) + +def _hr_tag_re_from_tab_width(tab_width): + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in \1 + [ ]{0,%d} + <(hr) # start tag = \2 + \b # word break + ([^<>])*? 
# + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) + + +def _xml_encode_email_char_at_random(ch): + r = random() + # Roughly 10% raw, 45% hex, 45% dec. + # '@' *must* be encoded. I [John Gruber] insist. + # Issue 26: '_' must be encoded. + if r > 0.9 and ch not in "@_": + return ch + elif r < 0.45: + # The [1:] is to drop leading '0': 0x63 -> x63 + return '&#%s;' % hex(ord(ch))[1:] + else: + return '&#%s;' % ord(ch) + +def _hash_text(text): + return 'md5:'+md5(text.encode("utf-8")).hexdigest() + + +#---- mainline + +class _NoReflowFormatter(optparse.IndentedHelpFormatter): + """An optparse formatter that does NOT reflow the description.""" + def format_description(self, description): + return description or "" + +def _test(): + import doctest + doctest.testmod() + +def main(argv=None): + if argv is None: + argv = sys.argv + if not logging.root.handlers: + logging.basicConfig() + + usage = "usage: %prog [PATHS...]" + version = "%prog "+__version__ + parser = optparse.OptionParser(prog="markdown2", usage=usage, + version=version, description=cmdln_desc, + formatter=_NoReflowFormatter()) + parser.add_option("-v", "--verbose", dest="log_level", + action="store_const", const=logging.DEBUG, + help="more verbose output") + parser.add_option("--encoding", + help="specify encoding of text content") + parser.add_option("--html4tags", action="store_true", default=False, + help="use HTML 4 style for empty element tags") + parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", + help="sanitize literal HTML: 'escape' escapes " + "HTML meta chars, 'replace' replaces with an " + "[HTML_REMOVED] note") + parser.add_option("-x", "--extras", action="append", + help="Turn on specific extra features (not part of " + "the core Markdown spec). 
Supported values: " + "'code-friendly' disables _/__ for emphasis; " + "'code-color' adds code-block syntax coloring; " + "'link-patterns' adds auto-linking based on patterns; " + "'footnotes' adds the footnotes syntax;" + "'xml' passes one-liner processing instructions and namespaced XML tags;" + "'pyshell' to put unindented Python interactive shell sessions in a <code> block.") + parser.add_option("--use-file-vars", + help="Look for and use Emacs-style 'markdown-extras' " + "file var to turn on extras. See " + "<http://code.google.com/p/python-markdown2/wiki/Extras>.") + parser.add_option("--link-patterns-file", + help="path to a link pattern file") + parser.add_option("--self-test", action="store_true", + help="run internal self-tests (some doctests)") + parser.add_option("--compare", action="store_true", + help="run against Markdown.pl as well (for testing)") + parser.set_defaults(log_level=logging.INFO, compare=False, + encoding="utf-8", safe_mode=None, use_file_vars=False) + opts, paths = parser.parse_args() + log.setLevel(opts.log_level) + + if opts.self_test: + return _test() + + if opts.extras: + extras = {} + for s in opts.extras: + splitter = re.compile("[,;: ]+") + for e in splitter.split(s): + if '=' in e: + ename, earg = e.split('=', 1) + try: + earg = int(earg) + except ValueError: + pass + else: + ename, earg = e, None + extras[ename] = earg + else: + extras = None + + if opts.link_patterns_file: + link_patterns = [] + f = open(opts.link_patterns_file) + try: + for i, line in enumerate(f.readlines()): + if not line.strip(): continue + if line.lstrip().startswith("#"): continue + try: + pat, href = line.rstrip().rsplit(None, 1) + except ValueError: + raise MarkdownError("%s:%d: invalid link pattern line: %r" + % (opts.link_patterns_file, i+1, line)) + link_patterns.append( + (_regex_from_encoded_pattern(pat), href)) + finally: + f.close() + else: + link_patterns = None + + from os.path import join, dirname, abspath, exists + markdown_pl = 
join(dirname(dirname(abspath(__file__))), "test", + "Markdown.pl") + for path in paths: + if opts.compare: + print "==== Markdown.pl ====" + perl_cmd = 'perl %s "%s"' % (markdown_pl, path) + o = os.popen(perl_cmd) + perl_html = o.read() + o.close() + sys.stdout.write(perl_html) + print "==== markdown2.py ====" + html = markdown_path(path, encoding=opts.encoding, + html4tags=opts.html4tags, + safe_mode=opts.safe_mode, + extras=extras, link_patterns=link_patterns, + use_file_vars=opts.use_file_vars) + sys.stdout.write( + html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + if opts.compare: + test_dir = join(dirname(dirname(abspath(__file__))), "test") + if exists(join(test_dir, "test_markdown2.py")): + sys.path.insert(0, test_dir) + from test_markdown2 import norm_html_from_html + norm_html = norm_html_from_html(html) + norm_perl_html = norm_html_from_html(perl_html) + else: + norm_html = html + norm_perl_html = perl_html + print "==== match? %r ====" % (norm_perl_html == norm_html) + + +if __name__ == "__main__": + sys.exit( main(sys.argv) ) + diff --git a/demos/blog/schema.sql b/demos/blog/schema.sql new file mode 100644 index 000000000..86bff9a8a --- /dev/null +++ b/demos/blog/schema.sql @@ -0,0 +1,44 @@ +-- Copyright 2009 FriendFeed +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); you may +-- not use this file except in compliance with the License. You may obtain +-- a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +-- WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +-- License for the specific language governing permissions and limitations +-- under the License. 
+ +-- To create the database: +-- CREATE DATABASE blog; +-- GRANT ALL PRIVILEGES ON blog.* TO 'blog'@'localhost' IDENTIFIED BY 'blog'; +-- +-- To reload the tables: +-- mysql --user=blog --password=blog --database=blog < schema.sql + +SET SESSION storage_engine = "InnoDB"; +SET SESSION time_zone = "+0:00"; +ALTER DATABASE CHARACTER SET "utf8"; + +DROP TABLE IF EXISTS entries; +CREATE TABLE entries ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + author_id INT NOT NULL REFERENCES authors(id), + slug VARCHAR(100) NOT NULL UNIQUE, + title VARCHAR(512) NOT NULL, + markdown MEDIUMTEXT NOT NULL, + html MEDIUMTEXT NOT NULL, + published DATETIME NOT NULL, + updated TIMESTAMP NOT NULL, + KEY (published) +); + +DROP TABLE IF EXISTS authors; +CREATE TABLE authors ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + email VARCHAR(100) NOT NULL UNIQUE, + name VARCHAR(100) NOT NULL +); diff --git a/demos/blog/static/blog.css b/demos/blog/static/blog.css new file mode 100644 index 000000000..8902ec1f2 --- /dev/null +++ b/demos/blog/static/blog.css @@ -0,0 +1,153 @@ +/* + * Copyright 2009 Facebook + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +body { + background: white; + color: black; + margin: 15px; + margin-top: 0; +} + +body, +input, +textarea { + font-family: Georgia, serif; + font-size: 12pt; +} + +table { + border-collapse: collapse; + border: 0; +} + +td { + border: 0; + padding: 0; +} + +h1, +h2, +h3, +h4 { + font-family: "Helvetica Nue", Helvetica, Arial, sans-serif; + margin: 0; +} + +h1 { + font-size: 20pt; +} + +pre, +code { + font-family: monospace; + color: #060; +} + +pre { + margin-left: 1em; + padding-left: 1em; + border-left: 1px solid silver; + line-height: 14pt; +} + +a, +a code { + color: #00c; +} + +#body { + max-width: 800px; + margin: auto; +} + +#header { + background-color: #3b5998; + padding: 5px; + padding-left: 10px; + padding-right: 10px; + margin-bottom: 1em; +} + +#header, +#header a { + color: white; +} + +#header h1 a { + text-decoration: none; +} + +#footer, +#content { + margin-left: 10px; + margin-right: 10px; +} + +#footer { + margin-top: 3em; +} + +.entry h1 a { + color: black; + text-decoration: none; +} + +.entry { + margin-bottom: 2em; +} + +.entry .date { + margin-top: 3px; +} + +.entry p { + margin: 0; + margin-bottom: 1em; +} + +.entry .body { + margin-top: 1em; + line-height: 16pt; +} + +.compose td { + vertical-align: middle; + padding-bottom: 5px; +} + +.compose td.field { + padding-right: 10px; +} + +.compose .title, +.compose .submit { + font-family: "Helvetica Nue", Helvetica, Arial, sans-serif; + font-weight: bold; +} + +.compose .title { + font-size: 20pt; +} + +.compose .title, +.compose .markdown { + width: 100%; +} + +.compose .markdown { + height: 500px; + line-height: 16pt; +} diff --git a/demos/blog/templates/archive.html b/demos/blog/templates/archive.html new file mode 100644 index 000000000..9f1699793 --- /dev/null +++ b/demos/blog/templates/archive.html @@ -0,0 +1,31 @@ +{% extends "base.html" %} + +{% block head %} + <style type="text/css"> + ul.archive { + list-style-type: none; + margin: 0; + padding: 0; + } + + ul.archive li { + 
margin-bottom: 1em; + } + + ul.archive .title { + font-family: "Helvetica Nue", Helvetica, Arial, sans-serif; + font-size: 14pt; + } + </style> +{% end %} + +{% block body %} + <ul class="archive"> + {% for entry in entries %} + <li> + <div class="title"><a href="/entry/{{ entry.slug }}">{{ escape(entry.title) }}</a></div> + <div class="date">{{ locale.format_date(entry.published, full_format=True, shorter=True) }}</div> + </li> + {% end %} + </ul> +{% end %} diff --git a/demos/blog/templates/base.html b/demos/blog/templates/base.html new file mode 100644 index 000000000..4152037c0 --- /dev/null +++ b/demos/blog/templates/base.html @@ -0,0 +1,27 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> + <title>{{ escape(handler.settings["blog_title"]) }} + + + {% block head %}{% end %} + + +

+ +
{% block body %}{% end %}
+
+ {% block bottom %}{% end %} + + diff --git a/demos/blog/templates/compose.html b/demos/blog/templates/compose.html new file mode 100644 index 000000000..bc054b334 --- /dev/null +++ b/demos/blog/templates/compose.html @@ -0,0 +1,42 @@ +{% extends "base.html" %} + +{% block body %} +
+
+
+
+
{{ _("Syntax documentation") }}
+ +  {{ _("Cancel") }} +
+ {% if entry %} + + {% end %} + {{ xsrf_form_html() }} + +{% end %} + +{% block bottom %} + + +{% end %} + diff --git a/demos/blog/templates/entry.html b/demos/blog/templates/entry.html new file mode 100644 index 000000000..43c835dea --- /dev/null +++ b/demos/blog/templates/entry.html @@ -0,0 +1,5 @@ +{% extends "base.html" %} + +{% block body %} + {{ modules.Entry(entry) }} +{% end %} diff --git a/demos/blog/templates/feed.xml b/demos/blog/templates/feed.xml new file mode 100644 index 000000000..98a929802 --- /dev/null +++ b/demos/blog/templates/feed.xml @@ -0,0 +1,26 @@ + + + {% set date_format = "%Y-%m-%dT%H:%M:%SZ" %} + {{ escape(handler.settings["blog_title"]) }} + {% if len(entries) > 0 %} + {{ max(e.updated for e in entries).strftime(date_format) }} + {% else %} + {{ datetime.datetime.utcnow().strftime(date_format) }} + {% end %} + http://{{ request.host }}/ + + + {{ escape(handler.settings["blog_title"]) }} + {% for entry in entries %} + + http://{{ request.host }}/entry/{{ entry.slug }} + {{ escape(entry.title) }} + + {{ entry.updated.strftime(date_format) }} + {{ entry.published.strftime(date_format) }} + +
{{ entry.html }}
+
+
+ {% end %} +
diff --git a/demos/blog/templates/home.html b/demos/blog/templates/home.html new file mode 100644 index 000000000..dd069a97f --- /dev/null +++ b/demos/blog/templates/home.html @@ -0,0 +1,8 @@ +{% extends "base.html" %} + +{% block body %} + {% for entry in entries %} + {{ modules.Entry(entry) }} + {% end %} +
{{ _("Archive") }}
+{% end %} diff --git a/demos/blog/templates/modules/entry.html b/demos/blog/templates/modules/entry.html new file mode 100644 index 000000000..27ea0d76c --- /dev/null +++ b/demos/blog/templates/modules/entry.html @@ -0,0 +1,8 @@ +
+

{{ escape(entry.title) }}

+
{{ locale.format_date(entry.published, full_format=True, shorter=True) }}
+
{{ entry.html }}
+ {% if current_user %} + + {% end %} +
diff --git a/demos/chat/chatdemo.py b/demos/chat/chatdemo.py new file mode 100755 index 000000000..25d033035 --- /dev/null +++ b/demos/chat/chatdemo.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import logging +import tornado.auth +import tornado.escape +import tornado.httpserver +import tornado.ioloop +import tornado.options +import tornado.web +import os.path +import uuid + +from tornado.options import define, options + +define("port", default=8888, help="run on the given port", type=int) + + +class Application(tornado.web.Application): + def __init__(self): + handlers = [ + (r"/", MainHandler), + (r"/auth/login", AuthLoginHandler), + (r"/auth/logout", AuthLogoutHandler), + (r"/a/message/new", MessageNewHandler), + (r"/a/message/updates", MessageUpdatesHandler), + ] + settings = dict( + cookie_secret="43oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + login_url="/auth/login", + template_path=os.path.join(os.path.dirname(__file__), "templates"), + static_path=os.path.join(os.path.dirname(__file__), "static"), + xsrf_cookies=True, + ) + tornado.web.Application.__init__(self, handlers, **settings) + + +class BaseHandler(tornado.web.RequestHandler): + def get_current_user(self): + user_json = self.get_secure_cookie("user") + if not user_json: return None + return tornado.escape.json_decode(user_json) + + +class MainHandler(BaseHandler): + @tornado.web.authenticated + def get(self): + 
self.render("index.html", messages=MessageMixin.cache) + + +class MessageMixin(object): + waiters = [] + cache = [] + cache_size = 200 + + def wait_for_messages(self, callback, cursor=None): + cls = MessageMixin + if cursor: + index = 0 + for i in xrange(len(cls.cache)): + index = len(cls.cache) - i - 1 + if cls.cache[index]["id"] == cursor: break + recent = cls.cache[index + 1:] + if recent: + callback(recent) + return + cls.waiters.append(callback) + + def new_messages(self, messages): + cls = MessageMixin + logging.info("Sending new message to %r listeners", len(cls.waiters)) + for callback in cls.waiters: + try: + callback(messages) + except: + logging.error("Error in waiter callback", exc_info=True) + cls.waiters = [] + cls.cache.extend(messages) + if len(cls.cache) > self.cache_size: + cls.cache = cls.cache[-self.cache_size:] + + +class MessageNewHandler(BaseHandler, MessageMixin): + @tornado.web.authenticated + def post(self): + message = { + "id": str(uuid.uuid4()), + "from": self.current_user["first_name"], + "body": self.get_argument("body"), + } + message["html"] = self.render_string("message.html", message=message) + if self.get_argument("next", None): + self.redirect(self.get_argument("next")) + else: + self.write(message) + self.new_messages([message]) + + +class MessageUpdatesHandler(BaseHandler, MessageMixin): + @tornado.web.authenticated + @tornado.web.asynchronous + def post(self): + cursor = self.get_argument("cursor", None) + self.wait_for_messages(self.async_callback(self.on_new_messages), + cursor=cursor) + + def on_new_messages(self, messages): + # Closed client connection + if self.request.connection.stream.closed(): + return + self.finish(dict(messages=messages)) + + +class AuthLoginHandler(BaseHandler, tornado.auth.GoogleMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("openid.mode", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authenticate_redirect(ax_attrs=["name"]) + 
+ def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "Google auth failed") + self.set_secure_cookie("user", tornado.escape.json_encode(user)) + self.redirect("/") + + +class AuthLogoutHandler(BaseHandler, tornado.auth.FacebookMixin): + def get(self): + self.clear_cookie("user") + self.redirect(self.get_argument("next", "/")) + + +def main(): + tornado.options.parse_command_line() + http_server = tornado.httpserver.HTTPServer(Application()) + http_server.listen(options.port) + tornado.ioloop.IOLoop.instance().start() + + +if __name__ == "__main__": + main() diff --git a/demos/chat/static/chat.css b/demos/chat/static/chat.css new file mode 100644 index 000000000..657d191c2 --- /dev/null +++ b/demos/chat/static/chat.css @@ -0,0 +1,51 @@ +/* + * Copyright 2009 FriendFeed + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +body { + background: white; + margin: 10px; +} + +body, +input { + font-family: sans-serif; + font-size: 10pt; + color: black; +} + +table { + border-collapse: collapse; + border: 0; +} + +td { + border: 0; + padding: 0; +} + +#body { + position: absolute; + bottom: 10px; + left: 10px; +} + +#input { + margin-top: 0.5em; +} + +#inbox .message { + padding-top: 0.25em; +} diff --git a/demos/chat/static/chat.js b/demos/chat/static/chat.js new file mode 100644 index 000000000..f23a9dd93 --- /dev/null +++ b/demos/chat/static/chat.js @@ -0,0 +1,135 @@ +// Copyright 2009 FriendFeed +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. 
+ +$(document).ready(function() { + if (!window.console) window.console = {}; + if (!window.console.log) window.console.log = function() {}; + + $("#messageform").live("submit", function() { + newMessage($(this)); + return false; + }); + $("#messageform").live("keypress", function(e) { + if (e.keyCode == 13) { + newMessage($(this)); + return false; + } + }); + $("#message").select(); + updater.poll(); +}); + +function newMessage(form) { + var message = form.formToDict(); + var disabled = form.find("input[type=submit]"); + disabled.disable(); + $.postJSON("/a/message/new", message, function(response) { + updater.showMessage(response); + if (message.id) { + form.parent().remove(); + } else { + form.find("input[type=text]").val("").select(); + disabled.enable(); + } + }); +} + +function getCookie(name) { + var r = document.cookie.match("\\b" + name + "=([^;]*)\\b"); + return r ? r[1] : undefined; +} + +jQuery.postJSON = function(url, args, callback) { + args._xsrf = getCookie("_xsrf"); + $.ajax({url: url, data: $.param(args), dataType: "text", type: "POST", + success: function(response) { + if (callback) callback(eval("(" + response + ")")); + }, error: function(response) { + console.log("ERROR:", response) + }}); +}; + +jQuery.fn.formToDict = function() { + var fields = this.serializeArray(); + var json = {} + for (var i = 0; i < fields.length; i++) { + json[fields[i].name] = fields[i].value; + } + if (json.next) delete json.next; + return json; +}; + +jQuery.fn.disable = function() { + this.enable(false); + return this; +}; + +jQuery.fn.enable = function(opt_enable) { + if (arguments.length && !opt_enable) { + this.attr("disabled", "disabled"); + } else { + this.removeAttr("disabled"); + } + return this; +}; + +var updater = { + errorSleepTime: 500, + cursor: null, + + poll: function() { + var args = {"_xsrf": getCookie("_xsrf")}; + if (updater.cursor) args.cursor = updater.cursor; + $.ajax({url: "/a/message/updates", type: "POST", dataType: "text", + data: 
$.param(args), success: updater.onSuccess, + error: updater.onError}); + }, + + onSuccess: function(response) { + try { + updater.newMessages(eval("(" + response + ")")); + } catch (e) { + updater.onError(); + return; + } + updater.errorSleepTime = 500; + window.setTimeout(updater.poll, 0); + }, + + onError: function(response) { + updater.errorSleepTime *= 2; + console.log("Poll error; sleeping for", updater.errorSleepTime, "ms"); + window.setTimeout(updater.poll, updater.errorSleepTime); + }, + + newMessages: function(response) { + if (!response.messages) return; + updater.cursor = response.cursor; + var messages = response.messages; + updater.cursor = messages[messages.length - 1].id; + console.log(messages.length, "new messages, cursor:", updater.cursor); + for (var i = 0; i < messages.length; i++) { + updater.showMessage(messages[i]); + } + }, + + showMessage: function(message) { + var existing = $("#m" + message.id); + if (existing.length > 0) return; + var node = $(message.html); + node.hide(); + $("#inbox").append(node); + node.slideDown(); + } +}; diff --git a/demos/chat/templates/index.html b/demos/chat/templates/index.html new file mode 100644 index 000000000..1f2583faf --- /dev/null +++ b/demos/chat/templates/index.html @@ -0,0 +1,37 @@ + + + + + Tornado Chat Demo + + + +
+ {{ escape(current_user["name"]) }} - + {{ _("Sign out") }} +
+
+
+ {% for message in messages %} + {% include "message.html" %} + {% end %} +
+
+
+
+ + + + +
+ + + {{ xsrf_form_html() }} +
+ +
+ + + + + diff --git a/demos/chat/templates/message.html b/demos/chat/templates/message.html new file mode 100644 index 000000000..2ebd1b289 --- /dev/null +++ b/demos/chat/templates/message.html @@ -0,0 +1 @@ +
{{ message["from"] }}: {{ escape(message["body"]) }}
diff --git a/demos/facebook/README new file mode 100644 index 000000000..26a631eb4 --- /dev/null +++ b/demos/facebook/README @@ -0,0 +1,8 @@ +Running the Tornado Facebook example +==================================== +To work with the provided Facebook API key, this example must be +accessed at http://localhost:8888/ to match the Connect URL set in the +example application. + +To use any other domain, a new Facebook application must be registered +with a Connect URL set to that domain. diff --git a/demos/facebook/facebook.py new file mode 100755 index 000000000..0c984ddaa --- /dev/null +++ b/demos/facebook/facebook.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import logging +import os.path +import tornado.auth +import tornado.escape +import tornado.httpserver +import tornado.ioloop +import tornado.options +import tornado.web +import uimodules + +from tornado.options import define, options + +define("port", default=8888, help="run on the given port", type=int) +define("facebook_api_key", help="your Facebook application API key", + default="9e2ada1b462142c4dfcc8e894ea1e37c") +define("facebook_secret", help="your Facebook application secret", + default="32fc6114554e3c53d5952594510021e2") + + +class Application(tornado.web.Application): + def __init__(self): + handlers = [ + (r"/", MainHandler), + (r"/auth/login", AuthLoginHandler), + (r"/auth/logout", AuthLogoutHandler), + ] + settings = dict( + cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + login_url="/auth/login", + template_path=os.path.join(os.path.dirname(__file__), "templates"), + static_path=os.path.join(os.path.dirname(__file__), "static"), + xsrf_cookies=True, + facebook_api_key=options.facebook_api_key, + facebook_secret=options.facebook_secret, + ui_modules= {"Post": PostModule}, + debug=True, + ) + tornado.web.Application.__init__(self, handlers, **settings) + + +class BaseHandler(tornado.web.RequestHandler): + def get_current_user(self): + user_json = self.get_secure_cookie("user") + if not user_json: return None + return tornado.escape.json_decode(user_json) + + +class MainHandler(BaseHandler, tornado.auth.FacebookMixin): + @tornado.web.authenticated + @tornado.web.asynchronous + def get(self): + self.facebook_request( + method="stream.get", + callback=self.async_callback(self._on_stream), + session_key=self.current_user["session_key"]) + + def _on_stream(self, stream): + if stream is None: + # Session may have expired + self.redirect("/auth/login") + return + # Turn profiles into a dict mapping id => profile + stream["profiles"] = dict((p["id"], p) for p in stream["profiles"]) + self.render("stream.html", stream=stream) + + +class 
AuthLoginHandler(BaseHandler, tornado.auth.FacebookMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("session", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authorize_redirect("read_stream") + + def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "Facebook auth failed") + self.set_secure_cookie("user", tornado.escape.json_encode(user)) + self.redirect(self.get_argument("next", "/")) + + +class AuthLogoutHandler(BaseHandler, tornado.auth.FacebookMixin): + @tornado.web.asynchronous + def get(self): + self.clear_cookie("user") + if not self.current_user: + self.redirect(self.get_argument("next", "/")) + return + self.facebook_request( + method="auth.revokeAuthorization", + callback=self.async_callback(self._on_deauthorize), + session_key=self.current_user["session_key"]) + + def _on_deauthorize(self, response): + self.redirect(self.get_argument("next", "/")) + + +class PostModule(tornado.web.UIModule): + def render(self, post, actor): + return self.render_string("modules/post.html", post=post, actor=actor) + + +def main(): + tornado.options.parse_command_line() + http_server = tornado.httpserver.HTTPServer(Application()) + http_server.listen(options.port) + tornado.ioloop.IOLoop.instance().start() + + +if __name__ == "__main__": + main() diff --git a/demos/facebook/static/facebook.css b/demos/facebook/static/facebook.css new file mode 100644 index 000000000..4fee72678 --- /dev/null +++ b/demos/facebook/static/facebook.css @@ -0,0 +1,97 @@ +/* + * Copyright 2009 Facebook + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
You may obtain + * a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +body { + background: white; + color: black; + margin: 15px; +} + +body, +input, +textarea { + font-family: "Lucida Grande", Tahoma, Verdana, sans-serif; + font-size: 10pt; +} + +table { + border-collapse: collapse; + border: 0; +} + +td { + border: 0; + padding: 0; +} + +img { + border: 0; +} + +a { + text-decoration: none; + color: #3b5998; +} + +a:hover { + text-decoration: underline; +} + +.post { + border-bottom: 1px solid #eeeeee; + min-height: 50px; + padding-bottom: 10px; + margin-top: 10px; +} + +.post .picture { + float: left; +} + +.post .picture img { + height: 50px; + width: 50px; +} + +.post .body { + margin-left: 60px; +} + +.post .media img { + border: 1px solid #cccccc; + padding: 3px; +} + +.post .media:hover img { + border: 1px solid #3b5998; +} + +.post a.actor { + font-weight: bold; +} + +.post .meta { + font-size: 11px; +} + +.post a.permalink { + color: #777777; +} + +#body { + max-width: 700px; + margin: auto; +} diff --git a/demos/facebook/static/facebook.js b/demos/facebook/static/facebook.js new file mode 100644 index 000000000..e69de29bb diff --git a/demos/facebook/templates/modules/post.html b/demos/facebook/templates/modules/post.html new file mode 100644 index 000000000..f938c735a --- /dev/null +++ b/demos/facebook/templates/modules/post.html @@ -0,0 +1,29 @@ +
+
+ +
+
+ {{ escape(actor["name"]) }} + {% if post["message"] %} + {{ escape(post["message"]) }} + {% end %} + {% if post["attachment"] %} +
+ {% if post["attachment"].get("name") %} + + {% end %} + {% if post["attachment"].get("description") %} +
{{ post["attachment"]["description"] }}
+ {% end %} + {% for media in filter(lambda m: m.get("src") and m["type"] in ("photo", "link"), post["attachment"]["media"]) %} + + {{ escape(media.get( + + {% end %} +
+ {% end %} + +
+
diff --git a/demos/facebook/templates/stream.html b/demos/facebook/templates/stream.html new file mode 100644 index 000000000..06ab1dfcb --- /dev/null +++ b/demos/facebook/templates/stream.html @@ -0,0 +1,22 @@ + + + + + Tornado Facebook Stream Demo + + + +
+
+ {{ escape(current_user["name"]) }} - + {{ _("Sign out") }} +
+
{{ _("Refresh stream") }}
+
+ {% for post in stream["posts"] %} + {{ modules.Post(post, stream["profiles"][post["actor_id"]]) }} + {% end %} +
+
+ + diff --git a/demos/facebook/uimodules.py b/demos/facebook/uimodules.py new file mode 100644 index 000000000..1173db634 --- /dev/null +++ b/demos/facebook/uimodules.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tornado.web + + +class Entry(tornado.web.UIModule): + def render(self): + return '
ENTRY
' diff --git a/demos/helloworld/helloworld.py b/demos/helloworld/helloworld.py new file mode 100755 index 000000000..754f72f30 --- /dev/null +++ b/demos/helloworld/helloworld.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tornado.httpserver +import tornado.ioloop +import tornado.options +import tornado.web +import logging + +from tornado.options import define, options + +define("port", default=8888, help="run on the given port", type=int) + + +class MainHandler(tornado.web.RequestHandler): + def get(self): + self.write("Hello, world") + + +def main(): + tornado.options.parse_command_line() + application = tornado.web.Application([ + (r"/", MainHandler), + ]) + http_server = tornado.httpserver.HTTPServer(application) + http_server.listen(options.port) + tornado.ioloop.IOLoop.instance().start() + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..68495c649 --- /dev/null +++ b/setup.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import distutils.core +import sys + +# Build the epoll extension for Linux systems with Python < 2.6 +extensions = [] +major, minor = sys.version_info[:2] +python_26 = (major > 2 or (major == 2 and minor >= 6)) +if "linux" in sys.platform.lower() and not python_26: + extensions.append(distutils.core.Extension( + "tornado.epoll", ["tornado/epoll.c"])) + +distutils.core.setup( + name="tornado", + version="0.1", + packages = ["tornado"], + ext_modules = extensions, + author="Facebook", + author_email="python-tornado@googlegroups.com", + url="http://www.tornadoweb.org/", + license="http://www.apache.org/licenses/LICENSE-2.0", + description="Tornado is an open source version of the scalable, non-blocking web server and and tools that power FriendFeed", +) diff --git a/tornado/__init__.py b/tornado/__init__.py new file mode 100644 index 000000000..8f73764eb --- /dev/null +++ b/tornado/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""The Tornado web server and tools.""" diff --git a/tornado/auth.py b/tornado/auth.py new file mode 100644 index 000000000..8d583d562 --- /dev/null +++ b/tornado/auth.py @@ -0,0 +1,878 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Implementations of various third-party authentication schemes. + +All the classes in this file are class Mixins designed to be used with +web.py RequestHandler classes. The primary methods for each service are +authenticate_redirect(), authorize_redirect(), and get_authenticated_user(). +The former should be called to redirect the user to, e.g., the OpenID +authentication page on the third party service, and the latter should +be called upon return to get the user data from the data returned by +the third party service. + +They all take slightly different arguments due to the fact all these +services implement authentication and authorization slightly differently. +See the individual service classes below for complete documentation. 
+ +Example usage for Google OpenID: + +class GoogleHandler(tornado.web.RequestHandler, tornado.auth.GoogleMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("openid.mode", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authenticate_redirect() + + def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "Google auth failed") + # Save the user with, e.g., set_secure_cookie() + +""" + +import base64 +import binascii +import cgi +import hashlib +import hmac +import httpclient +import escape +import logging +import time +import urllib +import urlparse +import uuid + + +class OpenIdMixin(object): + """Abstract implementation of OpenID and Attribute Exchange. + + See GoogleMixin below for example implementations. + """ + def authenticate_redirect(self, callback_uri=None, + ax_attrs=["name","email","language","username"]): + """Returns the authentication URL for this service. + + After authentication, the service will redirect back to the given + callback URI. + + We request the given attributes for the authenticated user by + default (name, email, language, and username). If you don't need + all those attributes for your app, you can request fewer with + the ax_attrs keyword argument. + """ + callback_uri = callback_uri or self.request.path + args = self._openid_args(callback_uri, ax_attrs=ax_attrs) + self.redirect(self._OPENID_ENDPOINT + "?" + urllib.urlencode(args)) + + def get_authenticated_user(self, callback): + """Fetches the authenticated user data upon redirect. + + This method should be called by the handler that receives the + redirect from the authenticate_redirect() or authorize_redirect() + methods. + """ + # Verify the OpenID response via direct request to the OP + args = dict((k, v[-1]) for k, v in self.request.arguments.iteritems()) + args["openid.mode"] = u"check_authentication" + url = self._OPENID_ENDPOINT + "?" 
+ urllib.urlencode(args) + http = httpclient.AsyncHTTPClient() + http.fetch(url, self.async_callback( + self._on_authentication_verified, callback)) + + def _openid_args(self, callback_uri, ax_attrs=[], oauth_scope=None): + url = urlparse.urljoin(self.request.full_url(), callback_uri) + args = { + "openid.ns": "http://specs.openid.net/auth/2.0", + "openid.claimed_id": + "http://specs.openid.net/auth/2.0/identifier_select", + "openid.identity": + "http://specs.openid.net/auth/2.0/identifier_select", + "openid.return_to": url, + "openid.realm": "http://" + self.request.host + "/", + "openid.mode": "checkid_setup", + } + if ax_attrs: + args.update({ + "openid.ns.ax": "http://openid.net/srv/ax/1.0", + "openid.ax.mode": "fetch_request", + }) + ax_attrs = set(ax_attrs) + required = [] + if "name" in ax_attrs: + ax_attrs -= set(["name", "firstname", "fullname", "lastname"]) + required += ["firstname", "fullname", "lastname"] + args.update({ + "openid.ax.type.firstname": + "http://axschema.org/namePerson/first", + "openid.ax.type.fullname": + "http://axschema.org/namePerson", + "openid.ax.type.lastname": + "http://axschema.org/namePerson/last", + }) + known_attrs = { + "email": "http://axschema.org/contact/email", + "language": "http://axschema.org/pref/language", + "username": "http://axschema.org/namePerson/friendly", + } + for name in ax_attrs: + args["openid.ax.type." 
+ name] = known_attrs[name] + required.append(name) + args["openid.ax.required"] = ",".join(required) + if oauth_scope: + args.update({ + "openid.ns.oauth": + "http://specs.openid.net/extensions/oauth/1.0", + "openid.oauth.consumer": self.request.host.split(":")[0], + "openid.oauth.scope": oauth_scope, + }) + return args + + def _on_authentication_verified(self, callback, response): + if response.error or u"is_valid:true" not in response.body: + logging.warning("Invalid OpenID response: %s", response.error or + response.body) + callback(None) + return + + # Make sure we got back at least an email from attribute exchange + ax_ns = None + for name, values in self.request.arguments.iteritems(): + if name.startswith("openid.ns.") and \ + values[-1] == u"http://openid.net/srv/ax/1.0": + ax_ns = name[10:] + break + def get_ax_arg(uri): + if not ax_ns: return u"" + prefix = "openid." + ax_ns + ".type." + ax_name = None + for name, values in self.request.arguments.iteritems(): + if values[-1] == uri and name.startswith(prefix): + part = name[len(prefix):] + ax_name = "openid." + ax_ns + ".value." 
+ part + break + if not ax_name: return u"" + return self.get_argument(ax_name, u"") + + email = get_ax_arg("http://axschema.org/contact/email") + name = get_ax_arg("http://axschema.org/namePerson") + first_name = get_ax_arg("http://axschema.org/namePerson/first") + last_name = get_ax_arg("http://axschema.org/namePerson/last") + username = get_ax_arg("http://axschema.org/namePerson/friendly") + locale = get_ax_arg("http://axschema.org/pref/language").lower() + user = dict() + name_parts = [] + if first_name: + user["first_name"] = first_name + name_parts.append(first_name) + if last_name: + user["last_name"] = last_name + name_parts.append(last_name) + if name: + user["name"] = name + elif name_parts: + user["name"] = u" ".join(name_parts) + elif email: + user["name"] = email.split("@")[0] + if email: user["email"] = email + if locale: user["locale"] = locale + if username: user["username"] = username + callback(user) + + +class OAuthMixin(object): + """Abstract implementation of OAuth. + + See TwitterMixin and FriendFeedMixin below for example implementations. + """ + def authorize_redirect(self, callback_uri=None): + """Redirects the user to obtain OAuth authorization for this service. + + Twitter and FriendFeed both require that you register a Callback + URL with your application. You should call this method to log the + user in, and then call get_authenticated_user() in the handler + you registered as your Callback URL to complete the authorization + process. + + This method sets a cookie called _oauth_request_token which is + subsequently used (and cleared) in get_authenticated_user for + security purposes. 
+ """ + if callback_uri and getattr(self, "_OAUTH_NO_CALLBACKS", False): + raise Exception("This service does not support oauth_callback") + http = httpclient.AsyncHTTPClient() + http.fetch(self._oauth_request_token_url(), self.async_callback( + self._on_request_token, self._OAUTH_AUTHORIZE_URL, callback_uri)) + + def get_authenticated_user(self, callback): + """Gets the OAuth authorized user and access token on callback. + + This method should be called from the handler for your registered + OAuth Callback URL to complete the registration process. We call + callback with the authenticated user, which in addition to standard + attributes like 'name' includes the 'access_key' attribute, which + contains the OAuth access you can use to make authorized requests + to this service on behalf of the user. + """ + request_key = self.get_argument("oauth_token") + request_cookie = self.get_cookie("_oauth_request_token") + if not request_cookie: + logging.warning("Missing OAuth request token cookie") + callback(None) + return + cookie_key, cookie_secret = request_cookie.split("|") + if cookie_key != request_key: + logging.warning("Request token does not match cookie") + callback(None) + return + token = dict(key=cookie_key, secret=cookie_secret) + http = httpclient.AsyncHTTPClient() + http.fetch(self._oauth_access_token_url(token), self.async_callback( + self._on_access_token, callback)) + + def _oauth_request_token_url(self): + consumer_token = self._oauth_consumer_token() + url = self._OAUTH_REQUEST_TOKEN_URL + args = dict( + oauth_consumer_key=consumer_token["key"], + oauth_signature_method="HMAC-SHA1", + oauth_timestamp=str(int(time.time())), + oauth_nonce=binascii.b2a_hex(uuid.uuid4().bytes), + oauth_version="1.0", + ) + signature = _oauth_signature(consumer_token, "GET", url, args) + args["oauth_signature"] = signature + return url + "?" 
+ urllib.urlencode(args) + + def _on_request_token(self, authorize_url, callback_uri, response): + if response.error: + raise Exception("Could not get request token") + request_token = _oauth_parse_response(response.body) + data = "|".join([request_token["key"], request_token["secret"]]) + self.set_cookie("_oauth_request_token", data) + args = dict(oauth_token=request_token["key"]) + if callback_uri: + args["oauth_callback"] = urlparse.urljoin( + self.request.full_url(), callback_uri) + self.redirect(authorize_url + "?" + urllib.urlencode(args)) + + def _oauth_access_token_url(self, request_token): + consumer_token = self._oauth_consumer_token() + url = self._OAUTH_ACCESS_TOKEN_URL + args = dict( + oauth_consumer_key=consumer_token["key"], + oauth_token=request_token["key"], + oauth_signature_method="HMAC-SHA1", + oauth_timestamp=str(int(time.time())), + oauth_nonce=binascii.b2a_hex(uuid.uuid4().bytes), + oauth_version="1.0", + ) + signature = _oauth_signature(consumer_token, "GET", url, args, + request_token) + args["oauth_signature"] = signature + return url + "?" + urllib.urlencode(args) + + def _on_access_token(self, callback, response): + if response.error: + logging.warning("Could not fetch access token") + callback(None) + return + access_token = _oauth_parse_response(response.body) + user = self._oauth_get_user(access_token, self.async_callback( + self._on_oauth_get_user, access_token, callback)) + + def _oauth_get_user(self, access_token, callback): + raise NotImplementedError() + + def _on_oauth_get_user(self, access_token, callback, user): + if not user: + callback(None) + return + user["access_token"] = access_token + callback(user) + + def _oauth_request_parameters(self, url, access_token, parameters={}, + method="GET"): + """Returns the OAuth parameters as a dict for the given request. + + parameters should include all POST arguments and query string arguments + that will be sent with the request. 
+ """ + consumer_token = self._oauth_consumer_token() + base_args = dict( + oauth_consumer_key=consumer_token["key"], + oauth_token=access_token["key"], + oauth_signature_method="HMAC-SHA1", + oauth_timestamp=str(int(time.time())), + oauth_nonce=binascii.b2a_hex(uuid.uuid4().bytes), + oauth_version="1.0", + ) + args = {} + args.update(base_args) + args.update(parameters) + signature = _oauth_signature(consumer_token, method, url, args, + access_token) + base_args["oauth_signature"] = signature + return base_args + + +class TwitterMixin(OAuthMixin): + """Twitter OAuth authentication. + + To authenticate with Twitter, register your application with + Twitter at http://twitter.com/apps. Then copy your Consumer Key and + Consumer Secret to the application settings 'twitter_consumer_key' and + 'twitter_consumer_secret'. Use this Mixin on the handler for the URL + you registered as your application's Callback URL. + + When your application is set up, you can use this Mixin like this + to authenticate the user with Twitter and get access to their stream: + + class TwitterHandler(tornado.web.RequestHandler, + tornado.auth.TwitterMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("oauth_token", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authorize_redirect() + + def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "Twitter auth failed") + # Save the user using, e.g., set_secure_cookie() + + The user object returned by get_authenticated_user() includes the + attributes 'username', 'name', and all of the custom Twitter user + attributes describe at + http://apiwiki.twitter.com/Twitter-REST-API-Method%3A-users%C2%A0show + in addition to 'access_token'. You should save the access token with + the user; it is required to make requests on behalf of the user later + with twitter_request(). 
+ """ + _OAUTH_REQUEST_TOKEN_URL = "http://twitter.com/oauth/request_token" + _OAUTH_ACCESS_TOKEN_URL = "http://twitter.com/oauth/access_token" + _OAUTH_AUTHORIZE_URL = "http://twitter.com/oauth/authorize" + _OAUTH_AUTHENTICATE_URL = "http://twitter.com/oauth/authenticate" + _OAUTH_NO_CALLBACKS = True + + def authenticate_redirect(self): + """Just like authorize_redirect(), but auto-redirects if authorized. + + This is generally the right interface to use if you are using + Twitter for single-sign on. + """ + http = httpclient.AsyncHTTPClient() + http.fetch(self._oauth_request_token_url(), self.async_callback( + self._on_request_token, self._OAUTH_AUTHENTICATE_URL, None)) + + def twitter_request(self, path, callback, access_token=None, + post_args=None, **args): + """Fetches the given API path, e.g., "/statuses/user_timeline/btaylor" + + The path should not include the format (we automatically append + ".json" and parse the JSON output). + + If the request is a POST, post_args should be provided. Query + string arguments should be given as keyword arguments. + + All the Twitter methods are documented at + http://apiwiki.twitter.com/Twitter-API-Documentation. + + Many methods require an OAuth access token which you can obtain + through authorize_redirect() and get_authenticated_user(). The + user returned through that process includes an 'access_token' + attribute that can be used to make authenticated requests via + this method. Example usage: + + class MainHandler(tornado.web.RequestHandler, + tornado.auth.TwitterMixin): + @tornado.web.authenticated + @tornado.web.asynchronous + def get(self): + self.twitter_request( + "/statuses/update", + post_args={"status": "Testing Tornado Web Server"}, + access_token=user["access_token"], + callback=self.async_callback(self._on_post)) + + def _on_post(self, new_entry): + if not new_entry: + # Call failed; perhaps missing permission? 
+ self.authorize_redirect() + return + self.finish("Posted a message!") + + """ + # Add the OAuth resource request signature if we have credentials + url = "http://twitter.com" + path + ".json" + if access_token: + all_args = {} + all_args.update(args) + all_args.update(post_args or {}) + consumer_token = self._oauth_consumer_token() + method = "POST" if post_args is not None else "GET" + oauth = self._oauth_request_parameters( + url, access_token, all_args, method=method) + args.update(oauth) + if args: url += "?" + urllib.urlencode(args) + callback = self.async_callback(self._on_twitter_request, callback) + http = httpclient.AsyncHTTPClient() + if post_args is not None: + http.fetch(url, method="POST", body=urllib.urlencode(post_args), + callback=callback) + else: + http.fetch(url, callback=callback) + + def _on_twitter_request(self, callback, response): + if response.error: + logging.warning("Error response %s fetching %s", response.error, + response.request.url) + callback(None) + return + callback(escape.json_decode(response.body)) + + def _oauth_consumer_token(self): + self.require_setting("twitter_consumer_key", "Twitter OAuth") + self.require_setting("twitter_consumer_secret", "Twitter OAuth") + return dict( + key=self.settings["twitter_consumer_key"], + secret=self.settings["twitter_consumer_secret"]) + + def _oauth_get_user(self, access_token, callback): + callback = self.async_callback(self._parse_user_response, callback) + self.twitter_request( + "/users/show/" + access_token["screen_name"], + access_token=access_token, callback=callback) + + def _parse_user_response(self, callback, user): + if user: + user["username"] = user["screen_name"] + callback(user) + + +class FriendFeedMixin(OAuthMixin): + """FriendFeed OAuth authentication. + + To authenticate with FriendFeed, register your application with + FriendFeed at http://friendfeed.com/api/applications. 
Then + copy your Consumer Key and Consumer Secret to the application settings + 'friendfeed_consumer_key' and 'friendfeed_consumer_secret'. Use + this Mixin on the handler for the URL you registered as your + application's Callback URL. + + When your application is set up, you can use this Mixin like this + to authenticate the user with FriendFeed and get access to their feed: + + class FriendFeedHandler(tornado.web.RequestHandler, + tornado.auth.FriendFeedMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("oauth_token", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authorize_redirect() + + def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "FriendFeed auth failed") + # Save the user using, e.g., set_secure_cookie() + + The user object returned by get_authenticated_user() includes the + attributes 'username', 'name', and 'description' in addition to + 'access_token'. You should save the access token with the user; + it is required to make requests on behalf of the user later with + friendfeed_request(). + """ + _OAUTH_REQUEST_TOKEN_URL = "https://friendfeed.com/account/oauth/request_token" + _OAUTH_ACCESS_TOKEN_URL = "https://friendfeed.com/account/oauth/access_token" + _OAUTH_AUTHORIZE_URL = "https://friendfeed.com/account/oauth/authorize" + _OAUTH_NO_CALLBACKS = True + + def friendfeed_request(self, path, callback, access_token=None, + post_args=None, **args): + """Fetches the given relative API path, e.g., "/bret/friends" + + If the request is a POST, post_args should be provided. Query + string arguments should be given as keyword arguments. + + All the FriendFeed methods are documented at + http://friendfeed.com/api/documentation. + + Many methods require an OAuth access token which you can obtain + through authorize_redirect() and get_authenticated_user(). 
The + user returned through that process includes an 'access_token' + attribute that can be used to make authenticated requests via + this method. Example usage: + + class MainHandler(tornado.web.RequestHandler, + tornado.auth.FriendFeedMixin): + @tornado.web.authenticated + @tornado.web.asynchronous + def get(self): + self.friendfeed_request( + "/entry", + post_args={"body": "Testing Tornado Web Server"}, + access_token=self.current_user["access_token"], + callback=self.async_callback(self._on_post)) + + def _on_post(self, new_entry): + if not new_entry: + # Call failed; perhaps missing permission? + self.authorize_redirect() + return + self.finish("Posted a message!") + + """ + # Add the OAuth resource request signature if we have credentials + url = "http://friendfeed-api.com/v2" + path + if access_token: + all_args = {} + all_args.update(args) + all_args.update(post_args or {}) + consumer_token = self._oauth_consumer_token() + method = "POST" if post_args is not None else "GET" + oauth = self._oauth_request_parameters( + url, access_token, all_args, method=method) + args.update(oauth) + if args: url += "?" 
+ urllib.urlencode(args) + callback = self.async_callback(self._on_friendfeed_request, callback) + http = httpclient.AsyncHTTPClient() + if post_args is not None: + http.fetch(url, method="POST", body=urllib.urlencode(post_args), + callback=callback) + else: + http.fetch(url, callback=callback) + + def _on_friendfeed_request(self, callback, response): + if response.error: + logging.warning("Error response %s fetching %s", response.error, + response.request.url) + callback(None) + return + callback(escape.json_decode(response.body)) + + def _oauth_consumer_token(self): + self.require_setting("friendfeed_consumer_key", "FriendFeed OAuth") + self.require_setting("friendfeed_consumer_secret", "FriendFeed OAuth") + return dict( + key=self.settings["friendfeed_consumer_key"], + secret=self.settings["friendfeed_consumer_secret"]) + + def _oauth_get_user(self, access_token, callback): + callback = self.async_callback(self._parse_user_response, callback) + self.friendfeed_request( + "/feedinfo/" + access_token["username"], + include="id,name,description", access_token=access_token, + callback=callback) + + def _parse_user_response(self, callback, user): + if user: + user["username"] = user["id"] + callback(user) + + +class GoogleMixin(OpenIdMixin, OAuthMixin): + """Google Open ID / OAuth authentication. + + No application registration is necessary to use Google for authentication + or to access Google resources on behalf of a user. To authenticate with + Google, redirect with authenticate_redirect(). On return, parse the + response with get_authenticated_user(). We send a dict containing the + values for the user, including 'email', 'name', and 'locale'. 
+ Example usage: + + class GoogleHandler(tornado.web.RequestHandler, tornado.auth.GoogleMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("openid.mode", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authenticate_redirect() + + def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "Google auth failed") + # Save the user with, e.g., set_secure_cookie() + + """ + _OPENID_ENDPOINT = "https://www.google.com/accounts/o8/ud" + _OAUTH_ACCESS_TOKEN_URL = "https://www.google.com/accounts/OAuthGetAccessToken" + + def authorize_redirect(self, oauth_scope, callback_uri=None, + ax_attrs=["name","email","language","username"]): + """Authenticates and authorizes for the given Google resource. + + Some of the available resources are: + + Gmail Contacts - http://www.google.com/m8/feeds/ + Calendar - http://www.google.com/calendar/feeds/ + Finance - http://finance.google.com/finance/feeds/ + + You can authorize multiple resources by separating the resource + URLs with a space. + """ + callback_uri = callback_uri or self.request.path + args = self._openid_args(callback_uri, ax_attrs=ax_attrs, + oauth_scope=oauth_scope) + self.redirect(self._OPENID_ENDPOINT + "?" + urllib.urlencode(args)) + + def get_authenticated_user(self, callback): + """Fetches the authenticated user data upon redirect.""" + # Look to see if we are doing combined OpenID/OAuth + oauth_ns = "" + for name, values in self.request.arguments.iteritems(): + if name.startswith("openid.ns.") and \ + values[-1] == u"http://specs.openid.net/extensions/oauth/1.0": + oauth_ns = name[10:] + break + token = self.get_argument("openid." 
+ oauth_ns + ".request_token", "") + if token: + http = httpclient.AsyncHTTPClient() + token = dict(key=token, secret="") + http.fetch(self._oauth_access_token_url(token), + self.async_callback(self._on_access_token, callback)) + else: + OpenIdMixin.get_authenticated_user(self, callback) + + def _oauth_consumer_token(self): + self.require_setting("google_consumer_key", "Google OAuth") + self.require_setting("google_consumer_secret", "Google OAuth") + return dict( + key=self.settings["google_consumer_key"], + secret=self.settings["google_consumer_secret"]) + + def _oauth_get_user(self, access_token, callback): + OpenIdMixin.get_authenticated_user(self, callback) + + +class FacebookMixin(object): + """Facebook Connect authentication. + + To authenticate with Facebook, register your application with + Facebook at http://www.facebook.com/developers/apps.php. Then + copy your API Key and Application Secret to the application settings + 'facebook_api_key' and 'facebook_secret'. + + When your application is set up, you can use this Mixin like this + to authenticate the user with Facebook: + + class FacebookHandler(tornado.web.RequestHandler, + tornado.auth.FacebookMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("auth_token", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authenticate_redirect() + + def _on_auth(self, user): + if not user: + raise tornado.web.HTTPError(500, "Facebook auth failed") + # Save the user using, e.g., set_secure_cookie() + + The user object returned by get_authenticated_user() includes the + attributes 'facebook_uid' and 'name' in addition to session attributes + like 'session_key'. You should save the session key with the user; it is + required to make requests on behalf of the user later with + facebook_request(). 
+ """ + def authenticate_redirect(self, callback_uri=None, cancel_uri=None, + extended_permissions=None): + """Authenticates/installs this app for the current user.""" + self.require_setting("facebook_api_key", "Facebook Connect") + callback_uri = callback_uri or self.request.path + args = { + "api_key": self.settings["facebook_api_key"], + "v": "1.0", + "fbconnect": "true", + "display": "page", + "next": urlparse.urljoin(self.request.full_url(), callback_uri), + "return_session": "true", + } + if cancel_uri: + args["cancel_url"] = urlparse.urljoin( + self.request.full_url(), cancel_uri) + if extended_permissions: + if isinstance(extended_permissions, basestring): + extended_permissions = [extended_permissions] + args["req_perms"] = ",".join(extended_permissions) + self.redirect("http://www.facebook.com/login.php?" + + urllib.urlencode(args)) + + def authorize_redirect(self, extended_permissions, callback_uri=None, + cancel_uri=None): + """Redirects to an authorization request for the given FB resource. + + The available resource names are listed at + http://wiki.developers.facebook.com/index.php/Extended_permission. + The most common resource types include: + + publish_stream + read_stream + email + sms + + extended_permissions can be a single permission name or a list of + names. To get the session secret and session key, call + get_authenticated_user() just as you would with + authenticate_redirect(). + """ + self.authenticate_redirect(callback_uri, cancel_uri, + extended_permissions) + + def get_authenticated_user(self, callback): + """Fetches the authenticated Facebook user. + + The authenticated user includes the special Facebook attributes + 'session_key' and 'facebook_uid' in addition to the standard + user attributes like 'name'. 
+ """ + self.require_setting("facebook_api_key", "Facebook Connect") + session = escape.json_decode(self.get_argument("session")) + self.facebook_request( + method="facebook.users.getInfo", + callback=self.async_callback( + self._on_get_user_info, callback, session), + session_key=session["session_key"], + uids=session["uid"], + fields="uid,first_name,last_name,name,locale,pic_square") + + def facebook_request(self, method, callback, **args): + """Makes a Facebook API REST request. + + We automatically include the Facebook API key and signature, but + it is the callers responsibility to include 'session_key' and any + other required arguments to the method. + + The available Facebook methods are documented here: + http://wiki.developers.facebook.com/index.php/API + + Here is an example for the stream.get() method: + + class MainHandler(tornado.web.RequestHandler, + tornado.auth.FacebookMixin): + @tornado.web.authenticated + @tornado.web.asynchronous + def get(self): + self.facebook_request( + method="stream.get", + callback=self.async_callback(self._on_stream), + session_key=self.current_user["session_key"]) + + def _on_stream(self, stream): + if stream is None: + # Not authorized to read the stream yet? + self.redirect(self.authorize_redirect("read_stream")) + return + self.render("stream.html", stream=stream) + + """ + self.require_setting("facebook_api_key", "Facebook Connect") + self.require_setting("facebook_secret", "Facebook Connect") + if not method.startswith("facebook."): + method = "facebook." + method + args["api_key"] = self.settings["facebook_api_key"] + args["v"] = "1.0" + args["method"] = method + args["call_id"] = str(long(time.time() * 1e6)) + args["format"] = "json" + args["sig"] = self._signature(args) + url = "http://api.facebook.com/restserver.php?" 
+ \ + urllib.urlencode(args) + http = httpclient.AsyncHTTPClient() + http.fetch(url, callback=self.async_callback( + self._parse_response, callback)) + + def _on_get_user_info(self, callback, session, users): + if users is None: + callback(None) + return + callback({ + "name": users[0]["name"], + "first_name": users[0]["first_name"], + "last_name": users[0]["last_name"], + "uid": users[0]["uid"], + "locale": users[0]["locale"], + "session_key": session["session_key"], + "session_expires": session["expires"], + }) + + def _parse_response(self, callback, response): + if response.error: + logging.warning("HTTP error from Facebook: %s", response.error) + callback(None) + return + try: + json = escape.json_decode(response.body) + except: + logging.warning("Invalid JSON from Facebook: %r", response.body) + callback(None) + return + if isinstance(json, dict) and json.get("error_code"): + logging.warning("Facebook error: %d: %r", json["error_code"], + json.get("error_msg")) + callback(None) + return + callback(json) + + def _signature(self, args): + parts = ["%s=%s" % (n, args[n]) for n in sorted(args.keys())] + body = "".join(parts) + self.settings["facebook_secret"] + if isinstance(body, unicode): body = body.encode("utf-8") + return hashlib.md5(body).hexdigest() + + +def _oauth_signature(consumer_token, method, url, parameters={}, token=None): + """Calculates the HMAC-SHA1 OAuth signature for the given request. 
+ + See http://oauth.net/core/1.0/#signing_process + """ + parts = urlparse.urlparse(url) + scheme, netloc, path = parts[:3] + normalized_url = scheme.lower() + "://" + netloc.lower() + path + + base_elems = [] + base_elems.append(method.upper()) + base_elems.append(normalized_url) + base_elems.append("&".join("%s=%s" % (k, _oauth_escape(str(v))) + for k, v in sorted(parameters.items()))) + base_string = "&".join(_oauth_escape(e) for e in base_elems) + + key_elems = [consumer_token["secret"]] + key_elems.append(token["secret"] if token else "") + key = "&".join(key_elems) + + hash = hmac.new(key, base_string, hashlib.sha1) + return binascii.b2a_base64(hash.digest())[:-1] + + +def _oauth_escape(val): + if isinstance(val, unicode): + val = val.encode("utf-8") + return urllib.quote(val, safe="~") + + +def _oauth_parse_response(body): + p = cgi.parse_qs(body, keep_blank_values=False) + token = dict(key=p["oauth_token"][0], secret=p["oauth_token_secret"][0]) + + # Add the extra parameters the Provider included to the token + special = ("oauth_token", "oauth_token_secret") + token.update((k, p[k][0]) for k in p if k not in special) + return token diff --git a/tornado/database.py b/tornado/database.py new file mode 100644 index 000000000..c9da7fddd --- /dev/null +++ b/tornado/database.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""A lightweight wrapper around MySQLdb.""" + +import copy +import MySQLdb +import MySQLdb.constants +import MySQLdb.converters +import MySQLdb.cursors +import itertools + + +class Connection(object): + """A lightweight wrapper around MySQLdb DB-API connections. + + The main value we provide is wrapping rows in a dict/object so that + columns can be accessed by name. Typical usage: + + db = database.Connection("localhost", "mydatabase") + for article in db.query("SELECT * FROM articles"): + print article.title + + Cursors are hidden by the implementation, but other than that, the methods + are very similar to the DB-API. + + We explicitly set the timezone to UTC and the character encoding to + UTF-8 on all connections to avoid time zone and encoding errors. + """ + def __init__(self, host, database, user=None, password=None): + self.host = host + self.database = database + + args = dict(conv=CONVERSIONS, use_unicode=True, charset="utf8", + db=database, init_command='SET time_zone = "+0:00"', + sql_mode="TRADITIONAL") + if user is not None: + args["user"] = user + if password is not None: + args["passwd"] = password + + # We accept a path to a MySQL socket file or a host(:port) string + if "/" in host: + args["unix_socket"] = host + else: + self.socket = None + pair = host.split(":") + if len(pair) == 2: + args["host"] = pair[0] + args["port"] = int(pair[1]) + else: + args["host"] = host + args["port"] = 3306 + + self._db = None + self._db = MySQLdb.connect(**args) + self._db.autocommit(True) + + def __del__(self): + if self._db is not None: + self.close() + + def close(self): + """Closes this database connection.""" + self._db.close() + self._db = None + + def iter(self, query, *parameters): + """Returns an iterator for the given query and parameters.""" + cursor = MySQLdb.cursors.SSCursor(self._db) + try: + cursor.execute(query, parameters) + column_names = [d[0] for d in cursor.description] + for row in cursor: + yield Row(zip(column_names, row)) + finally: + 
cursor.close() + + def query(self, query, *parameters): + """Returns a row list for the given query and parameters.""" + cursor = self._db.cursor() + try: + cursor.execute(query, parameters) + column_names = [d[0] for d in cursor.description] + return [Row(itertools.izip(column_names, row)) for row in cursor] + finally: + cursor.close() + + def get(self, query, *parameters): + """Returns the first row returned for the given query.""" + rows = self.query(query, *parameters) + if not rows: + return None + elif len(rows) > 1: + raise Exception("Multiple rows returned for Database.get() query") + else: + return rows[0] + + def execute(self, query, *parameters): + """Executes the given query, returning the lastrowid from the query.""" + cursor = self._db.cursor() + try: + cursor.execute(query, parameters) + return cursor.lastrowid + finally: + cursor.close() + + def executemany(self, query, parameters): + """Executes the given query against all the given param sequences. + + We return the lastrowid from the query. 
+ """ + cursor = self._db.cursor() + try: + cursor.executemany(query, parameters) + return cursor.lastrowid + finally: + cursor.close() + + +class Row(dict): + """A dict that allows for object-like property access syntax.""" + def __getattr__(self, name): + try: + return self[name] + except KeyError: + raise AttributeError(name) + + +# Fix the access conversions to properly recognize unicode/binary +FIELD_TYPE = MySQLdb.constants.FIELD_TYPE +FLAG = MySQLdb.constants.FLAG +CONVERSIONS = copy.deepcopy(MySQLdb.converters.conversions) +for field_type in \ + [FIELD_TYPE.BLOB, FIELD_TYPE.STRING, FIELD_TYPE.VAR_STRING] + \ + ([FIELD_TYPE.VARCHAR] if 'VARCHAR' in vars(FIELD_TYPE) else []): + CONVERSIONS[field_type].insert(0, (FLAG.BINARY, str)) + + +# Alias some common MySQL exceptions +IntegrityError = MySQLdb.IntegrityError +OperationalError = MySQLdb.OperationalError diff --git a/tornado/epoll.c b/tornado/epoll.c new file mode 100644 index 000000000..b87515429 --- /dev/null +++ b/tornado/epoll.c @@ -0,0 +1,112 @@ +/* + * Copyright 2009 Facebook + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#include "Python.h" +#include +#include + +#define MAX_EVENTS 24 + +/* + * Simple wrapper around epoll_create. + */ +static PyObject* _epoll_create(void) { + int fd = epoll_create(MAX_EVENTS); + if (fd == -1) { + PyErr_SetFromErrno(PyExc_Exception); + return NULL; + } + + return PyInt_FromLong(fd); +} + +/* + * Simple wrapper around epoll_ctl. 
We throw an exception if the call fails + * rather than returning the error code since it is an infrequent (and likely + * catastrophic) event when it does happen. + */ +static PyObject* _epoll_ctl(PyObject* self, PyObject* args) { + int epfd, op, fd, events; + struct epoll_event event; + + if (!PyArg_ParseTuple(args, "iiiI", &epfd, &op, &fd, &events)) { + return NULL; + } + + memset(&event, 0, sizeof(event)); + event.events = events; + event.data.fd = fd; + if (epoll_ctl(epfd, op, fd, &event) == -1) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + + Py_INCREF(Py_None); + return Py_None; +} + +/* + * Simple wrapper around epoll_wait. We return None if the call times out and + * throw an exception if an error occurs. Otherwise, we return a list of + * (fd, event) tuples. + */ +static PyObject* _epoll_wait(PyObject* self, PyObject* args) { + struct epoll_event events[MAX_EVENTS]; + int epfd, timeout, num_events, i; + PyObject* list; + PyObject* tuple; + + if (!PyArg_ParseTuple(args, "ii", &epfd, &timeout)) { + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + num_events = epoll_wait(epfd, events, MAX_EVENTS, timeout); + Py_END_ALLOW_THREADS + if (num_events == -1) { + PyErr_SetFromErrno(PyExc_Exception); + return NULL; + } + + list = PyList_New(num_events); + for (i = 0; i < num_events; i++) { + tuple = PyTuple_New(2); + PyTuple_SET_ITEM(tuple, 0, PyInt_FromLong(events[i].data.fd)); + PyTuple_SET_ITEM(tuple, 1, PyInt_FromLong(events[i].events)); + PyList_SET_ITEM(list, i, tuple); + } + return list; +} + +/* + * Our method declarations + */ +static PyMethodDef kEpollMethods[] = { + {"epoll_create", (PyCFunction)_epoll_create, METH_NOARGS, + "Create an epoll file descriptor"}, + {"epoll_ctl", _epoll_ctl, METH_VARARGS, + "Control an epoll file descriptor"}, + {"epoll_wait", _epoll_wait, METH_VARARGS, + "Wait for events on an epoll file descriptor"}, + {NULL, NULL, 0, NULL} +}; + +/* + * Module initialization + */ +PyMODINIT_FUNC initepoll(void) { 
Py_InitModule("epoll", kEpollMethods); +} diff --git a/tornado/escape.py b/tornado/escape.py new file mode 100644 index 000000000..8773bf9c3 --- /dev/null +++ b/tornado/escape.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Escaping/unescaping methods for HTML, JSON, URLs, and others.""" + +import htmlentitydefs +import re +import xml.sax.saxutils +import urllib + +try: + import json + assert hasattr(json, "loads") and hasattr(json, "dumps") + _json_decode = lambda s: json.loads(s) + _json_encode = lambda v: json.dumps(v) +except: + try: + import simplejson + _json_decode = lambda s: simplejson.loads(_unicode(s)) + _json_encode = lambda v: simplejson.dumps(v) + except ImportError: + try: + # For Google AppEngine + from django.utils import simplejson + _json_decode = lambda s: simplejson.loads(_unicode(s)) + _json_encode = lambda v: simplejson.dumps(v) + except ImportError: + raise Exception("A JSON parser is required, e.g., simplejson at " + "http://pypi.python.org/pypi/simplejson/") + + +def xhtml_escape(value): + """Escapes a string so it is valid within XML or XHTML.""" + return utf8(xml.sax.saxutils.escape(value)) + + +def xhtml_unescape(value): + """Un-escapes an XML-escaped string.""" + return re.sub(r"&(#?)(\w+?);", _convert_entity, _unicode(value)) + + +def json_encode(value): + """JSON-encodes the given Python object.""" + return _json_encode(value) + + +def json_decode(value): + 
"""Returns Python objects for the given JSON string.""" + return _json_decode(value) + + +def squeeze(value): + """Replace all sequences of whitespace chars with a single space.""" + return re.sub(r"[\x00-\x20]+", " ", value).strip() + + +def url_escape(value): + """Returns a valid URL-encoded version of the given value.""" + return urllib.quote_plus(utf8(value)) + + +def url_unescape(value): + """Decodes the given value from a URL.""" + return _unicode(urllib.unquote_plus(value)) + + +def utf8(value): + if isinstance(value, unicode): + return value.encode("utf-8") + assert isinstance(value, str) + return value + + +def _unicode(value): + if isinstance(value, str): + return value.decode("utf-8") + assert isinstance(value, unicode) + return value + + +def _convert_entity(m): + if m.group(1) == "#": + try: + return unichr(int(m.group(2))) + except ValueError: + return "&#%s;" % m.group(2) + try: + return _HTML_UNICODE_MAP[m.group(2)] + except KeyError: + return "&%s;" % m.group(2) + + +def _build_unicode_map(): + unicode_map = {} + for name, value in htmlentitydefs.name2codepoint.iteritems(): + unicode_map[name] = unichr(value) + return unicode_map + +_HTML_UNICODE_MAP = _build_unicode_map() diff --git a/tornado/httpclient.py b/tornado/httpclient.py new file mode 100644 index 000000000..92a5d1650 --- /dev/null +++ b/tornado/httpclient.py @@ -0,0 +1,437 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Blocking and non-blocking HTTP client implementations using pycurl.""" + +import calendar +import collections +import cStringIO +import email.utils +import errno +import functools +import httplib +import ioloop +import logging +import pycurl +import time + + +class HTTPClient(object): + """A blocking HTTP client backed with pycurl. + + Typical usage looks like this: + + http_client = httpclient.HTTPClient() + try: + response = http_client.fetch("http://www.google.com/") + print response.body + except httpclient.HTTPError, e: + print "Error:", e + + fetch() can take a string URL or an HTTPRequest instance, which offers + more options, like executing POST/PUT/DELETE requests. + """ + def __init__(self, max_simultaneous_connections=None): + self._curl = _curl_create(max_simultaneous_connections) + + def __del__(self): + self._curl.close() + + def fetch(self, request, **kwargs): + """Executes an HTTPRequest, returning an HTTPResponse. + + If an error occurs during the fetch, we raise an HTTPError. + """ + if not isinstance(request, HTTPRequest): + request = HTTPRequest(url=request, **kwargs) + buffer = cStringIO.StringIO() + headers = {} + try: + _curl_setup_request(self._curl, request, buffer, headers) + self._curl.perform() + code = self._curl.getinfo(pycurl.HTTP_CODE) + if code < 200 or code >= 300: + raise HTTPError(code) + effective_url = self._curl.getinfo(pycurl.EFFECTIVE_URL) + return HTTPResponse( + request=request, code=code, headers=headers, + body=buffer.getvalue(), effective_url=effective_url) + except pycurl.error, e: + raise CurlError(*e) + finally: + buffer.close() + + +class AsyncHTTPClient(object): + """An non-blocking HTTP client backed with pycurl. 
+ + Example usage: + + import ioloop + + def handle_request(response): + if response.error: + print "Error:", response.error + else: + print response.body + ioloop.IOLoop.instance().stop() + + http_client = httpclient.AsyncHTTPClient() + http_client.fetch("http://www.google.com/", handle_request) + ioloop.IOLoop.instance().start() + + fetch() can take a string URL or an HTTPRequest instance, which offers + more options, like executing POST/PUT/DELETE requests. + + The keyword argument max_clients to the AsyncHTTPClient constructor + determines the maximum number of simultaneous fetch() operations that + can execute in parallel on each IOLoop. + """ + _ASYNC_CLIENTS = {} + + def __new__(cls, io_loop=None, max_clients=10, + max_simultaneous_connections=None): + # There is one client per IOLoop since they share curl instances + io_loop = io_loop or ioloop.IOLoop.instance() + if id(io_loop) in cls._ASYNC_CLIENTS: + return cls._ASYNC_CLIENTS[id(io_loop)] + else: + instance = super(AsyncHTTPClient, cls).__new__(cls) + instance.io_loop = io_loop + instance._multi = pycurl.CurlMulti() + instance._curls = [_curl_create(max_simultaneous_connections) + for i in xrange(max_clients)] + instance._free_list = instance._curls[:] + instance._requests = collections.deque() + instance._fds = {} + instance._events = {} + instance._added_perform_callback = False + instance._timeout = None + cls._ASYNC_CLIENTS[id(io_loop)] = instance + return instance + + def fetch(self, request, callback, **kwargs): + """Executes an HTTPRequest, calling callback with an HTTPResponse. + + If an error occurs during the fetch, the HTTPResponse given to the + callback has a non-None error attribute that contains the exception + encountered during the request. You can call response.reraise() to + throw the exception (if any) in the callback. 
+ """ + if not isinstance(request, HTTPRequest): + request = HTTPRequest(url=request, **kwargs) + self._requests.append((request, callback)) + self._add_perform_callback() + + def _add_perform_callback(self): + if not self._added_perform_callback: + self.io_loop.add_callback(self._perform) + self._added_perform_callback = True + + def _handle_events(self, fd, events): + self._events[fd] = events + self._add_perform_callback() + + def _handle_timeout(self): + self._timeout = None + self._perform() + + def _perform(self): + self._added_perform_callback = False + + while True: + while True: + ret, num_handles = self._multi.perform() + if ret != pycurl.E_CALL_MULTI_PERFORM: + break + + # Handle completed fetches + completed = 0 + while True: + num_q, ok_list, err_list = self._multi.info_read() + for curl in ok_list: + self._finish(curl) + completed += 1 + for curl, errnum, errmsg in err_list: + self._finish(curl, errnum, errmsg) + completed += 1 + if num_q == 0: + break + + # Start fetching new URLs + started = 0 + while self._free_list and self._requests: + started += 1 + curl = self._free_list.pop() + (request, callback) = self._requests.popleft() + curl.info = { + "headers": {}, + "buffer": cStringIO.StringIO(), + "request": request, + "callback": callback, + "start_time": time.time(), + } + _curl_setup_request(curl, request, curl.info["buffer"], + curl.info["headers"]) + self._multi.add_handle(curl) + + if not started and not completed: + break + + if self._timeout is not None: + self.io_loop.remove_timeout(self._timeout) + self._timeout = None + + if num_handles: + self._timeout = self.io_loop.add_timeout( + time.time() + 0.2, self._handle_timeout) + + # Wait for more I/O + fds = {} + (readable, writable, exceptable) = self._multi.fdset() + for fd in readable: + fds[fd] = fds.get(fd, 0) | 0x1 | 0x2 + for fd in writable: + fds[fd] = fds.get(fd, 0) | 0x4 + for fd in exceptable: + fds[fd] = fds.get(fd, 0) | 0x8 | 0x10 + + for fd in self._fds: + if fd not in fds: + 
self.io_loop.remove_handler(fd) + + for fd, events in fds.iteritems(): + old_events = self._fds.get(fd, None) + if old_events is None: + self.io_loop.add_handler(fd, self._handle_events, events) + elif old_events != events: + try: + self.io_loop.update_handler(fd, events) + except OSError, e: + if e[0] == errno.ENOENT: + self.io_loop.add_handler(fd, self._handle_events, + events) + else: + raise + self._fds = fds + + def _finish(self, curl, curl_error=None, curl_message=None): + info = curl.info + curl.info = None + self._multi.remove_handle(curl) + self._free_list.append(curl) + if curl_error: + error = CurlError(curl_error, curl_message) + code = error.code + body = None + effective_url = None + else: + error = None + code = curl.getinfo(pycurl.HTTP_CODE) + body = info["buffer"].getvalue() + effective_url = curl.getinfo(pycurl.EFFECTIVE_URL) + info["buffer"].close() + info["callback"](HTTPResponse( + request=info["request"], code=code, headers=info["headers"], + body=body, effective_url=effective_url, error=error, + request_time=time.time() - info["start_time"])) + + +class HTTPRequest(object): + def __init__(self, url, method="GET", headers={}, body=None, + auth_username=None, auth_password=None, + connect_timeout=None, request_timeout=None, + if_modified_since=None, follow_redirects=True, + max_redirects=5, user_agent=None, use_gzip=True, + network_interface=None): + if if_modified_since: + timestamp = calendar.timegm(if_modified_since.utctimetuple()) + headers["If-Modified-Since"] = email.utils.formatdate( + timestamp, localtime=False, usegmt=True) + if "Pragma" not in headers: + headers["Pragma"] = "" + self.url = _utf8(url) + self.method = method + self.headers = headers + self.body = body + self.auth_username = _utf8(auth_username) + self.auth_password = _utf8(auth_password) + self.connect_timeout = connect_timeout or 20.0 + self.request_timeout = request_timeout or 20.0 + self.follow_redirects = follow_redirects + self.max_redirects = max_redirects + 
self.user_agent = user_agent + self.use_gzip = use_gzip + self.network_interface = network_interface + + +class HTTPResponse(object): + def __init__(self, request, code, headers={}, body="", effective_url=None, + error=None, request_time=None): + self.request = request + self.code = code + self.headers = headers + self.body = body + if effective_url is None: + self.effective_url = request.url + else: + self.effective_url = effective_url + if error is None: + if self.code < 200 or self.code >= 300: + self.error = HTTPError(self.code) + else: + self.error = None + else: + self.error = error + self.request_time = request_time + + def rethrow(self): + if self.error: + raise self.error + + def __repr__(self): + args = ",".join("%s=%r" % i for i in self.__dict__.iteritems()) + return "%s(%s)" % (self.__class__.__name__, args) + + +class HTTPError(Exception): + def __init__(self, code, message=None): + self.code = code + Exception.__init__(self, "HTTP %d" % self.code) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.code) + + def __str__(self): + return "%d: %s" % (self.code, httplib.responses.get( + self.code, "Unknown")) + + +class CurlError(HTTPError): + def __init__(self, errno, message): + HTTPError.__init__(self, 599, message) + self.errno = errno + + def __repr__(self): + return "%s(%r,%r)" % (self.__class__.__name__, self.errno, + self.message) + + def __str__(self): + return "%d: %s" % (self.errno, self.message) + + +def _curl_create(max_simultaneous_connections=None): + curl = pycurl.Curl() + if logging.getLogger().isEnabledFor(logging.DEBUG): + curl.setopt(pycurl.VERBOSE, 1) + curl.setopt(pycurl.DEBUGFUNCTION, _curl_debug) + curl.setopt(pycurl.MAXCONNECTS, max_simultaneous_connections or 5) + return curl + + +def _curl_setup_request(curl, request, buffer, headers): + curl.setopt(pycurl.URL, request.url) + curl.setopt(pycurl.HTTPHEADER, + ["%s: %s" % i for i in request.headers.iteritems()]) + try: + curl.setopt(pycurl.HEADERFUNCTION, + 
functools.partial(_curl_header_callback, headers)) + except: + # Old version of curl; response will not include headers + pass + curl.setopt(pycurl.WRITEFUNCTION, buffer.write) + curl.setopt(pycurl.FOLLOWLOCATION, request.follow_redirects) + curl.setopt(pycurl.MAXREDIRS, request.max_redirects) + curl.setopt(pycurl.CONNECTTIMEOUT, int(request.connect_timeout)) + curl.setopt(pycurl.TIMEOUT, int(request.request_timeout)) + if request.user_agent: + curl.setopt(pycurl.USERAGENT, request.user_agent) + else: + curl.setopt(pycurl.USERAGENT, "Mozilla/5.0 (compatible; pycurl)") + if request.network_interface: + curl.setopt(pycurl.INTERFACE, request.network_interface) + if request.use_gzip: + curl.setopt(pycurl.ENCODING, "gzip,deflate") + else: + curl.setopt(pycurl.ENCODING, "none") + + # Set the request method through curl's retarded interface which makes + # up names for every single method + curl_options = { + "GET": pycurl.HTTPGET, + "POST": pycurl.POST, + "PUT": pycurl.UPLOAD, + "HEAD": pycurl.NOBODY, + } + for o in curl_options.values(): + curl.setopt(o, False) + curl.setopt(curl_options[request.method], True) + + # Handle curl's cryptic options for every individual HTTP method + if request.method in ("POST", "PUT"): + request_buffer = cStringIO.StringIO(request.body) + curl.setopt(pycurl.READFUNCTION, request_buffer.read) + if request.method == "POST": + def ioctl(cmd): + if cmd == curl.IOCMD_RESTARTREAD: + request_buffer.seek(0) + curl.setopt(pycurl.IOCTLFUNCTION, ioctl) + curl.setopt(pycurl.POSTFIELDSIZE, len(request.body)) + else: + curl.setopt(pycurl.INFILESIZE, len(request.body)) + + if request.auth_username and request.auth_password: + userpwd = "%s:%s" % (request.auth_username, request.auth_password) + curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC) + curl.setopt(pycurl.USERPWD, userpwd) + logging.info("%s %s (username: %r)", request.method, request.url, + request.auth_username) + else: + curl.unsetopt(pycurl.USERPWD) + logging.info("%s %s", request.method, 
request.url) + + +def _curl_header_callback(headers, header_line): + if header_line.startswith("HTTP/"): + headers.clear() + return + if header_line == "\r\n": + return + parts = header_line.split(": ") + if len(parts) != 2: + logging.warning("Invalid HTTP response header line %r", header_line) + return + headers[parts[0].strip()] = parts[1].strip() + + +def _curl_debug(debug_type, debug_msg): + debug_types = ('I', '<', '>', '<', '>') + if debug_type == 0: + logging.debug('%s', debug_msg.strip()) + elif debug_type in (1, 2): + for line in debug_msg.splitlines(): + logging.debug('%s %s', debug_types[debug_type], line) + elif debug_type == 4: + logging.debug('%s %r', debug_types[debug_type], debug_msg) + + +def _utf8(value): + if value is None: + return value + if isinstance(value, unicode): + return value.encode("utf-8") + assert isinstance(value, str) + return value diff --git a/tornado/httpserver.py b/tornado/httpserver.py new file mode 100644 index 000000000..0f1d8e00f --- /dev/null +++ b/tornado/httpserver.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A non-blocking, single-threaded HTTP server.""" + +import cgi +import errno +import fcntl +import functools +import ioloop +import iostream +import logging +import socket +import time +import urlparse + + +class HTTPServer(object): + """A non-blocking, single-threaded HTTP server. 
+ + A server is defined by a request callback that takes an HTTPRequest + instance as an argument and writes a valid HTTP response with + request.write(). request.finish() finishes the request (but does not + necessarily close the connection in the case of HTTP/1.1 keep-alive + requests). A simple example server that echoes back the URI you + requested: + + import httpserver + import ioloop + + def handle_request(request): + message = "You requested %s\n" % request.uri + request.write("HTTP/1.1 200 OK\r\nContent-Length: %d\r\n\r\n%s" % ( + len(message), message)) + request.finish() + + http_server = httpserver.HTTPServer(handle_request) + http_server.listen(8888) + ioloop.IOLoop.instance().start() + + HTTPServer is a very basic connection handler. Beyond parsing the + HTTP request body and headers, the only HTTP semantics implemented + in HTTPServer is HTTP/1.1 keep-alive connections. We do not, however, + implement chunked encoding, so the request callback must provide a + Content-Length header or implement chunked encoding for HTTP/1.1 + requests for the server to run correctly for HTTP/1.1 clients. If + the request handler is unable to do this, you can provide the + no_keep_alive argument to the HTTPServer constructor, which will + ensure the connection is closed on every request no matter what HTTP + version the client is using. + + If xheaders is True, we support the X-Real-Ip and X-Scheme headers, + which override the remote IP and HTTP scheme for all requests. These + headers are useful when running Tornado behind a reverse proxy or + load balancer. 
+ """ + def __init__(self, request_callback, no_keep_alive=False, io_loop=None, + xheaders=False): + self.request_callback = request_callback + self.no_keep_alive = no_keep_alive + self.io_loop = io_loop or ioloop.IOLoop.instance() + self.xheaders = xheaders + self._socket = None + + def listen(self, port): + assert not self._socket + self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) + flags = fcntl.fcntl(self._socket.fileno(), fcntl.F_GETFD) + flags |= fcntl.FD_CLOEXEC + fcntl.fcntl(self._socket.fileno(), fcntl.F_SETFD, flags) + self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self._socket.setblocking(0) + self._socket.bind(("", port)) + self._socket.listen(128) + self.io_loop.add_handler(self._socket.fileno(), self._handle_events, + self.io_loop.READ) + + def _handle_events(self, fd, events): + while True: + try: + connection, address = self._socket.accept() + except socket.error, e: + if e[0] in (errno.EWOULDBLOCK, errno.EAGAIN): + return + raise + try: + stream = iostream.IOStream(connection, io_loop=self.io_loop) + HTTPConnection(stream, address, self.request_callback, + self.no_keep_alive, self.xheaders) + except: + logging.error("Error in connection callback", exc_info=True) + + +class HTTPConnection(object): + """Handles a connection to an HTTP client, executing HTTP requests. + + We parse HTTP headers and bodies, and execute the request callback + until the HTTP conection is closed. 
+ """ + def __init__(self, stream, address, request_callback, no_keep_alive=False, + xheaders=False): + self.stream = stream + self.address = address + self.request_callback = request_callback + self.no_keep_alive = no_keep_alive + self.xheaders = xheaders + self._request = None + self._request_finished = False + self.stream.read_until("\r\n\r\n", self._on_headers) + + def write(self, chunk): + assert self._request, "Request closed" + self.stream.write(chunk, self._on_write_complete) + + def finish(self): + assert self._request, "Request closed" + self._request_finished = True + if not self.stream.writing(): + self._finish_request() + + def _on_write_complete(self): + if self._request_finished: + self._finish_request() + + def _finish_request(self): + if self.no_keep_alive: + disconnect = True + else: + connection_header = self._request.headers.get("Connection") + if self._request.supports_http_1_1(): + disconnect = connection_header == "close" + elif ("Content-Length" in self._request.headers + or self._request.method in ("HEAD", "GET")): + disconnect = connection_header != "Keep-Alive" + else: + disconnect = True + self._request = None + self._request_finished = False + if disconnect: + self.stream.close() + return + self.stream.read_until("\r\n\r\n", self._on_headers) + + def _on_headers(self, data): + eol = data.find("\r\n") + start_line = data[:eol] + method, uri, version = start_line.split(" ") + if not version.startswith("HTTP/"): + raise Exception("Malformed HTTP version in HTTP Request-Line") + headers = HTTPHeaders.parse(data[eol:]) + self._request = HTTPRequest( + connection=self, method=method, uri=uri, version=version, + headers=headers, remote_ip=self.address[0]) + + content_length = headers.get("Content-Length") + if content_length: + content_length = int(content_length) + if content_length > self.stream.max_buffer_size: + raise Exception("Content-Length too long") + if headers.get("Expect") == "100-continue": + self.stream.write("HTTP/1.1 100 
(Continue)\r\n\r\n") + self.stream.read_bytes(content_length, self._on_request_body) + return + + self.request_callback(self._request) + + def _on_request_body(self, data): + self._request.body = data + content_type = self._request.headers.get("Content-Type", "") + if self._request.method == "POST": + if content_type.startswith("application/x-www-form-urlencoded"): + arguments = cgi.parse_qs(self._request.body) + for name, values in arguments.iteritems(): + values = [v for v in values if v] + if values: + self._request.arguments.setdefault(name, []).extend( + values) + elif content_type.startswith("multipart/form-data"): + boundary = content_type[30:] + if boundary: self._parse_mime_body(boundary, data) + self.request_callback(self._request) + + def _parse_mime_body(self, boundary, data): + if data.endswith("\r\n"): + footer_length = len(boundary) + 6 + else: + footer_length = len(boundary) + 4 + parts = data[:-footer_length].split("--" + boundary + "\r\n") + for part in parts: + if not part: continue + eoh = part.find("\r\n\r\n") + if eoh == -1: + logging.warning("multipart/form-data missing headers") + continue + headers = HTTPHeaders.parse(part[:eoh]) + name_header = headers.get("Content-Disposition", "") + if not name_header.startswith("form-data;") or \ + not part.endswith("\r\n"): + logging.warning("Invalid multipart/form-data") + continue + value = part[eoh + 4:-2] + name_values = {} + for name_part in name_header[10:].split(";"): + name, name_value = name_part.strip().split("=", 1) + name_values[name] = name_value.strip('"').decode("utf-8") + if not name_values.get("name"): + logging.warning("multipart/form-data value missing name") + continue + name = name_values["name"] + if name_values.get("filename"): + ctype = headers.get("Content-Type", "application/unknown") + self._request.files.setdefault(name, []).append(dict( + filename=name_values["filename"], body=value, + content_type=ctype)) + else: + self._request.arguments.setdefault(name, []).append(value) 
+ + +class HTTPRequest(object): + """A single HTTP request. + + GET/POST arguments are available in the arguments property, which + maps arguments names to lists of values (to support multiple values + for individual names). Names and values are both unicode always. + + File uploads are available in the files property, which maps file + names to list of files. Each file is a dictionary of the form + {"filename":..., "content_type":..., "body":...}. The content_type + comes from the provided HTTP header and should not be trusted + outright given that it can be easily forged. + + An HTTP request is attached to a single HTTP connection, which can + be accessed through the "connection" attribute. Since connections + are typically kept open in HTTP/1.1, multiple requests can be handled + sequentially on a single connection. + """ + def __init__(self, method, uri, version="HTTP/1.0", headers=None, + body=None, remote_ip=None, protocol=None, host=None, + files=None, connection=None): + self.method = method + self.uri = uri + self.version = version + self.headers = headers or HTTPHeaders() + self.body = body or "" + if connection and connection.xheaders: + self.remote_ip = headers.get("X-Real-Ip", remote_ip) + self.protocol = headers.get("X-Scheme", protocol) or "http" + else: + self.remote_ip = remote_ip + self.protocol = protocol or "http" + self.host = host or headers.get("Host") or "127.0.0.1" + self.files = files or {} + self.connection = connection + self._start_time = time.time() + self._finish_time = None + + scheme, netloc, path, query, fragment = urlparse.urlsplit(uri) + self.path = path + self.query = query + arguments = cgi.parse_qs(query) + self.arguments = {} + for name, values in arguments.iteritems(): + values = [v for v in values if v] + if values: self.arguments[name] = values + + def supports_http_1_1(self): + """Returns True if this request supports HTTP/1.1 semantics""" + return self.version == "HTTP/1.1" + + def write(self, chunk): + """Writes the 
given chunk to the response stream.""" + assert isinstance(chunk, str) + self.connection.write(chunk) + + def finish(self): + """Finishes this HTTP request on the open connection.""" + self.connection.finish() + self._finish_time = time.time() + + def full_url(self): + """Reconstructs the full URL for this request.""" + return self.protocol + "://" + self.host + self.uri + + def request_time(self): + """Returns the amount of time it took for this request to execute.""" + if self._finish_time is None: + return time.time() - self._start_time + else: + return self._finish_time - self._start_time + + def __repr__(self): + attrs = ("protocol", "host", "method", "uri", "version", "remote_ip", + "remote_ip", "body") + args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs]) + return "%s(%s, headers=%s)" % ( + self.__class__.__name__, args, dict(self.headers)) + + +class HTTPHeaders(dict): + """A dictionary that maintains Http-Header-Case for all keys.""" + def __setitem__(self, name, value): + dict.__setitem__(self, self._normalize_name(name), value) + + def __getitem__(self, name): + return dict.__getitem__(self, self._normalize_name(name)) + + def _normalize_name(self, name): + return intern("-".join([w.capitalize() for w in name.split("-")])) + + @classmethod + def parse(cls, headers_string): + headers = cls() + for line in headers_string.splitlines(): + if line: + name, value = line.split(": ", 1) + headers[name] = value + return headers diff --git a/tornado/ioloop.py b/tornado/ioloop.py new file mode 100644 index 000000000..8bbd8dced --- /dev/null +++ b/tornado/ioloop.py @@ -0,0 +1,339 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A level-triggered I/O loop for non-blocking sockets.""" + +import bisect +import errno +import fcntl +import logging +import os +import select +import time + + +class IOLoop(object): + """A level-triggered I/O loop. + + We use epoll if it is available, or else we fall back on select(). If + you are implementing a system that needs to handle 1000s of simultaneous + connections, you should use Linux and either compile our epoll module or + use Python 2.6+ to get epoll support. + + Example usage for a simple TCP server: + + import errno + import functools + import ioloop + import socket + + def connection_ready(sock, fd, events): + while True: + try: + connection, address = sock.accept() + except socket.error, e: + if e[0] not in (errno.EWOULDBLOCK, errno.EAGAIN): + raise + return + connection.setblocking(0) + handle_connection(connection, address) + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.setblocking(0) + sock.bind(("", port)) + sock.listen(128) + + io_loop = ioloop.IOLoop.instance() + callback = functools.partial(connection_ready, sock) + io_loop.add_handler(sock.fileno(), callback, io_loop.READ) + io_loop.start() + + """ + # Constants from the epoll module + _EPOLLIN = 0x001 + _EPOLLPRI = 0x002 + _EPOLLOUT = 0x004 + _EPOLLERR = 0x008 + _EPOLLHUP = 0x010 + _EPOLLRDHUP = 0x2000 + _EPOLLONESHOT = (1 << 30) + _EPOLLET = (1 << 31) + + # Our events map exactly to the epoll events + NONE = 0 + READ = _EPOLLIN + WRITE = _EPOLLOUT + ERROR = _EPOLLERR | _EPOLLHUP | 
_EPOLLRDHUP + + def __init__(self, impl=None): + self._impl = impl or _poll() + self._handlers = {} + self._events = {} + self._callbacks = set() + self._timeouts = [] + self._running = False + + # Create a pipe that we send bogus data to when we want to wake + # the I/O loop when it is idle + r, w = os.pipe() + self._set_nonblocking(r) + self._set_nonblocking(w) + self._waker_reader = os.fdopen(r, "r", 0) + self._waker_writer = os.fdopen(w, "w", 0) + self.add_handler(r, self._read_waker, self.WRITE) + + @classmethod + def instance(cls): + """Returns a global IOLoop instance. + + Most single-threaded applications have a single, global IOLoop. + Use this method instead of passing around IOLoop instances + throughout your code. + + A common pattern for classes that depend on IOLoops is to use + a default argument to enable programs with multiple IOLoops + but not require the argument for simpler applications: + + class MyClass(object): + def __init__(self, io_loop=None): + self.io_loop = io_loop or IOLoop.instance() + """ + if not hasattr(cls, "_instance"): + cls._instance = cls() + return cls._instance + + def add_handler(self, fd, handler, events): + """Registers the given handler to receive the given events for fd.""" + self._handlers[fd] = handler + self._impl.register(fd, events | self.ERROR) + + def update_handler(self, fd, events): + """Changes the events we listen for fd.""" + self._impl.modify(fd, events | self.ERROR) + + def remove_handler(self, fd): + """Stop listening for events on fd.""" + self._handlers.pop(fd, None) + self._events.pop(fd, None) + try: + self._impl.unregister(fd) + except OSError: + logging.debug("Error deleting fd from IOLoop", exc_info=True) + + def start(self): + """Starts the I/O loop. + + The loop will run until one of the I/O handlers calls stop(), which + will make the loop stop after the current event iteration completes. 
+ """ + self._running = True + while True: + # Never use an infinite timeout here - it can stall epoll + poll_timeout = 0.2 + + # Prevent IO event starvation by delaying new callbacks + # to the next iteration of the event loop. + callbacks = list(self._callbacks) + for callback in callbacks: + # A callback can add or remove other callbacks + if callback in self._callbacks: + self._callbacks.remove(callback) + self._run_callback(callback) + + if self._callbacks: + poll_timeout = 0.0 + + if self._timeouts: + now = time.time() + while self._timeouts and self._timeouts[0].deadline <= now: + timeout = self._timeouts.pop(0) + self._run_callback(timeout.callback) + if self._timeouts: + milliseconds = self._timeouts[0].deadline - now + poll_timeout = min(milliseconds, poll_timeout) + + if not self._running: + break + + try: + event_pairs = self._impl.poll(poll_timeout) + except Exception, e: + if e.args == (4, "Interrupted system call"): + logging.warning("Interrupted system call", exc_info=1) + continue + else: + raise + + # Pop one fd at a time from the set of pending fds and run + # its handler. 
Since that handler may perform actions on + # other file descriptors, there may be reentrant calls to + # this IOLoop that update self._events + self._events.update(event_pairs) + while self._events: + fd, events = self._events.popitem() + try: + self._handlers[fd](fd, events) + except KeyboardInterrupt: + raise + except OSError, e: + if e[0] == errno.EPIPE: + # Happens when the client closes the connection + pass + else: + logging.error("Exception in I/O handler for fd %d", + fd, exc_info=True) + except: + logging.error("Exception in I/O handler for fd %d", + fd, exc_info=True) + + def stop(self): + """Stop the loop after the current event loop iteration is complete.""" + self._running = False + self._wake() + + def running(self): + """Returns true if this IOLoop is currently running.""" + return self._running + + def add_timeout(self, deadline, callback): + """Calls the given callback at the time deadline from the I/O loop.""" + timeout = _Timeout(deadline, callback) + bisect.insort(self._timeouts, timeout) + return timeout + + def remove_timeout(self, timeout): + self._timeouts.remove(timeout) + + def add_callback(self, callback): + """Calls the given callback on the next I/O loop iteration.""" + self._callbacks.add(callback) + self._wake() + + def remove_callback(self, callback): + """Removes the given callback from the next I/O loop iteration.""" + self._callbacks.pop(callback) + + def _wake(self): + try: + self._waker_writer.write("x") + except IOError: + pass + + def _run_callback(self, callback): + try: + callback() + except (KeyboardInterrupt, SystemExit): + raise + except: + logging.error("Exception in callback %r", callback, exc_info=True) + + def _read_waker(self, fd, events): + try: + while True: + self._waker_reader.read() + except IOError: + pass + + def _set_nonblocking(self, fd): + flags = fcntl.fcntl(fd, fcntl.F_GETFL) + fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK) + + +class _Timeout(object): + """An IOLoop timeout, a UNIX timestamp and 
a callback""" + def __init__(self, deadline, callback): + self.deadline = deadline + self.callback = callback + + def __cmp__(self, other): + return cmp((self.deadline, id(self.callback)), + (other.deadline, id(other.callback))) + + +class _EPoll(object): + """An epoll-based event loop using our C module for Python 2.5 systems""" + _EPOLL_CTL_ADD = 1 + _EPOLL_CTL_DEL = 2 + _EPOLL_CTL_MOD = 3 + + def __init__(self): + self._epoll_fd = epoll.epoll_create() + + def register(self, fd, events): + epoll.epoll_ctl(self._epoll_fd, self._EPOLL_CTL_ADD, fd, events) + + def modify(self, fd, events): + epoll.epoll_ctl(self._epoll_fd, self._EPOLL_CTL_MOD, fd, events) + + def unregister(self, fd): + epoll.epoll_ctl(self._epoll_fd, self._EPOLL_CTL_DEL, fd, 0) + + def poll(self, timeout): + return epoll.epoll_wait(self._epoll_fd, int(timeout * 1000)) + + +class _Select(object): + """A simple, select()-based IOLoop implementation for non-Linux systems""" + def __init__(self): + self.read_fds = set() + self.write_fds = set() + self.error_fds = set() + self.fd_sets = (self.read_fds, self.write_fds, self.error_fds) + + def register(self, fd, events): + if events & IOLoop.READ: self.read_fds.add(fd) + if events & IOLoop.WRITE: self.write_fds.add(fd) + if events & IOLoop.ERROR: self.error_fds.add(fd) + + def modify(self, fd, events): + self.unregister(fd) + self.register(fd, events) + + def unregister(self, fd): + self.read_fds.discard(fd) + self.write_fds.discard(fd) + self.error_fds.discard(fd) + + def poll(self, timeout): + readable, writeable, errors = select.select( + self.read_fds, self.write_fds, self.error_fds, timeout) + events = {} + for fd in readable: + events[fd] = events.get(fd, 0) | IOLoop.READ + for fd in writeable: + events[fd] = events.get(fd, 0) | IOLoop.WRITE + for fd in errors: + events[fd] = events.get(fd, 0) | IOLoop.ERROR + return events.items() + + +# Choose a poll implementation. 
Use epoll if it is available, fall back to +# select() for non-Linux platforms +if hasattr(select, "epoll"): + # Python 2.6+ on Linux + _poll = select.epoll +else: + try: + # Linux systems with our C module installed + import epoll + _poll = _EPoll + except: + # All other systems + import sys + if "linux" in sys.platform: + logging.warning("epoll module not found; using select()") + _poll = _Select diff --git a/tornado/iostream.py b/tornado/iostream.py new file mode 100644 index 000000000..20d017ffe --- /dev/null +++ b/tornado/iostream.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A utility class to write to and read from a non-blocking socket.""" + +import errno +import ioloop +import logging +import socket + + +class IOStream(object): + """A utility class to write to and read from a non-blocking socket. + + We support three methods: write(), read_until(), and read_bytes(). + All of the methods take callbacks (since writing and reading are + non-blocking and asynchronous). read_until() reads the socket until + a given delimiter, and read_bytes() reads until a specified number + of bytes have been read from the socket. 
+ + A very simple (and broken) HTTP client using this class: + + import ioloop + import iostream + import socket + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) + s.connect(("friendfeed.com", 80)) + stream = IOStream(s) + + def on_headers(data): + headers = {} + for line in data.split("\r\n"): + parts = line.split(":") + if len(parts) == 2: + headers[parts[0].strip()] = parts[1].strip() + stream.read_bytes(int(headers["Content-Length"]), on_body) + + def on_body(data): + print data + stream.close() + ioloop.IOLoop.instance().stop() + + stream.write("GET / HTTP/1.0\r\n\r\n") + stream.read_until("\r\n\r\n", on_headers) + ioloop.IOLoop.instance().start() + + """ + def __init__(self, socket, io_loop=None, max_buffer_size=104857600, + read_chunk_size=4096): + self.socket = socket + self.socket.setblocking(False) + self.io_loop = io_loop or ioloop.IOLoop.instance() + self.max_buffer_size = max_buffer_size + self.read_chunk_size = read_chunk_size + self._read_buffer = "" + self._write_buffer = "" + self._read_delimiter = None + self._read_bytes = None + self._read_callback = None + self._write_callback = None + self._close_callback = None + self._state = self.io_loop.ERROR + self.io_loop.add_handler( + self.socket.fileno(), self._handle_events, self._state) + + def read_until(self, delimiter, callback): + """Call callback when we read the given delimiter.""" + assert not self._read_callback, "Already reading" + loc = self._read_buffer.find(delimiter) + if loc != -1: + callback(self._consume(loc + len(delimiter))) + return + self._check_closed() + self._read_delimiter = delimiter + self._read_callback = callback + self._add_io_state(self.io_loop.READ) + + def read_bytes(self, num_bytes, callback): + """Call callback when we read the given number of bytes.""" + assert not self._read_callback, "Already reading" + if len(self._read_buffer) >= num_bytes: + callback(self._consume(num_bytes)) + return + self._check_closed() + self._read_bytes = num_bytes + 
self._read_callback = callback + self._add_io_state(self.io_loop.READ) + + def write(self, data, callback=None): + """Write the given data to this stream. + + If callback is given, we call it when all of the buffered write + data has been successfully written to the stream. If there was + previously buffered write data and an old write callback, that + callback is simply overwritten with this new callback. + """ + self._check_closed() + self._write_buffer += data + self._add_io_state(self.io_loop.WRITE) + self._write_callback = callback + + def set_close_callback(self, callback): + """Call the given callback when the stream is closed.""" + self._close_callback = callback + + def close(self): + """Close this stream.""" + if self.socket is not None: + self.io_loop.remove_handler(self.socket.fileno()) + self.socket.close() + self.socket = None + if self._close_callback: self._close_callback() + + def reading(self): + """Returns true if we are currently reading from the stream.""" + return self._read_callback is not None + + def writing(self): + """Returns true if we are currently writing to the stream.""" + return len(self._write_buffer) > 0 + + def closed(self): + return self.socket is None + + def _handle_events(self, fd, events): + if not self.socket: + logging.warning("Got events for closed stream %d", fd) + return + if events & self.io_loop.READ: + self._handle_read() + if not self.socket: + return + if events & self.io_loop.WRITE: + self._handle_write() + if not self.socket: + return + if events & self.io_loop.ERROR: + self.close() + return + state = self.io_loop.ERROR + if self._read_delimiter or self._read_bytes: + state |= self.io_loop.READ + if self._write_buffer: + state |= self.io_loop.WRITE + if state != self._state: + self._state = state + self.io_loop.update_handler(self.socket.fileno(), self._state) + + def _handle_read(self): + try: + chunk = self.socket.recv(self.read_chunk_size) + except socket.error, e: + if e[0] in (errno.EWOULDBLOCK, 
errno.EAGAIN): + return + else: + logging.warning("Read error on %d: %s", + self.socket.fileno(), e) + self.close() + return + if not chunk: + self.close() + return + self._read_buffer += chunk + if len(self._read_buffer) >= self.max_buffer_size: + logging.error("Reached maximum read buffer size") + self.close() + return + if self._read_bytes: + if len(self._read_buffer) >= self._read_bytes: + num_bytes = self._read_bytes + callback = self._read_callback + self._read_callback = None + self._read_bytes = None + callback(self._consume(num_bytes)) + elif self._read_delimiter: + loc = self._read_buffer.find(self._read_delimiter) + if loc != -1: + callback = self._read_callback + delimiter_len = len(self._read_delimiter) + self._read_callback = None + self._read_delimiter = None + callback(self._consume(loc + delimiter_len)) + + def _handle_write(self): + while self._write_buffer: + try: + num_bytes = self.socket.send(self._write_buffer) + self._write_buffer = self._write_buffer[num_bytes:] + except socket.error, e: + if e[0] in (errno.EWOULDBLOCK, errno.EAGAIN): + break + else: + logging.warning("Read error on %d: %s", + self.socket.fileno(), e) + self.close() + return + if not self._write_buffer and self._write_callback: + callback = self._write_callback + self._write_callback = None + callback() + + def _consume(self, loc): + result = self._read_buffer[:loc] + self._read_buffer = self._read_buffer[loc:] + return result + + def _check_closed(self): + if not self.socket: + raise IOError("Stream is closed") + + def _add_io_state(self, state): + if not self._state & state: + self._state = self._state | state + self.io_loop.update_handler(self.socket.fileno(), self._state) diff --git a/tornado/locale.py b/tornado/locale.py new file mode 100644 index 000000000..404b2719c --- /dev/null +++ b/tornado/locale.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this 
file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Translation methods for generating localized strings. + +To load a locale and generate a translated string: + + user_locale = locale.get("es_LA") + print user_locale.translate("Sign out") + +locale.get() returns the closest matching locale, not necessarily the +specific locale you requested. You can support pluralization with +additional arguments to translate(), e.g.: + + people = [...] + message = user_locale.translate( + "%(list)s is online", "%(list)s are online", len(people)) + print message % {"list": user_locale.list(people)} + +The first string is chosen if len(people) == 1, otherwise the second +string is chosen. +""" + +import csv +import datetime +import logging +import os +import os.path +import re + +_default_locale = "en_US" +_translations = {} +_supported_locales = frozenset([_default_locale]) + + +def get(*locale_codes): + """Returns the closest match for the given locale codes. + + We iterate over all given locale codes in order. If we have a tight + or a loose match for the code (e.g., "en" for "en_US"), we return + the locale. Otherwise we move to the next code in the list. + + By default we return en_US if no translations are found for any of + the specified locales. You can change the default locale with + set_default_locale() below. + """ + return Locale.get_closest(*locale_codes) + + +def set_default_locale(code): + """Sets the default locale, used in get_closest_locale(). + + The default locale is assumed to be the language used for all strings + in the system. 
The translations loaded from disk are mappings from + the default locale to the destination locale. Consequently, you don't + need to create a translation file for the default locale. + """ + global _default_locale + global _supported_locales + _default_locale = code + _supported_locales = frozenset(_translations.keys() + [_default_locale]) + + +def load_translations(directory): + """Loads translations from CSV files in a directory. + + Translations are strings with optional Python-style named placeholders + (e.g., "My name is %(name)s") and their associated translations. + + The directory should have translation files of the form LOCALE.csv, + e.g. es_GT.csv. The CSV files should have two or three columns: string, + translation, and an optional plural indicator. Plural indicators should + be one of "plural" or "singular". A given string can have both singular + and plural forms. For example "%(name)s liked this" may have a + different verb conjugation depending on whether %(name)s is one + name or a list of names. There should be two rows in the CSV file for + that string, one with plural indicator "singular", and one "plural". + For strings with no verbs that would change on translation, simply + use "unknown" or the empty string (or don't include the column at all). 
+
+    Example translation es_LA.csv:
+
+        "I love you","Te amo"
+        "%(name)s liked this","A %(name)s les gust\xf3 esto","plural"
+        "%(name)s liked this","A %(name)s le gust\xf3 esto","singular"
+
+    """
+    global _translations
+    global _supported_locales
+    _translations = {}
+    for path in os.listdir(directory):
+        if not path.endswith(".csv"): continue
+        # File name (minus .csv) is the locale code, e.g. "es_GT".
+        locale, extension = path.split(".")
+        if locale not in LOCALE_NAMES:
+            logging.error("Unrecognized locale %r (path: %s)", locale,
+                          os.path.join(directory, path))
+            continue
+        f = open(os.path.join(directory, path), "r")
+        _translations[locale] = {}
+        for i, row in enumerate(csv.reader(f)):
+            if not row or len(row) < 2: continue
+            # CSV bytes are UTF-8; decode to unicode for lookups.
+            row = [c.decode("utf-8").strip() for c in row]
+            english, translation = row[:2]
+            if len(row) > 2:
+                plural = row[2] or "unknown"
+            else:
+                plural = "unknown"
+            if plural not in ("plural", "singular", "unknown"):
+                logging.error("Unrecognized plural indicator %r in %s line %d",
+                              plural, path, i + 1)
+                continue
+            # Translations are bucketed by plural form, then keyed by the
+            # English source string.
+            _translations[locale].setdefault(plural, {})[english] = translation
+        f.close()
+    _supported_locales = frozenset(_translations.keys() + [_default_locale])
+    logging.info("Supported locales: %s", sorted(_supported_locales))
+
+
+# NOTE(review): this is a module-level function, but it takes an unused
+# 'cls' parameter (looks like a leftover classmethod); callers must pass a
+# dummy argument. Removing the parameter would break existing callers --
+# confirm call sites before changing the signature.
+def get_supported_locales(cls):
+    """Returns a list of all the supported locale codes.""" 
+    return _supported_locales
+
+
+class Locale(object):
+    @classmethod
+    def get_closest(cls, *locale_codes):
+        """Returns the closest match for the given locale code."""
+        for code in locale_codes:
+            if not code: continue
+            # Normalize e.g. "en-us" to "en_US".
+            code = code.replace("-", "_")
+            parts = code.split("_")
+            if len(parts) > 2:
+                continue
+            elif len(parts) == 2:
+                code = parts[0].lower() + "_" + parts[1].upper()
+            if code in _supported_locales:
+                return cls.get(code)
+            # Loose match: fall back to the bare language code.
+            if parts[0].lower() in _supported_locales:
+                return cls.get(parts[0].lower())
+        return cls.get(_default_locale)
+
+    @classmethod
+    def get(cls, code):
+        """Returns the Locale for the given locale code. 
+ + If it is not supported, we raise an exception. + """ + if not hasattr(cls, "_cache"): + cls._cache = {} + if code not in cls._cache: + assert code in _supported_locales + translations = _translations.get(code, {}) + cls._cache[code] = Locale(code, translations) + return cls._cache[code] + + def __init__(self, code, translations): + self.code = code + self.name = LOCALE_NAMES.get(code, {}).get("name", u"Unknown") + self.rtl = False + for prefix in ["fa", "ar", "he"]: + if self.code.startswith(prefix): + self.rtl = True + break + self.translations = translations + + # Initialize strings for date formatting + _ = self.translate + self._months = [ + _("January"), _("February"), _("March"), _("April"), + _("May"), _("June"), _("July"), _("August"), + _("September"), _("October"), _("November"), _("December")] + self._weekdays = [ + _("Monday"), _("Tuesday"), _("Wednesday"), _("Thursday"), + _("Friday"), _("Saturday"), _("Sunday")] + + def translate(self, message, plural_message=None, count=None): + """Returns the translation for the given message for this locale. + + If plural_message is given, you must also provide count. We return + plural_message when count != 1, and we return the singular form + for the given message when count == 1. + """ + if plural_message is not None: + assert count is not None + if count != 1: + message = plural_message + message_dict = self.translations.get("plural", {}) + else: + message_dict = self.translations.get("singular", {}) + else: + message_dict = self.translations.get("unknown", {}) + return message_dict.get(message, message) + + def format_date(self, date, gmt_offset=0, relative=True, shorter=False, + full_format=False): + """Formats the given date (which should be GMT). + + By default, we return a relative time (e.g., "2 minutes ago"). You + can return an absolute date string with relative=False. + + You can force a full format date ("July 10, 1980") with + full_format=True. 
+ """ + if self.code.startswith("ru"): + relative = False + if type(date) in (int, long, float): + date = datetime.datetime.utcfromtimestamp(date) + now = datetime.datetime.utcnow() + # Round down to now. Due to click skew, things are somethings + # slightly in the future. + if date > now: date = now + local_date = date - datetime.timedelta(minutes=gmt_offset) + local_now = now - datetime.timedelta(minutes=gmt_offset) + local_yesterday = local_now - datetime.timedelta(hours=24) + difference = now - date + seconds = difference.seconds + days = difference.days + + _ = self.translate + format = None + if not full_format: + if relative and days == 0: + if seconds < 50: + return _("1 second ago", "%(seconds)d seconds ago", + seconds) % { "seconds": seconds } + + if seconds < 50 * 60: + minutes = round(seconds / 60.0) + return _("1 minute ago", "%(minutes)d minutes ago", + minutes) % { "minutes": minutes } + + hours = round(seconds / (60.0 * 60)) + return _("1 hour ago", "%(hours)d hours ago", + hours) % { "hours": hours } + + if days == 0: + format = _("%(time)s") + elif days == 1 and local_date.day == local_yesterday.day and \ + relative: + format = _("yesterday") if shorter else \ + _("yesterday at %(time)s") + elif days < 5: + format = _("%(weekday)s") if shorter else \ + _("%(weekday)s at %(time)s") + elif days < 334: # 11mo, since confusing for same month last year + format = _("%(month_name)s %(day)s") if shorter else \ + _("%(month_name)s %(day)s at %(time)s") + + if format is None: + format = _("%(month_name)s %(day)s, %(year)s") if shorter else \ + _("%(month_name)s %(day)s, %(year)s at %(time)s") + + tfhour_clock = self.code not in ("en", "en_US", "zh_CN") + if tfhour_clock: + str_time = "%d:%02d" % (local_date.hour, local_date.minute) + elif self.code == "zh_CN": + str_time = "%s%d:%02d" % ( + (u'\u4e0a\u5348', u'\u4e0b\u5348')[local_date.hour >= 12], + local_date.hour % 12 or 12, local_date.minute) + else: + str_time = "%d:%02d %s" % ( + local_date.hour % 12 
or 12, local_date.minute, + ("am", "pm")[local_date.hour >= 12]) + + return format % { + "month_name": self._months[local_date.month - 1], + "weekday": self._weekdays[local_date.weekday()], + "day": str(local_date.day), + "year": str(local_date.year), + "time": str_time + } + + def format_day(self, date, gmt_offset=0, dow=True): + """Formats the given date as a day of week. + + Example: "Monday, January 22". You can remove the day of week with + dow=False. + """ + local_date = date - datetime.timedelta(minutes=gmt_offset) + _ = self.translate + if dow: + return _("%(weekday)s, %(month_name)s %(day)s") % { + "month_name": self._months[local_date.month - 1], + "weekday": self.WEEKDAYS[local_date.weekday()], + "day": str(local_date.day), + } + else: + return _("%(month_name)s %(day)s") % { + "month_name": self._months[local_date.month - 1], + "day": str(local_date.day), + } + + def list(self, parts): + """Returns a comma-separated list for the given list of parts. + + The format is, e.g., "A, B and C", "A and B" or just "A" for lists + of size 1. 
+ """ + _ = self.translate + if len(parts) == 0: return "" + if len(parts) == 1: return parts[0] + comma = u' \u0648 ' if self.code.startswith("fa") else u", " + return _("%(commas)s and %(last)s") % { + "commas": comma.join(parts[:-1]), + "last": parts[len(parts) - 1], + } + + def friendly_number(self, value): + """Returns a comma-separated number for the given integer.""" + if self.code not in ("en", "en_US"): + return str(value) + value = str(value) + parts = [] + while value: + parts.append(value[-3:]) + value = value[:-3] + return ",".join(reversed(parts)) + + +LOCALE_NAMES = { + "af_ZA": {"name_en": u"Afrikaans", "name": u"Afrikaans"}, + "ar_AR": {"name_en": u"Arabic", "name": u"\u0627\u0644\u0639\u0631\u0628\u064a\u0629"}, + "bg_BG": {"name_en": u"Bulgarian", "name": u"\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"}, + "bn_IN": {"name_en": u"Bengali", "name": u"\u09ac\u09be\u0982\u09b2\u09be"}, + "bs_BA": {"name_en": u"Bosnian", "name": u"Bosanski"}, + "ca_ES": {"name_en": u"Catalan", "name": u"Catal\xe0"}, + "cs_CZ": {"name_en": u"Czech", "name": u"\u010ce\u0161tina"}, + "cy_GB": {"name_en": u"Welsh", "name": u"Cymraeg"}, + "da_DK": {"name_en": u"Danish", "name": u"Dansk"}, + "de_DE": {"name_en": u"German", "name": u"Deutsch"}, + "el_GR": {"name_en": u"Greek", "name": u"\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"}, + "en_GB": {"name_en": u"English (UK)", "name": u"English (UK)"}, + "en_US": {"name_en": u"English (US)", "name": u"English (US)"}, + "es_ES": {"name_en": u"Spanish (Spain)", "name": u"Espa\xf1ol (Espa\xf1a)"}, + "es_LA": {"name_en": u"Spanish", "name": u"Espa\xf1ol"}, + "et_EE": {"name_en": u"Estonian", "name": u"Eesti"}, + "eu_ES": {"name_en": u"Basque", "name": u"Euskara"}, + "fa_IR": {"name_en": u"Persian", "name": u"\u0641\u0627\u0631\u0633\u06cc"}, + "fi_FI": {"name_en": u"Finnish", "name": u"Suomi"}, + "fr_CA": {"name_en": u"French (Canada)", "name": u"Fran\xe7ais (Canada)"}, + "fr_FR": {"name_en": u"French", "name": 
u"Fran\xe7ais"}, + "ga_IE": {"name_en": u"Irish", "name": u"Gaeilge"}, + "gl_ES": {"name_en": u"Galician", "name": u"Galego"}, + "he_IL": {"name_en": u"Hebrew", "name": u"\u05e2\u05d1\u05e8\u05d9\u05ea"}, + "hi_IN": {"name_en": u"Hindi", "name": u"\u0939\u093f\u0928\u094d\u0926\u0940"}, + "hr_HR": {"name_en": u"Croatian", "name": u"Hrvatski"}, + "hu_HU": {"name_en": u"Hungarian", "name": u"Magyar"}, + "id_ID": {"name_en": u"Indonesian", "name": u"Bahasa Indonesia"}, + "is_IS": {"name_en": u"Icelandic", "name": u"\xcdslenska"}, + "it_IT": {"name_en": u"Italian", "name": u"Italiano"}, + "ja_JP": {"name_en": u"Japanese", "name": u"\xe6\xe6\xe8"}, + "ko_KR": {"name_en": u"Korean", "name": u"\xed\xea\xec"}, + "lt_LT": {"name_en": u"Lithuanian", "name": u"Lietuvi\u0173"}, + "lv_LV": {"name_en": u"Latvian", "name": u"Latvie\u0161u"}, + "mk_MK": {"name_en": u"Macedonian", "name": u"\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438"}, + "ml_IN": {"name_en": u"Malayalam", "name": u"\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02"}, + "ms_MY": {"name_en": u"Malay", "name": u"Bahasa Melayu"}, + "nb_NO": {"name_en": u"Norwegian (bokmal)", "name": u"Norsk (bokm\xe5l)"}, + "nl_NL": {"name_en": u"Dutch", "name": u"Nederlands"}, + "nn_NO": {"name_en": u"Norwegian (nynorsk)", "name": u"Norsk (nynorsk)"}, + "pa_IN": {"name_en": u"Punjabi", "name": u"\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40"}, + "pl_PL": {"name_en": u"Polish", "name": u"Polski"}, + "pt_BR": {"name_en": u"Portuguese (Brazil)", "name": u"Portugu\xeas (Brasil)"}, + "pt_PT": {"name_en": u"Portuguese (Portugal)", "name": u"Portugu\xeas (Portugal)"}, + "ro_RO": {"name_en": u"Romanian", "name": u"Rom\xe2n\u0103"}, + "ru_RU": {"name_en": u"Russian", "name": u"\u0420\u0443\u0441\u0441\u043a\u0438\u0439"}, + "sk_SK": {"name_en": u"Slovak", "name": u"Sloven\u010dina"}, + "sl_SI": {"name_en": u"Slovenian", "name": u"Sloven\u0161\u010dina"}, + "sq_AL": {"name_en": u"Albanian", "name": u"Shqip"}, + "sr_RS": {"name_en": u"Serbian", 
"name": u"\u0421\u0440\u043f\u0441\u043a\u0438"}, + "sv_SE": {"name_en": u"Swedish", "name": u"Svenska"}, + "sw_KE": {"name_en": u"Swahili", "name": u"Kiswahili"}, + "ta_IN": {"name_en": u"Tamil", "name": u"\u0ba4\u0bae\u0bbf\u0bb4\u0bcd"}, + "te_IN": {"name_en": u"Telugu", "name": u"\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41"}, + "th_TH": {"name_en": u"Thai", "name": u"\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22"}, + "tl_PH": {"name_en": u"Filipino", "name": u"Filipino"}, + "tr_TR": {"name_en": u"Turkish", "name": u"T\xfcrk\xe7e"}, + "uk_UA": {"name_en": u"Ukraini ", "name": u"\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"}, + "vi_VN": {"name_en": u"Vietnamese", "name": u"Ti\u1ebfng Vi\u1ec7t"}, + "zh_CN": {"name_en": u"Chinese (Simplified)", "name": u"\xe4\xe6(\xe7\xe4)"}, + "zh_HK": {"name_en": u"Chinese (Hong Kong)", "name": u"\xe4\xe6(\xe9\xe6)"}, + "zh_TW": {"name_en": u"Chinese (Taiwan)", "name": u"\xe4\xe6(\xe5\xe7)"}, +} diff --git a/tornado/options.py b/tornado/options.py new file mode 100644 index 000000000..1f676ff01 --- /dev/null +++ b/tornado/options.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A command line parsing module that lets modules define their own options. 
+ +Each module defines its own options, e.g., + + from tornado.options import define, options + + define("mysql_host", default="127.0.0.1:3306", help="Main user DB") + define("memcache_hosts", default="127.0.0.1:11011", multiple=True, + help="Main user memcache servers") + + def connect(): + db = database.Connection(options.mysql_host) + ... + +The main() method of your application does not need to be aware of all of +the options used throughout your program; they are all automatically loaded +when the modules are loaded. Your main() method can parse the command line +or parse a config file with: + + import tornado.options + tornado.options.parse_config_file("/etc/server.conf") + tornado.options.parse_command_line() + +Command line formats are what you would expect ("--myoption=myvalue"). +Config files are just Python files. Global names become options, e.g., + + myoption = "myvalue" + myotheroption = "myothervalue" + +We support datetimes, timedeltas, ints, and floats (just pass a 'type' +kwarg to define). We also accept multi-value options. See the documentation +for define() below. +""" + +import datetime +import logging +import re +import sys +import time + +# For pretty log messages, if available +try: + import curses +except: + curses = None + + +def define(name, default=None, type=str, help=None, metavar=None, + multiple=False): + """Defines a new command line option. + + If type is given (one of str, float, int, datetime, or timedelta), + we parse the command line arguments based on the given type. If + multiple is True, we accept comma-separated values, and the option + value is always a list. + + For multi-value integers, we also accept the syntax x:y, which + turns into range(x, y) - very useful for long integer ranges. + + help and metavar are used to construct the automatically generated + command line help string. The help message is formatted like: + + --name=METAVAR help string + + Command line option names must be unique globally. 
They can be parsed + from the command line with parse_command_line() or parsed from a + config file with parse_config_file. + """ + if name in options: + raise Error("Option %r already defined in %s", name, + options[name].file_name) + frame = sys._getframe(0) + options_file = frame.f_code.co_filename + file_name = frame.f_back.f_code.co_filename + if file_name == options_file: file_name = "" + options[name] = _Option(name, file_name=file_name, default=default, + type=type, help=help, metavar=metavar, + multiple=multiple) + + +def parse_command_line(args=None): + """Parses all options given on the command line. + + We return all command line arguments that are not options as a list. + """ + if args is None: args = sys.argv + for i in xrange(1, len(args)): + # All things after the last option are command line arguments + if not args[i].startswith("-"): + return args[i:] + if args[i] == "--": + continue + arg = args[i].lstrip("-") + name, equals, value = arg.partition("=") + name = name.replace('-', '_') + if not name in options: + print_help() + raise Error('Unrecognized command line option: %r' % name) + option = options[name] + if not equals: + if option.type == bool: + value = "true" + else: + raise Error('Option %r requires a value' % name) + option.parse(value) + if options.help: + print_help() + sys.exit(0) + + # Set up log level and pretty console logging by default + logging.getLogger().setLevel(getattr(logging, options.logging.upper())) + enable_pretty_logging() + + return [] + + +def parse_config_file(path, overwrite=True): + """Parses and loads the Python config file at the given path.""" + config = {} + execfile(path, config, config) + for name in config: + if name in options: + options[name].set(config[name]) + + +def print_help(file=sys.stdout): + """Prints all the command line options to stdout.""" + print >> file, "Usage: %s [OPTIONS]" % sys.argv[0] + print >> file, "" + print >> file, "Options:" + by_file = {} + for option in options.itervalues(): + 
by_file.setdefault(option.file_name, []).append(option) + + for filename, o in sorted(by_file.items()): + if filename: print >> file, filename + o.sort(key=lambda option: option.name) + for option in o: + prefix = option.name + if option.metavar: + prefix += "=" + option.metavar + print >> file, " --%-30s %s" % (prefix, option.help or "") + print >> file + + +class _Options(dict): + """Our global program options, an dictionary with object-like access.""" + @classmethod + def instance(cls): + if not hasattr(cls, "_instance"): + cls._instance = cls() + return cls._instance + + def __getattr__(self, name): + if isinstance(self.get(name), _Option): + return self[name].value() + raise Error("Unrecognized option %r" % name) + + +class _Option(object): + def __init__(self, name, default=None, type=str, help=None, metavar=None, + multiple=False, file_name=None): + if default is None and multiple: + default = [] + self.name = name + self.type = type + self.help = help + self.metavar = metavar + self.multiple = multiple + self.file_name = file_name + self.default = default + self._value = None + + def value(self): + return self.default if self._value is None else self._value + + def parse(self, value): + _parse = { + datetime.datetime: self._parse_datetime, + datetime.timedelta: self._parse_timedelta, + bool: self._parse_bool, + str: self._parse_string, + }.get(self.type, self.type) + if self.multiple: + if self._value is None: + self._value = [] + for part in value.split(","): + if self.type in (int, long): + # allow ranges of the form X:Y (inclusive at both ends) + lo, _, hi = part.partition(":") + lo = _parse(lo) + hi = _parse(hi) if hi else lo + self._value.extend(range(lo, hi+1)) + else: + self._value.append(_parse(part)) + else: + self._value = _parse(value) + return self.value() + + def set(self, value): + if self.multiple: + if not isinstance(value, list): + raise Error("Option %r is required to be a list of %s" % + (self.name, self.type.__name__)) + for item in 
value: + if item != None and not isinstance(item, self.type): + raise Error("Option %r is required to be a list of %s" % + (self.name, self.type.__name__)) + else: + if value != None and not isinstance(value, self.type): + raise Error("Option %r is required to be a %s" % + (self.name, self.type.__name__)) + self._value = value + + # Supported date/time formats in our options + _DATETIME_FORMATS = [ + "%a %b %d %H:%M:%S %Y", + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%d %H:%M", + "%Y-%m-%dT%H:%M", + "%Y%m%d %H:%M:%S", + "%Y%m%d %H:%M", + "%Y-%m-%d", + "%Y%m%d", + "%H:%M:%S", + "%H:%M", + ] + + def _parse_datetime(self, value): + for format in self._DATETIME_FORMATS: + try: + return datetime.datetime.strptime(value, format) + except ValueError: + pass + raise Error('Unrecognized date/time format: %r' % value) + + _TIMEDELTA_ABBREVS = [ + ('hours', ['h']), + ('minutes', ['m', 'min']), + ('seconds', ['s', 'sec']), + ('milliseconds', ['ms']), + ('microseconds', ['us']), + ('days', ['d']), + ('weeks', ['w']), + ] + + _TIMEDELTA_ABBREV_DICT = dict( + (abbrev, full) for full, abbrevs in _TIMEDELTA_ABBREVS + for abbrev in abbrevs) + + _FLOAT_PATTERN = r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?' 
+ + _TIMEDELTA_PATTERN = re.compile( + r'\s*(%s)\s*(\w*)\s*' % _FLOAT_PATTERN, re.IGNORECASE) + + def _parse_timedelta(self, value): + try: + sum = datetime.timedelta() + start = 0 + while start < len(value): + m = self._TIMEDELTA_PATTERN.match(value, start) + if not m: + raise Exception() + num = float(m.group(1)) + units = m.group(2) or 'seconds' + units = self._TIMEDELTA_ABBREV_DICT.get(units, units) + sum += datetime.timedelta(**{units: num}) + start = m.end() + return sum + except: + raise + + def _parse_bool(self, value): + return value.lower() not in ("false", "0", "f") + + def _parse_string(self, value): + return value.decode("utf-8") + + +class Error(Exception): + pass + + +def enable_pretty_logging(): + """Turns on colored logging output for stderr if we are in a tty.""" + if not curses: return + try: + if not sys.stderr.isatty(): return + curses.setupterm() + except: + return + channel = logging.StreamHandler() + channel.setFormatter(_ColorLogFormatter()) + logging.getLogger().addHandler(channel) + + +class _ColorLogFormatter(logging.Formatter): + def __init__(self, *args, **kwargs): + logging.Formatter.__init__(self, *args, **kwargs) + fg_color = curses.tigetstr("setaf") or curses.tigetstr("setf") or "" + self._colors = { + logging.DEBUG: curses.tparm(fg_color, 4), # Blue + logging.INFO: curses.tparm(fg_color, 2), # Green + logging.WARNING: curses.tparm(fg_color, 3), # Yellow + logging.ERROR: curses.tparm(fg_color, 1), # Red + } + self._normal = curses.tigetstr("sgr0") + + def format(self, record): + try: + record.message = record.getMessage() + except Exception, e: + record.message = "Bad message (%r): %r" % (e, record.__dict__) + record.asctime = time.strftime( + "%y%m%d %H:%M:%S", self.converter(record.created)) + prefix = '[%(levelname)1.1s %(asctime)s %(module)s:%(lineno)d]' % \ + record.__dict__ + color = self._colors.get(record.levelno, self._normal) + formatted = color + prefix + self._normal + " " + record.message + if record.exc_info: + if not 
record.exc_text: + record.exc_text = self.formatException(record.exc_info) + if record.exc_text: + formatted = formatted.rstrip() + "\n" + record.exc_text + return formatted.replace("\n", "\n ") + + +options = _Options.instance() + + +# Default options +define("help", type=bool, help="show this help information") +define("logging", default="info", help="set the Python log level", + metavar="info|warning|error") diff --git a/tornado/s3server.py b/tornado/s3server.py new file mode 100644 index 000000000..2e8a97de2 --- /dev/null +++ b/tornado/s3server.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Implementation of an S3-like storage server based on local files. + +Useful to test features that will eventually run on S3, or if you want to +run something locally that was once running on S3. + +We don't support all the features of S3, but it does work with the +standard S3 client for the most basic semantics. 
To use the standard +S3 client with this module: + + c = S3.AWSAuthConnection("", "", server="localhost", port=8888, + is_secure=False) + c.create_bucket("mybucket") + c.put("mybucket", "mykey", "a value") + print c.get("mybucket", "mykey").body + +""" + +import bisect +import datetime +import escape +import hashlib +import httpserver +import ioloop +import os +import os.path +import urllib +import web + + +def start(port, root_directory="/tmp/s3", bucket_depth=0): + """Starts the mock S3 server on the given port at the given path.""" + application = S3Application(root_directory, bucket_depth) + http_server = httpserver.HTTPServer(application) + http_server.listen(port) + ioloop.IOLoop.instance().start() + + +class S3Application(web.Application): + """Implementation of an S3-like storage server based on local files. + + If bucket depth is given, we break files up into multiple directories + to prevent hitting file system limits for number of files in each + directories. 1 means one level of directories, 2 means 2, etc. 
+ """ + def __init__(self, root_directory, bucket_depth=0): + web.Application.__init__(self, [ + (r"/", RootHandler), + (r"/([^/]+)/(.+)", ObjectHandler), + (r"/([^/]+)/", BucketHandler), + ]) + self.directory = os.path.abspath(root_directory) + if not os.path.exists(self.directory): + os.makedirs(self.directory) + self.bucket_depth = bucket_depth + + +class BaseRequestHandler(web.RequestHandler): + SUPPORTED_METHODS = ("PUT", "GET", "DELETE") + + def render_xml(self, value): + assert isinstance(value, dict) and len(value) == 1 + self.set_header("Content-Type", "application/xml; charset=UTF-8") + name = value.keys()[0] + parts = [] + parts.append('<' + escape.utf8(name) + + ' xmlns="http://doc.s3.amazonaws.com/2006-03-01">') + self._render_parts(value.values()[0], parts) + parts.append('') + self.finish('\n' + + ''.join(parts)) + + def _render_parts(self, value, parts=[]): + if isinstance(value, basestring): + parts.append(escape.xhtml_escape(value)) + elif isinstance(value, int) or isinstance(value, long): + parts.append(str(value)) + elif isinstance(value, datetime.datetime): + parts.append(value.strftime("%Y-%m-%dT%H:%M:%S.000Z")) + elif isinstance(value, dict): + for name, subvalue in value.iteritems(): + if not isinstance(subvalue, list): + subvalue = [subvalue] + for subsubvalue in subvalue: + parts.append('<' + escape.utf8(name) + '>') + self._render_parts(subsubvalue, parts) + parts.append('') + else: + raise Exception("Unknown S3 value type %r", value) + + def _object_path(self, bucket, object_name): + if self.application.bucket_depth < 1: + return os.path.abspath(os.path.join( + self.application.directory, bucket, object_name)) + hash = hashlib.md5(object_name).hexdigest() + path = os.path.abspath(os.path.join( + self.application.directory, bucket)) + for i in range(self.application.bucket_depth): + path = os.path.join(path, hash[:2 * (i + 1)]) + return os.path.join(path, object_name) + + +class RootHandler(BaseRequestHandler): + def get(self): + names = 
os.listdir(self.application.directory) + buckets = [] + for name in names: + path = os.path.join(self.application.directory, name) + info = os.stat(path) + buckets.append({ + "Name": name, + "CreationDate": datetime.datetime.utcfromtimestamp( + info.st_ctime), + }) + self.render_xml({"ListAllMyBucketsResult": { + "Buckets": {"Bucket": buckets}, + }}) + + +class BucketHandler(BaseRequestHandler): + def get(self, bucket_name): + prefix = self.get_argument("prefix", u"") + marker = self.get_argument("marker", u"") + max_keys = int(self.get_argument("max-keys", 50000)) + path = os.path.abspath(os.path.join(self.application.directory, + bucket_name)) + terse = int(self.get_argument("terse", 0)) + if not path.startswith(self.application.directory) or \ + not os.path.isdir(path): + raise web.HTTPError(404) + object_names = [] + for root, dirs, files in os.walk(path): + for file_name in files: + object_names.append(os.path.join(root, file_name)) + skip = len(path) + 1 + for i in range(self.application.bucket_depth): + skip += 2 * (i + 1) + 1 + object_names = [n[skip:] for n in object_names] + object_names.sort() + contents = [] + + start_pos = 0 + if marker: + start_pos = bisect.bisect_right(object_names, marker, start_pos) + if prefix: + start_pos = bisect.bisect_left(object_names, prefix, start_pos) + + truncated = False + for object_name in object_names[start_pos:]: + if not object_name.startswith(prefix): + break + if len(contents) >= max_keys: + truncated = True + break + object_path = self._object_path(bucket_name, object_name) + c = {"Key": object_name} + if not terse: + info = os.stat(object_path) + c.update({ + "LastModified": datetime.datetime.utcfromtimestamp( + info.st_mtime), + "Size": info.st_size, + }) + contents.append(c) + marker = object_name + self.render_xml({"ListBucketResult": { + "Name": bucket_name, + "Prefix": prefix, + "Marker": marker, + "MaxKeys": max_keys, + "IsTruncated": truncated, + "Contents": contents, + }}) + + def put(self, bucket_name): 
+ path = os.path.abspath(os.path.join( + self.application.directory, bucket_name)) + if not path.startswith(self.application.directory) or \ + os.path.exists(path): + raise web.HTTPError(403) + os.makedirs(path) + self.finish() + + def delete(self, bucket_name): + path = os.path.abspath(os.path.join( + self.application.directory, bucket_name)) + if not path.startswith(self.application.directory) or \ + not os.path.isdir(path): + raise web.HTTPError(404) + if len(os.listdir(path)) > 0: + raise web.HTTPError(403) + os.rmdir(path) + self.set_status(204) + self.finish() + + +class ObjectHandler(BaseRequestHandler): + def get(self, bucket, object_name): + object_name = urllib.unquote(object_name) + path = self._object_path(bucket, object_name) + if not path.startswith(self.application.directory) or \ + not os.path.isfile(path): + raise web.HTTPError(404) + info = os.stat(path) + self.set_header("Content-Type", "application/unknown") + self.set_header("Last-Modified", datetime.datetime.utcfromtimestamp( + info.st_mtime)) + object_file = open(path, "r") + try: + self.finish(object_file.read()) + finally: + object_file.close() + + def put(self, bucket, object_name): + object_name = urllib.unquote(object_name) + bucket_dir = os.path.abspath(os.path.join( + self.application.directory, bucket)) + if not bucket_dir.startswith(self.application.directory) or \ + not os.path.isdir(bucket_dir): + raise web.HTTPError(404) + path = self._object_path(bucket, object_name) + if not path.startswith(bucket_dir) or os.path.isdir(path): + raise web.HTTPError(403) + directory = os.path.dirname(path) + if not os.path.exists(directory): + os.makedirs(directory) + object_file = open(path, "w") + object_file.write(self.request.body) + object_file.close() + self.finish() + + def delete(self, bucket, object_name): + object_name = urllib.unquote(object_name) + path = self._object_path(bucket, object_name) + if not path.startswith(self.application.directory) or \ + not os.path.isfile(path): + raise 
web.HTTPError(404) + os.unlink(path) + self.set_status(204) + self.finish() diff --git a/tornado/template.py b/tornado/template.py new file mode 100644 index 000000000..eafee778f --- /dev/null +++ b/tornado/template.py @@ -0,0 +1,561 @@ +#!/usr/bin/env python +# +# Copyright 2009 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A simple template system that compiles templates to Python code. + +Basic usage looks like: + + t = template.Template("{{ myvalue }}") + print t.generate(myvalue="XXX") + +Loader is a class that loads templates from a root directory and caches +the compiled templates: + + loader = template.Loader("/home/btaylor") + print loader.load("test.html").generate(myvalue="XXX") + +We compile all templates to raw Python. Error-reporting is currently... uh, +interesting. Syntax for the templates + + ### base.html + + + {% block title %}Default title{% end %} + + +
+          {% for student in students %}
+            {% block student %}
+              <li>{{ escape(student.name) }}</li>
+            {% end %}
+          {% end %}
+        </ul>
+      </body>
+    </html>
+
+
+    ### bold.html
+    {% extends "base.html" %}
+
+    {% block title %}A bolder title{% end %}
+
+    {% block student %}
+      <li><span style="bold">{{ escape(student.name) }}</span></li>
+    {% end %}
+
+Unlike most other template systems, we do not put any restrictions on the
+expressions you can include in your statements. if and for blocks get
+translated exactly into Python, so you can do complex expressions like:
+
+    {% for student in [p for p in people if p.student and p.age > 23] %}
+        <li>{{ escape(student.name) }}</li>
  • + {% end %} + +Translating directly to Python means you can apply functions to expressions +easily, like the escape() function in the examples above. You can pass +functions in to your template just like any other variable: + + ### Python code + def add(x, y): + return x + y + template.execute(add=add) + + ### The template + {{ add(1, 2) }} + +We provide the functions escape(), url_escape(), json_encode(), and squeeze() +to all templates by default. +""" + +from __future__ import with_statement + +import cStringIO +import datetime +import escape +import logging +import os.path +import re + + +class Template(object): + """A compiled template. + + We compile into Python from the given template_string. You can generate + the template from variables with generate(). + """ + def __init__(self, template_string, name="", loader=None, + compress_whitespace=None): + self.name = name + if compress_whitespace is None: + compress_whitespace = name.endswith(".html") or \ + name.endswith(".js") + reader = _TemplateReader(name, template_string) + self.file = _File(_parse(reader)) + self.code = self._generate_python(loader, compress_whitespace) + try: + self.compiled = compile(self.code, self.name, "exec") + except: + formatted_code = _format_code(self.code).rstrip() + logging.error("%s code:\n%s", self.name, formatted_code) + raise + + def generate(self, **kwargs): + """Generate this template with the given arguments.""" + namespace = { + "escape": escape.xhtml_escape, + "url_escape": escape.url_escape, + "json_encode": escape.json_encode, + "squeeze": escape.squeeze, + "datetime": datetime, + } + namespace.update(kwargs) + exec self.compiled in namespace + execute = namespace["_execute"] + try: + return execute() + except: + formatted_code = _format_code(self.code).rstrip() + logging.error("%s code:\n%s", self.name, formatted_code) + raise + + def _generate_python(self, loader, compress_whitespace): + buffer = cStringIO.StringIO() + try: + named_blocks = {} + ancestors = 
self._get_ancestors(loader) + ancestors.reverse() + for ancestor in ancestors: + ancestor.find_named_blocks(loader, named_blocks) + self.file.find_named_blocks(loader, named_blocks) + writer = _CodeWriter(buffer, named_blocks, loader, self, + compress_whitespace) + ancestors[0].generate(writer) + return buffer.getvalue() + finally: + buffer.close() + + def _get_ancestors(self, loader): + ancestors = [self.file] + for chunk in self.file.body.chunks: + if isinstance(chunk, _ExtendsBlock): + if not loader: + raise ParseError("{% extends %} block found, but no " + "template loader") + template = loader.load(chunk.name, self.name) + ancestors.extend(template._get_ancestors(loader)) + return ancestors + + +class Loader(object): + """A template loader that loads from a single root directory. + + You must use a template loader to use template constructs like + {% extends %} and {% include %}. Loader caches all templates after + they are loaded the first time. + """ + def __init__(self, root_directory): + self.root = os.path.abspath(root_directory) + self.templates = {} + + def load(self, name, parent_path=None): + if parent_path and not parent_path.startswith("<") and \ + not parent_path.startswith("/") and \ + not name.startswith("/"): + current_path = os.path.join(self.root, parent_path) + file_dir = os.path.dirname(os.path.abspath(current_path)) + relative_path = os.path.abspath(os.path.join(file_dir, name)) + if relative_path.startswith(self.root): + name = relative_path[len(self.root) + 1:] + if name not in self.templates: + path = os.path.join(self.root, name) + f = open(path, "r") + self.templates[name] = Template(f.read(), name=name, loader=self) + f.close() + return self.templates[name] + + +class _Node(object): + def each_child(self): + return () + + def generate(self, writer): + raise NotImplementedError() + + def find_named_blocks(self, loader, named_blocks): + for child in self.each_child(): + child.find_named_blocks(loader, named_blocks) + + +class 
class _File(_Node):
    """Root of a parsed template: wraps the whole body in _execute()."""

    def __init__(self, body):
        self.body = body

    def each_child(self):
        return (self.body,)

    def generate(self, writer):
        # Emit the function the compiled template module exposes.
        writer.write_line("def _execute():")
        with writer.indent():
            writer.write_line("_buffer = []")
            self.body.generate(writer)
            writer.write_line("return ''.join(_buffer)")


class _ChunkList(_Node):
    """An ordered sequence of sibling nodes."""

    def __init__(self, chunks):
        self.chunks = chunks

    def each_child(self):
        return self.chunks

    def generate(self, writer):
        for chunk in self.chunks:
            chunk.generate(writer)


class _NamedBlock(_Node):
    """A {% block name %} ... {% end %} region, replaceable by subtemplates."""

    def __init__(self, name, body=None):
        self.name = name
        self.body = body

    def each_child(self):
        return (self.body,)

    def generate(self, writer):
        # Generate whichever body (possibly from a child template) was
        # registered for this block name during find_named_blocks.
        writer.named_blocks[self.name].generate(writer)

    def find_named_blocks(self, loader, named_blocks):
        named_blocks[self.name] = self.body
        _Node.find_named_blocks(self, loader, named_blocks)


class _ExtendsBlock(_Node):
    """A {% extends "base.html" %} marker; resolved by the Template class."""

    def __init__(self, name):
        self.name = name


class _IncludeBlock(_Node):
    """A {% include "other.html" %} tag, generated inline."""

    def __init__(self, name, reader):
        self.name = name
        # Remember which template contained the include, for relative loads.
        self.template_name = reader.name

    def find_named_blocks(self, loader, named_blocks):
        included = loader.load(self.name, self.template_name)
        included.file.find_named_blocks(loader, named_blocks)

    def generate(self, writer):
        included = writer.loader.load(self.name, self.template_name)
        old = writer.current_template
        writer.current_template = included
        included.file.body.generate(writer)
        writer.current_template = old


class _ApplyBlock(_Node):
    """{% apply method %}: pipes the rendered body through a function."""

    def __init__(self, method, body=None):
        self.method = method
        self.body = body

    def each_child(self):
        return (self.body,)

    def generate(self, writer):
        # Each apply gets a uniquely named nested function for its body.
        method_name = "apply%d" % writer.apply_counter
        writer.apply_counter += 1
        writer.write_line("def %s():" % method_name)
        with writer.indent():
            writer.write_line("_buffer = []")
            self.body.generate(writer)
            writer.write_line("return ''.join(_buffer)")
        writer.write_line("_buffer.append(%s(%s()))" % (
            self.method, method_name))


class _ControlBlock(_Node):
    """A python control statement with a body: if/for/while/try."""

    def __init__(self, statement, body=None):
        self.statement = statement
        self.body = body

    def each_child(self):
        return (self.body,)

    def generate(self, writer):
        writer.write_line("%s:" % self.statement)
        with writer.indent():
            self.body.generate(writer)


class _IntermediateControlBlock(_Node):
    """else/elif/except/finally: emitted one indent level shallower."""

    def __init__(self, statement):
        self.statement = statement

    def generate(self, writer):
        writer.write_line("%s:" % self.statement, writer.indent_size() - 1)


class _Statement(_Node):
    """A bare {% set ... %} / {% import ... %} python statement."""

    def __init__(self, statement):
        self.statement = statement

    def generate(self, writer):
        writer.write_line(self.statement)


class _Expression(_Node):
    """A {{ ... }} expression whose value is appended to the output."""

    def __init__(self, expression):
        self.expression = expression

    def generate(self, writer):
        writer.write_line("_tmp = %s" % self.expression)
        # str passes through; unicode is encoded; everything else is str()'d.
        writer.write_line("if isinstance(_tmp, str): _buffer.append(_tmp)")
        writer.write_line("elif isinstance(_tmp, unicode): "
                          "_buffer.append(_tmp.encode('utf-8'))")
        writer.write_line("else: _buffer.append(str(_tmp))")


class _Text(_Node):
    """A literal run of template text."""

    def __init__(self, value):
        self.value = value

    def generate(self, writer):
        value = self.value

        # Compress lots of white space to a single character. If the
        # whitespace breaks a line, have it continue to break a line, but
        # just with a single \n character.
        # NOTE(review): the "<pre>" literal below was stripped from this
        # copy of the source by the extraction; restored from upstream --
        # confirm against the canonical file.
        if writer.compress_whitespace and "<pre>" not in value:
            value = re.sub(r"([\t ]+)", " ", value)
            value = re.sub(r"(\s*\n\s*)", "\n", value)

        if value:
            writer.write_line('_buffer.append(%r)' % value)
    +
    +class ParseError(Exception):
    +    """Raised for template syntax errors."""
    +    pass
    +
    +
    +class _CodeWriter(object):
    +    def __init__(self, file, named_blocks, loader, current_template,
    +                 compress_whitespace):
    +        self.file = file
    +        self.named_blocks = named_blocks
    +        self.loader = loader
    +        self.current_template = current_template
    +        self.compress_whitespace = compress_whitespace
    +        self.apply_counter = 0
    +        self._indent = 0
    +
    +    def indent(self):
    +        return self
    +
    +    def indent_size(self):
    +        return self._indent
    +
    +    def __enter__(self):
    +        self._indent += 1
    +        return self
    +
    +    def __exit__(self, *args):
    +        assert self._indent > 0
    +        self._indent -= 1
    +
    +    def write_line(self, line, indent=None):
    +        if indent == None:
    +            indent = self._indent
    +        for i in xrange(indent):
    +            self.file.write("    ")
    +        print >> self.file, line
    +
    +
    +class _TemplateReader(object):
    +    def __init__(self, name, text):
    +        self.name = name
    +        self.text = text
    +        self.line = 0
    +        self.pos = 0
    +
    +    def find(self, needle, start=0, end=None):
    +        assert start >= 0, start
    +        pos = self.pos
    +        start += pos
    +        if end is None:
    +            index = self.text.find(needle, start)
    +        else:
    +            end += pos
    +            assert end >= start
    +            index = self.text.find(needle, start, end)
    +        if index != -1:
    +            index -= pos
    +        return index
    +
    +    def consume(self, count=None):
    +        if count is None:
    +            count = len(self.text) - self.pos
    +        newpos = self.pos + count
    +        self.line += self.text.count("\n", self.pos, newpos)
    +        s = self.text[self.pos:newpos]
    +        self.pos = newpos
    +        return s
    +
    +    def remaining(self):
    +        return len(self.text) - self.pos
    +
    +    def __len__(self):
    +        return self.remaining()
    +
    +    def __getitem__(self, key):
    +        if type(key) is slice:
    +            size = len(self)
    +            start, stop, step = slice.indices(size)
    +            if start is None: start = self.pos
    +            else: start += self.pos
    +            if stop is not None: stop += self.pos
    +            return self.text[slice(start, stop, step)]
    +        elif key < 0:
    +            return self.text[key]
    +        else:
    +            return self.text[self.pos + key]
    +
    +    def __str__(self):
    +        return self.text[self.pos:]
    +
    +
    +def _format_code(code):
    +    lines = code.splitlines()
    +    format = "%%%dd  %%s\n" % len(repr(len(lines) + 1))
    +    return "".join([format % (i + 1, line) for (i, line) in enumerate(lines)])
    +
    +
def _parse(reader, in_block=None):
    """Recursively parse template source into a tree of _Node objects.

    reader: a _TemplateReader positioned at the text to parse.
    in_block: the operator name of the enclosing block ("if", "for",
        "block", ...) when called recursively, or None at top level.
        Controls whether {% end %} and intermediate tags (else/elif/
        except/finally) are legal at this point.

    Returns a _ChunkList of the parsed body; raises ParseError on
    malformed input.
    """
    body = _ChunkList([])
    while True:
        # Find next template directive
        curly = 0
        while True:
            curly = reader.find("{", curly)
            if curly == -1 or curly + 1 == reader.remaining():
                # EOF
                if in_block:
                    raise ParseError("Missing {%% end %%} block for %s" %
                                     in_block)
                body.chunks.append(_Text(reader.consume()))
                return body
            # If the first curly brace is not the start of a special token,
            # start searching from the character after it
            if reader[curly + 1] not in ("{", "%"):
                curly += 1
                continue
            break

        # Append any text before the special token
        if curly > 0:
            body.chunks.append(_Text(reader.consume(curly)))

        # Consume the opening "{{" or "{%" and remember the line for errors.
        start_brace = reader.consume(2)
        line = reader.line

        # Expression: {{ ... }}, must be on a single line
        if start_brace == "{{":
            end = reader.find("}}")
            if end == -1 or reader.find("\n", 0, end) != -1:
                raise ParseError("Missing end expression }} on line %d" % line)
            contents = reader.consume(end).strip()
            reader.consume(2)
            if not contents:
                raise ParseError("Empty expression on line %d" % line)
            body.chunks.append(_Expression(contents))
            continue

        # Block: {% ... %}, also single-line
        assert start_brace == "{%", start_brace
        end = reader.find("%}")
        if end == -1 or reader.find("\n", 0, end) != -1:
            raise ParseError("Missing end block %%} on line %d" % line)
        contents = reader.consume(end).strip()
        reader.consume(2)
        if not contents:
            raise ParseError("Empty block tag ({%% %%}) on line %d" % line)

        # First word is the operator; the rest (if any) is its argument.
        operator, space, suffix = contents.partition(" ")
        suffix = suffix.strip()

        # Intermediate ("else", "elif", etc) blocks: legal only directly
        # inside the listed parent block types.
        intermediate_blocks = {
            "else": set(["if", "for", "while"]),
            "elif": set(["if"]),
            "except": set(["try"]),
            "finally": set(["try"]),
        }
        allowed_parents = intermediate_blocks.get(operator)
        if allowed_parents is not None:
            if not in_block:
                raise ParseError("%s outside %s block" %
                            (operator, allowed_parents))
            if in_block not in allowed_parents:
                raise ParseError("%s block cannot be attached to %s block" % (operator, in_block))
            body.chunks.append(_IntermediateControlBlock(contents))
            continue

        # End tag: closes the recursive call that opened this block
        elif operator == "end":
            if not in_block:
                raise ParseError("Extra {%% end %%} block on line %d" % line)
            return body

        # Self-contained tags that take no {% end %}
        elif operator in ("extends", "include", "set", "import", "comment"):
            if operator == "comment":
                continue
            if operator == "extends":
                suffix = suffix.strip('"').strip("'")
                if not suffix:
                    raise ParseError("extends missing file path on line %d" % line)
                block = _ExtendsBlock(suffix)
            elif operator == "import":
                if not suffix:
                    raise ParseError("import missing statement on line %d" % line)
                # "import x" is emitted verbatim, so keep the full contents.
                block = _Statement(contents)
            elif operator == "include":
                suffix = suffix.strip('"').strip("'")
                if not suffix:
                    raise ParseError("include missing file path on line %d" % line)
                block = _IncludeBlock(suffix, reader)
            elif operator == "set":
                if not suffix:
                    raise ParseError("set missing statement on line %d" % line)
                # Only the statement after "set" is emitted.
                block = _Statement(suffix)
            body.chunks.append(block)
            continue

        # Blocks with bodies: recurse until the matching {% end %}
        elif operator in ("apply", "block", "try", "if", "for", "while"):
            # parse inner body recursively
            block_body = _parse(reader, operator)
            if operator == "apply":
                if not suffix:
                    raise ParseError("apply missing method name on line %d" % line)
                block = _ApplyBlock(suffix, block_body)
            elif operator == "block":
                if not suffix:
                    raise ParseError("block missing name on line %d" % line)
                block = _NamedBlock(suffix, block_body)
            else:
                block = _ControlBlock(contents, block_body)
            body.chunks.append(block)
            continue

        else:
            raise ParseError("unknown operator: %r" % operator)
    diff --git a/tornado/web.py b/tornado/web.py
    new file mode 100644
    index 000000000..f65532855
    --- /dev/null
    +++ b/tornado/web.py
    @@ -0,0 +1,1185 @@
    +#!/usr/bin/env python
    +#
    +# Copyright 2009 Facebook
    +#
    +# Licensed under the Apache License, Version 2.0 (the "License"); you may
    +# not use this file except in compliance with the License. You may obtain
    +# a copy of the License at
    +#
    +#     http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
    +# License for the specific language governing permissions and limitations
    +# under the License.
    +
    +"""The Tornado web framework.
    +
    +The Tornado web framework looks a bit like web.py (http://webpy.org/) or
    +Google's webapp (http://code.google.com/appengine/docs/python/tools/webapp/),
    +but with additional tools and optimizations to take advantage of the
    +Tornado non-blocking web server and tools.
    +
    +Here is the canonical "Hello, world" example app:
    +
    +    import tornado.httpserver
    +    import tornado.ioloop
    +    import tornado.web
    +
    +    class MainHandler(tornado.web.RequestHandler):
    +        def get(self):
    +            self.write("Hello, world")
    +
    +    if __name__ == "__main__":
    +        application = tornado.web.Application([
    +            (r"/", MainHandler),
    +        ])
    +        http_server = tornado.httpserver.HTTPServer(application)
    +        http_server.listen(8888)
    +        tornado.ioloop.IOLoop.instance().start()
    +
    +See the Tornado walkthrough on Google Code for more details and a good
    +getting started guide.
    +"""
    +
    +import base64
    +import binascii
    +import calendar
    +import Cookie
    +import datetime
    +import email.utils
    +import escape
    +import functools
    +import hashlib
    +import hmac
    +import httplib
    +import locale
    +import logging
    +import mimetypes
    +import os.path
    +import re
    +import stat
    +import sys
    +import template
    +import time
    +import types
    +import urllib
    +import urlparse
    +import uuid
    +
    +
class RequestHandler(object):
    """Subclass this class and define get() or post() to make a handler.

    If you want to support more methods than the standard GET/HEAD/POST, you
    should override the class variable SUPPORTED_METHODS in your
    RequestHandler class.
    """
    # HTTP verbs the framework will dispatch to same-named handler methods;
    # requests with any other verb are rejected before reaching the handler.
    SUPPORTED_METHODS = ("GET", "HEAD", "POST", "DELETE", "PUT")
    def __init__(self, application, request, transforms=None):
        """Set up per-request handler state.

        application: the owning Application (supplies settings, UI hooks).
        request: the HTTP request object being handled.
        transforms: optional list of output transforms applied to the
            response as it is written (e.g. chunking, compression).
        """
        self.application = application
        self.request = request
        self._headers_written = False
        self._finished = False
        self._auto_finish = True
        self._transforms = transforms or []
        # Expose the application's UI methods and modules to templates via
        # self.ui.  NOTE(review): _O is presumably an attribute-access dict
        # helper defined elsewhere in this module -- confirm.
        self.ui = _O((n, self._ui_method(m)) for n, m in
                     application.ui_methods.iteritems())
        self.ui["modules"] = _O((n, self._ui_module(n, m)) for n, m in
                                application.ui_modules.iteritems())
        self.clear()
    +
    @property
    def settings(self):
        """Shortcut for self.application.settings."""
        return self.application.settings

    # Default verb handlers: each standard method responds 405 ("Method Not
    # Allowed") unless the subclass overrides it.
    def head(self, *args, **kwargs):
        raise HTTPError(405)

    def get(self, *args, **kwargs):
        raise HTTPError(405)

    def post(self, *args, **kwargs):
        raise HTTPError(405)

    def delete(self, *args, **kwargs):
        raise HTTPError(405)

    def put(self, *args, **kwargs):
        raise HTTPError(405)

    def prepare(self):
        """Called before the actual handler method.

        Useful to override in a handler if you want a common bottleneck for
        all of your requests.  Does nothing by default.
        """
        pass
    +
    def clear(self):
        """Resets all headers and content for this response."""
        self._headers = {
            "Server": "TornadoServer/0.1",
            "Content-Type": "text/html; charset=UTF-8",
        }
        if not self.request.supports_http_1_1():
            # HTTP/1.0 closes the connection by default; echo Keep-Alive
            # back only when the client explicitly requested it.
            if self.request.headers.get("Connection") == "Keep-Alive":
                self.set_header("Connection", "Keep-Alive")
        self._write_buffer = []
        self._status_code = 200
    +
    def set_status(self, status_code):
        """Sets the status code for our response.

        Raises ValueError if status_code is not a recognized HTTP status.
        """
        # Validate explicitly rather than with "assert", which is silently
        # stripped when Python runs with -O and would then accept any value.
        if status_code not in httplib.responses:
            raise ValueError("Unknown status code %d" % status_code)
        self._status_code = status_code
    +
    +    def set_header(self, name, value):
    +        """Sets the given response header name and value.
    +
    +        If a datetime is given, we automatically format it according to the
    +        HTTP specification. If the value is not a string, we convert it to
    +        a string. All header values are then encoded as UTF-8.
    +        """
    +        if isinstance(value, datetime.datetime):
    +            t = calendar.timegm(value.utctimetuple())
    +            value = email.utils.formatdate(t, localtime=False, usegmt=True)
    +        elif isinstance(value, int) or isinstance(value, long):
    +            value = str(value)
    +        else:
    +            value = _utf8(value)
    +            # If \n is allowed into the header, it is possible to inject
    +            # additional headers or split the request. Also cap length to
    +            # prevent obviously erroneous values.
    +            safe_value = re.sub(r"[\x00-\x1f]", " ", value)[:4000]
    +            if safe_value != value:
    +                raise ValueError("Unsafe header value %r", value)
    +        self._headers[name] = value
    +
    # Sentinel meaning "no default supplied"; compared only by identity, so
    # its mutability is harmless.
    _ARG_DEFAULT = []
    def get_argument(self, name, default=_ARG_DEFAULT, strip=True):
        """Returns the value of the argument with the given name.

        If default is not provided, the argument is considered to be
        required, and we throw an HTTP 404 exception if it is missing.

        If the argument appears multiple times, the last value is returned.

        The returned value is always unicode.
        """
        values = self.request.arguments.get(name, None)
        if values is None:
            if default is self._ARG_DEFAULT:
                raise HTTPError(404, "Missing argument %s" % name)
            return default
        # Get rid of any weird control chars
        value = re.sub(r"[\x00-\x08\x0e-\x1f]", " ", values[-1])
        value = _unicode(value)
        if strip: value = value.strip()
        return value
    +
    @property
    def cookies(self):
        """A dictionary of Cookie.Morsel objects.

        Parsed lazily from the request's Cookie header on first access; a
        malformed header clears all cookies rather than raising.
        """
        if not hasattr(self, "_cookies"):
            self._cookies = Cookie.BaseCookie()
            if "Cookie" in self.request.headers:
                try:
                    self._cookies.load(self.request.headers["Cookie"])
                except Exception:
                    # Fixed: was a bare "except:", which would also swallow
                    # SystemExit/KeyboardInterrupt.  A malformed Cookie
                    # header is still treated as best-effort: drop them all.
                    self.clear_all_cookies()
        return self._cookies
    +
    +    def get_cookie(self, name, default=None):
    +        """Gets the value of the cookie with the given name, else default."""
    +        if name in self.cookies:
    +            return self.cookies[name].value
    +        return default
    +
    def set_cookie(self, name, value, domain=None, expires=None, path="/",
                   expires_days=None):
        """Sets the given cookie name/value with the given options.

        expires: a datetime (assumed UTC) for the cookie's expiration.
        expires_days: convenience alternative to expires; ignored when
            expires is also given.
        """
        name = _utf8(name)
        value = _utf8(value)
        if re.search(r"[\x00-\x20]", name + value):
            # Don't let us accidentally inject bad stuff
            raise ValueError("Invalid cookie %r: %r" % (name, value))
        if not hasattr(self, "_new_cookies"):
            self._new_cookies = []
        new_cookie = Cookie.BaseCookie()
        self._new_cookies.append(new_cookie)
        new_cookie[name] = value
        if domain:
            new_cookie[name]["domain"] = domain
        if expires_days is not None and not expires:
            expires = datetime.datetime.utcnow() + datetime.timedelta(
                days=expires_days)
        if expires:
            # Cookie expiration uses the same RFC 1123 GMT date format as
            # HTTP headers.
            timestamp = calendar.timegm(expires.utctimetuple())
            new_cookie[name]["expires"] = email.utils.formatdate(
                timestamp, localtime=False, usegmt=True)
        if path:
            new_cookie[name]["path"] = path
        # NOTE(review): _new_cookies is presumably emitted with the response
        # headers elsewhere (e.g. when headers are generated) -- confirm.
    +
    def clear_cookie(self, name, path="/", domain=None):
        """Deletes the cookie with the given name.

        Works by re-setting the cookie with an expiration date one year in
        the past; path/domain must match the original cookie for the
        browser to actually drop it.
        """
        expires = datetime.datetime.utcnow() - datetime.timedelta(days=365)
        self.set_cookie(name, value="", path=path, expires=expires,
                        domain=domain)

    def clear_all_cookies(self):
        """Deletes all the cookies the user sent with this request."""
        for name in self.cookies.iterkeys():
            self.clear_cookie(name)
    +
    def set_secure_cookie(self, name, value, expires_days=30, **kwargs):
        """Signs and timestamps a cookie so it cannot be forged.

        You must specify the 'cookie_secret' setting in your Application
        to use this method. It should be a long, random sequence of bytes
        to be used as the HMAC secret for the signature.

        To read a cookie set with this method, use get_secure_cookie().
        """
        # Cookie wire format: base64(value)|unix_timestamp|hmac_signature
        timestamp = str(int(time.time()))
        value = base64.b64encode(value)
        signature = self._cookie_signature(value, timestamp)
        value = "|".join([value, timestamp, signature])
        self.set_cookie(name, value, expires_days=expires_days, **kwargs)

    def get_secure_cookie(self, name):
        """Returns the given signed cookie if it validates, or None."""
        value = self.get_cookie(name)
        if not value: return None
        parts = value.split("|")
        if len(parts) != 3: return None
        # The signature covers both the encoded value and the timestamp,
        # so neither can be altered without invalidating the cookie.
        if self._cookie_signature(parts[0], parts[1]) != parts[2]:
            logging.warning("Invalid cookie signature %r", value)
            return None
        # Reject cookies older than 31 days even if the browser kept them.
        timestamp = int(parts[1])
        if timestamp < time.time() - 31 * 86400:
            logging.warning("Expired cookie %r", value)
            return None
        try:
            return base64.b64decode(parts[0])
        except:
            # Malformed base64: treat as an invalid cookie.
            # NOTE(review): bare except is overly broad -- consider
            # narrowing to the errors b64decode actually raises.
            return None

    def _cookie_signature(self, *parts):
        """HMAC-SHA1 over the given parts, keyed by 'cookie_secret'."""
        self.require_setting("cookie_secret", "secure cookies")
        hash = hmac.new(self.application.settings["cookie_secret"],
                        digestmod=hashlib.sha1)
        for part in parts: hash.update(part)
        return hash.hexdigest()
    +
    def redirect(self, url, permanent=False):
        """Sends a redirect to the given (optionally relative) URL.

        Uses status 301 when permanent is True, otherwise 302, and
        finishes the request.
        """
        if self._headers_written:
            raise Exception("Cannot redirect after headers have been written")
        self.set_status(301 if permanent else 302)
        # Remove whitespace/control characters so the URL cannot smuggle a
        # newline into the Location header.
        url = re.sub(r"[\x00-\x20]+", "", _utf8(url))
        self.set_header("Location", urlparse.urljoin(self.request.uri, url))
        self.finish()

    def write(self, chunk):
        """Writes the given chunk to the output buffer.

        To write the output to the network, use the flush() method below.

        If the given chunk is a dictionary, we write it as JSON and set
        the Content-Type of the response to be text/javascript.
        """
        assert not self._finished
        if isinstance(chunk, dict):
            chunk = escape.json_encode(chunk)
            self.set_header("Content-Type", "text/javascript; charset=UTF-8")
        chunk = _utf8(chunk)
        self._write_buffer.append(chunk)
    +
    def render(self, template_name, **kwargs):
        """Renders the template with the given arguments as the response.

        After rendering, any JavaScript/CSS/head content contributed by
        active UI modules is spliced into the generated HTML, then the
        request is finished with the result.

        NOTE(review): the HTML/script markup string literals in this method
        appear to have been stripped from this copy of the source (empty ''
        where <script>/<link> tag fragments belong) -- restore them from the
        upstream file before relying on this code.
        """
        html = self.render_string(template_name, **kwargs)

        # Insert the additional JS and CSS added by the modules on the page
        js_embed = []
        js_files = []
        css_embed = []
        css_files = []
        html_heads = []
        for module in getattr(self, "_active_modules", {}).itervalues():
            embed_part = module.embedded_javascript()
            if embed_part: js_embed.append(_utf8(embed_part))
            file_part = module.javascript_files()
            if file_part:
                if isinstance(file_part, basestring):
                    js_files.append(file_part)
                else:
                    js_files.extend(file_part)
            embed_part = module.embedded_css()
            if embed_part: css_embed.append(_utf8(embed_part))
            file_part = module.css_files()
            if file_part:
                if isinstance(file_part, basestring):
                    css_files.append(file_part)
                else:
                    css_files.extend(file_part)
            head_part = module.html_head()
            if head_part: html_heads.append(_utf8(head_part))
        if js_embed:
            js_embed = ''
            sloc = html.rindex('')
            html = html[:sloc] + js_embed + '\n' + html[sloc:]
        if js_files:
            # Module-relative paths are routed through static_url(); absolute
            # paths and full URLs are used as-is.
            paths = set()
            for path in js_files:
                if not path.startswith("/") and not path.startswith("http:"):
                    paths.add(self.static_url(path))
                else:
                    paths.add(path)
            js_embed = ''.join(''
                                 for p in paths)
            sloc = html.rindex('')
            html = html[:sloc] + js_embed + '\n' + html[sloc:]
        if css_embed:
            css_embed = ''
            hloc = html.index('')
            html = html[:hloc] + css_embed + '\n' + html[hloc:]
        if css_files:
            paths = set()
            for path in css_files:
                if not path.startswith("/") and not path.startswith("http:"):
                    paths.add(self.static_url(path))
                else:
                    paths.add(path)
            css_embed = ''.join(''
                                for p in paths)
            hloc = html.index('')
            html = html[:hloc] + css_embed + '\n' + html[hloc:]
        if html_heads:
            hloc = html.index('')
            html = html[:hloc] + ''.join(html_heads) + '\n' + html[hloc:]

        self.finish(html)
    +
    def render_string(self, template_name, **kwargs):
        """Generate the given template with the given arguments.

        We return the generated string. To generate and write a template
        as a response, use render() above.
        """
        # If no template_path is specified, use the path of the calling file:
        # walk up the stack past frames belonging to this module and take
        # the directory of the first outside caller.
        template_path = self.application.settings.get("template_path")
        if not template_path:
            frame = sys._getframe(0)
            web_file = frame.f_code.co_filename
            while frame.f_code.co_filename == web_file:
                frame = frame.f_back
            template_path = os.path.dirname(frame.f_code.co_filename)
        # Template loaders are cached per-path on the class, so compiled
        # templates are shared across all handlers in the process.
        if not getattr(RequestHandler, "_templates", None):
            RequestHandler._templates = {}
        if template_path not in RequestHandler._templates:
            RequestHandler._templates[template_path] = template.Loader(
                template_path)
        t = RequestHandler._templates[template_path].load(template_name)
        # Standard namespace available to every template; caller kwargs
        # override UI methods, which override these defaults.
        args = dict(
            handler=self,
            request=self.request,
            current_user=self.current_user,
            locale=self.locale,
            _=self.locale.translate,
            static_url=self.static_url,
            xsrf_form_html=self.xsrf_form_html,
        )
        args.update(self.ui)
        args.update(kwargs)
        return t.generate(**args)
    +
    +    def flush(self, include_footers=False):
    +        """Flushes the current output buffer to the nextwork."""
    +        if self.application._wsgi:
    +            raise Exception("WSGI applications do not support flush()")
    +        if not self._headers_written:
    +            self._headers_written = True
    +            headers = self._generate_headers()
    +        else:
    +            headers = ""
    +
    +        # Ignore the chunk and only write the headers for HEAD requests
    +        if self.request.method == "HEAD":
    +            if headers: self.request.write(headers)
    +            return
    +
    +        if self._write_buffer:
    +            chunk = "".join(self._write_buffer)
    +            self._write_buffer = []
    +            if chunk:
    +                # Don't write out empty chunks because that means
    +                # END-OF-STREAM with chunked encoding
    +                for transform in self._transforms:
    +                    chunk = transform.transform_chunk(chunk)
    +        else:
    +            chunk = ""
    +        if include_footers:
    +            footers = []
    +            for transform in self._transforms:
    +                footer = transform.footer()
    +                if footer: chunk += footer
    +
    +        if headers or chunk:
    +            self.request.write(headers + chunk)
    +
    def finish(self, chunk=None):
        """Finishes this response, ending the HTTP request.

        chunk: optional final piece of output, passed to write().
        """
        assert not self._finished
        if chunk: self.write(chunk)

        # Automatically support ETags and add the Content-Length header if
        # we have not flushed any content yet.
        if not self._headers_written:
            if self._status_code == 200 and self.request.method == "GET":
                # ETag is the SHA1 of the response body; a matching
                # If-None-Match from the client turns this into a 304 with
                # an empty body.
                hasher = hashlib.sha1()
                for part in self._write_buffer:
                    hasher.update(part)
                etag = '"%s"' % hasher.hexdigest()
                inm = self.request.headers.get("If-None-Match")
                if inm and inm.find(etag) != -1:
                    self._write_buffer = []
                    self.set_status(304)
                else:
                    self.set_header("Etag", etag)
            if "Content-Length" not in self._headers:
                content_length = sum(len(part) for part in self._write_buffer)
                self.set_header("Content-Length", content_length)

        if not self.application._wsgi:
            self.flush(include_footers=True)
            self.request.finish()
            self._log()
        self._finished = True
    +
    def send_error(self, status_code=500):
        """Sends the given HTTP error code to the browser.

        We also send the error HTML for the given error code as returned by
        get_error_html. Override that method if you want custom error pages
        for your application.
        """
        if self._headers_written:
            # Too late to change the response; just make sure it terminates.
            logging.error("Cannot send error response after headers written")
            if not self._finished:
                self.finish()
            return
        self.clear()
        self.set_status(status_code)
        message = self.get_error_html(status_code)
        self.finish(message)

    def get_error_html(self, status_code):
        """Override to implement custom error pages.

        NOTE(review): the HTML markup in the literal below appears to have
        been stripped from this copy of the source (originally an
        <html><title>...<body>... page) -- restore from upstream.
        """
        return "%(code)d: %(message)s" \
               "%(code)d: %(message)s" % {
            "code": status_code,
            "message": httplib.responses[status_code],
        }
    +
    @property
    def locale(self):
        """The locale for the current session.

        Determined by either get_user_locale, which you can override to
        set the locale based on, e.g., a user preference stored in a
        database, or get_browser_locale, which uses the Accept-Language
        header.  Cached on the handler after the first access.
        """
        if not hasattr(self, "_locale"):
            self._locale = self.get_user_locale()
            if not self._locale:
                self._locale = self.get_browser_locale()
                assert self._locale
        return self._locale

    def get_user_locale(self):
        """Override to determine the locale from the authenticated user.

        If None is returned, we use the Accept-Language header.
        """
        return None
    +
    def get_browser_locale(self, default="en_US"):
        """Determines the user's locale from Accept-Language header.

        See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4
        """
        if "Accept-Language" in self.request.headers:
            languages = self.request.headers["Accept-Language"].split(",")
            locales = []
            for language in languages:
                # Each entry looks like "en-gb;q=0.8"; q defaults to 1.0
                # and an unparseable q-value scores 0.
                parts = language.strip().split(";")
                if len(parts) > 1 and parts[1].startswith("q="):
                    try:
                        score = float(parts[1][2:])
                    except (ValueError, TypeError):
                        score = 0.0
                else:
                    score = 1.0
                locales.append((parts[0], score))
            if locales:
                # Highest q-value first.  (The tuple-unpacking lambda
                # parameter is Python 2-only syntax.)
                locales.sort(key=lambda (l, s): s, reverse=True)
                codes = [l[0] for l in locales]
                return locale.get(*codes)
        return locale.get(default)
    +
    +    @property
    +    def current_user(self):
    +        """The authenticated user for this request.
    +
    +        Determined by get_current_user, which you can override to
    +        set the user based on, e.g., a cookie. If that method is not
    +        overridden, this method always returns None.
    +
    +        We lazy-load the current user the first time this method is called
    +        and cache the result after that.
    +        """
    +        if not hasattr(self, "_current_user"):
    +            self._current_user = self.get_current_user()
    +        return self._current_user
    +
    +    def get_current_user(self):
    +        """Override to determine the current user from, e.g., a cookie."""
    +        # Returning None means the request is unauthenticated.
    +        return None
    +
    +    def get_login_url(self):
    +        """Override to customize the login URL based on the request.
    +
    +        By default, we use the 'login_url' application setting.
    +
    +        Raises an exception if the 'login_url' setting is not defined.
    +        """
    +        self.require_setting("login_url", "@tornado.web.authenticated")
    +        return self.application.settings["login_url"]
    +
    +    @property
    +    def xsrf_token(self):
    +        """The XSRF-prevention token for the current user/session.
    +
    +        To prevent cross-site request forgery, we set an '_xsrf' cookie
    +        and include the same '_xsrf' value as an argument with all POST
    +        requests. If the two do not match, we reject the form submission
    +        as a potential forgery.
    +
    +        See http://en.wikipedia.org/wiki/Cross-site_request_forgery
    +        """
    +        if not hasattr(self, "_xsrf_token"):
    +            token = self.get_cookie("_xsrf")
    +            if not token:
    +                # Random 32-char hex token (hex needs no HTML escaping).
    +                token = binascii.b2a_hex(uuid.uuid4().bytes)
    +                # Persistent cookie for logged-in users; session-only
    +                # cookie (no expiration) for anonymous visitors.
    +                expires_days = 30 if self.current_user else None
    +                self.set_cookie("_xsrf", token, expires_days=expires_days)
    +            self._xsrf_token = token
    +        return self._xsrf_token
    +
    +    def check_xsrf_cookie(self):
    +        """Verifies that the '_xsrf' cookie matches the '_xsrf' argument.
    +
    +        To prevent cross-site request forgery, we set an '_xsrf' cookie
    +        and include the same '_xsrf' value as an argument with all POST
    +        requests. If the two do not match, we reject the form submission
    +        as a potential forgery.
    +
    +        Raises an HTTPError with status 403 if the check fails.
    +
    +        See http://en.wikipedia.org/wiki/Cross-site_request_forgery
    +        """
    +        token = self.get_argument("_xsrf", None)
    +        if not token:
    +            raise HTTPError(403, "'_xsrf' argument missing from POST")
    +        if self.xsrf_token != token:
    +            raise HTTPError(403, "XSRF cookie does not match POST argument")
    +
    +    def xsrf_form_html(self):
    +        """An HTML  element to be included with all POST forms.
    +
    +        It defines the _xsrf input value, which we check on all POST
    +        requests to prevent cross-site request forgery. If you have set
    +        the 'xsrf_cookies' application setting, you must include this
    +        HTML within all of your HTML forms.
    +
    +        See check_xsrf_cookie() above for more information.
    +        """
    +        return ''
    +
    +    def static_url(self, path):
    +        """Returns a static URL for the given relative static file path.
    +
    +        This method requires you set the 'static_path' setting in your
    +        application (which specifies the root directory of your static
    +        files).
    +
    +        We append ?v= to the returned URL, which makes our
    +        static file handler set an infinite expiration header on the
    +        returned content. The signature is based on the content of the
    +        file.
    +
    +        If this handler has a "include_host" attribute, we include the
    +        full host for every static URL, including the "http://". Set
    +        this attribute for handlers whose output needs non-relative static
    +        path names.
    +        """
    +        self.require_setting("static_path", "static_url")
    +        if not hasattr(RequestHandler, "_static_hashes"):
    +            RequestHandler._static_hashes = {}
    +        hashes = RequestHandler._static_hashes
    +        if path not in hashes:
    +            try:
    +                f = open(os.path.join(
    +                    self.application.settings["static_path"], path))
    +                hashes[path] = hashlib.md5(f.read()).hexdigest()
    +                f.close()
    +            except:
    +                logging.error("Could not open static file %r", path)
    +                hashes[path] = None
    +        base = self.request.protocol + "://" + self.request.host \
    +            if getattr(self, "include_host", False) else ""
    +        if hashes.get(path):
    +            return base + "/static/" + path + "?v=" + hashes[path][:5]
    +        else:
    +            return base + "/static/" + path
    +
    +    def async_callback(self, callback, *args, **kwargs):
    +        """Wrap callbacks with this if they are used on asynchronous requests.
    +
    +        Catches exceptions and properly finishes the request.
    +        """
    +        if callback is None:
    +            return None
    +        if args or kwargs:
    +            # Pre-bind the given arguments; any args supplied by the async
    +            # library at call time are appended after them.
    +            callback = functools.partial(callback, *args, **kwargs)
    +        def wrapper(*args, **kwargs):
    +            try:
    +                return callback(*args, **kwargs)
    +            except Exception, e:
    +                if self._headers_written:
    +                    # Too late to send an error response; just log it.
    +                    logging.error("Exception after headers written",
    +                                  exc_info=True)
    +                else:
    +                    self._handle_request_exception(e)
    +        return wrapper
    +
    +    def require_setting(self, name, feature="this feature"):
    +        """Raises an exception if the given app setting is not defined."""
    +        if not self.application.settings.get(name):
    +            raise Exception("You must define the '%s' setting in your "
    +                            "application to use %s" % (name, feature))
    +
    +    def _execute(self, transforms, *args, **kwargs):
    +        """Executes this request with the given output transforms.
    +
    +        Order matters here: the XSRF check runs before prepare(), and
    +        prepare() may call finish() to short-circuit the request.
    +        """
    +        self._transforms = transforms
    +        try:
    +            if self.request.method not in self.SUPPORTED_METHODS:
    +                raise HTTPError(405)
    +            # If XSRF cookies are turned on, reject form submissions without
    +            # the proper cookie
    +            if self.request.method == "POST" and \
    +               self.application.settings.get("xsrf_cookies"):
    +                self.check_xsrf_cookie()
    +            self.prepare()
    +            if not self._finished:  
    +                getattr(self, self.request.method.lower())(*args, **kwargs)
    +                # @asynchronous handlers clear _auto_finish and call
    +                # finish() themselves later.
    +                if self._auto_finish and not self._finished:
    +                    self.finish()
    +        except Exception, e:
    +            self._handle_request_exception(e)
    +
    +    def _generate_headers(self):
    +        for transform in self._transforms:
    +            headers = transform.transform_headers(self._headers)
    +        lines = [self.request.version + " " + str(self._status_code) + " " +
    +                 httplib.responses[self._status_code]]
    +        lines.extend(["%s: %s" % (n, v) for n, v in self._headers.iteritems()])
    +        for cookie_dict in getattr(self, "_new_cookies", []):
    +            for cookie in cookie_dict.values():
    +                lines.append("Set-Cookie: " + cookie.OutputString(None))
    +        return "\r\n".join(lines) + "\r\n\r\n"
    +
    +    def _log(self):
    +        if self._status_code < 400:
    +            log_method = logging.info
    +        elif self._status_code < 500:
    +            log_method = logging.warning
    +        else:
    +            log_method = logging.error
    +        request_time = 1000.0 * self.request.request_time()
    +        log_method("%d %s %.2fms", self._status_code,
    +                   self._request_summary(), request_time)
    +
    +    def _request_summary(self):
    +        return self.request.method + " " + self.request.uri + " (" + \
    +            self.request.remote_ip + ")"
    +
    +    def _handle_request_exception(self, e):
    +        """Maps an uncaught exception to an HTTP error response.
    +
    +        HTTPError instances use their own status code; anything else
    +        becomes a 500 with a logged traceback.
    +        """
    +        if isinstance(e, HTTPError):
    +            if e.log_message:
    +                format = "%d %s: " + e.log_message
    +                args = [e.status_code, self._request_summary()] + list(e.args)
    +                logging.warning(format, *args)
    +            if e.status_code not in httplib.responses:
    +                # An unknown code has no reason phrase for the status line.
    +                logging.error("Bad HTTP status code: %d", e.status_code)
    +                self.send_error(500)
    +            else:
    +                self.send_error(e.status_code)
    +        else:
    +            logging.error("Uncaught exception %s\n%r", self._request_summary(),
    +                          self.request, exc_info=e)
    +            self.send_error(500)
    +
    +    def _ui_module(self, name, module):
    +        """Returns a render function bound to a lazily-created module.
    +
    +        Module instances are cached per handler in _active_modules, so a
    +        module rendered several times on one page shares one instance.
    +        """
    +        def render(*args, **kwargs):
    +            if not hasattr(self, "_active_modules"):
    +                self._active_modules = {}
    +            if name not in self._active_modules:
    +                self._active_modules[name] = module(self)
    +            rendered = self._active_modules[name].render(*args, **kwargs)
    +            return rendered
    +        return render
    +
    +    def _ui_method(self, method):
    +        return lambda *args, **kwargs: method(self, *args, **kwargs)
    +
    +
    +def asynchronous(method):
    +    """Wrap request handler methods with this if they are asynchronous.
    +
    +    If this decorator is given, the response is not finished when the
    +    method returns. It is up to the request handler to call self.finish()
    +    to finish the HTTP request. Without this decorator, the request is
    +    automatically finished when the get() or post() method returns.
    +
    +       class MyRequestHandler(web.RequestHandler):
    +           @web.asynchronous
    +           def get(self):
    +              http = httpclient.AsyncHTTPClient()
    +              http.fetch("http://friendfeed.com/", self._on_download)
    +
    +           def _on_download(self, response):
    +              self.write("Downloaded!")
    +              self.finish()
    +
    +    """
    +    @functools.wraps(method)
    +    def wrapper(self, *args, **kwargs):
    +        # WSGI requires the response to be complete when the callable
    +        # returns, so asynchronous handlers cannot work there.
    +        if self.application._wsgi:
    +            raise Exception("@asynchronous is not supported for WSGI apps")
    +        self._auto_finish = False
    +        return method(self, *args, **kwargs)
    +    return wrapper
    +
    +
    +class Application(object):
    +    """A collection of request handlers that make up a web application.
    +
    +    Instances of this class are callable and can be passed directly to
    +    HTTPServer to serve the application:
    +
    +        application = web.Application([
    +            (r"/", MainPageHandler),
    +        ])
    +        http_server = httpserver.HTTPServer(application)
    +        http_server.listen(8080)
    +        ioloop.IOLoop.instance().start()
    +
    +    The constructor for this class takes in a list of (regexp, request_class)
    +    tuples. When we receive requests, we iterate over the list in order and
    +    instantiate an instance of the first request class whose regexp matches
    +    the request path.
    +
    +    Each tuple can contain an optional third element, which should be a
    +    dictionary if it is present. That dictionary is passed as keyword
    +    arguments to the contructor of the handler. This pattern is used
    +    for the StaticFileHandler below:
    +
    +        application = web.Application([
    +            (r"/static/(.*)", web.StaticFileHandler, {"path": "/var/www"}),
    +        ])
    +
    +    We support virtual hosts with the add_handlers method, which takes in
    +    a host regular expression as the first argument:
    +
    +        application.add_handlers(r"www\.myhost\.com", [
    +            (r"/article/([0-9]+)", ArticleHandler),
    +        ])
    +
    +    You can serve static files by sending the static_path setting as a
    +    keyword argument. We will serve those files from the /static/ URI,
    +    and we will serve /favicon.ico and /robots.txt from the same directory.
    +    """
    +    def __init__(self, handlers=None, default_host="", transforms=None,
    +                 **settings):
    +        if transforms is None:
    +            self.transforms = [ChunkedTransferEncoding]
    +        else:
    +            self.transforms = transforms
    +        self.handlers = []
    +        self.default_host = default_host
    +        self.settings = settings
    +        self.ui_modules = {}
    +        self.ui_methods = {}
    +        self._wsgi = False
    +        self._load_ui_modules(settings.get("ui_modules", {}))
    +        self._load_ui_methods(settings.get("ui_methods", {}))
    +        if self.settings.get("static_path"):
    +            path = self.settings["static_path"]
    +            handlers = list(handlers or [])
    +            handlers.extend([
    +                (r"/static/(.*)", StaticFileHandler, dict(path=path)),
    +                (r"/(favicon\.ico)", StaticFileHandler, dict(path=path)),
    +                (r"/(robots\.txt)", StaticFileHandler, dict(path=path)),
    +            ])
    +        if handlers: self.add_handlers(".*$", handlers)
    +
    +    def add_handlers(self, host_pattern, host_handlers):
    +        """Appends the given handlers to our handler list."""
    +        if not host_pattern.endswith("$"):
    +            host_pattern += "$"
    +        handlers = []
    +        self.handlers.append((re.compile(host_pattern), handlers))
    +
    +        for handler_tuple in host_handlers:
    +            assert len(handler_tuple) in (2, 3)
    +            pattern = handler_tuple[0]
    +            handler = handler_tuple[1]
    +            if len(handler_tuple) == 3:
    +                kwargs = handler_tuple[2]
    +            else:
    +                kwargs = {}
    +            if not pattern.endswith("$"):
    +                pattern += "$"
    +            handlers.append((re.compile(pattern), handler, kwargs))
    +
    +    def add_transform(self, transform_class):
    +        """Adds the given OutputTransform to our transform list."""
    +        self.transforms.append(transform_class)
    +
    +    def _get_host_handlers(self, request):
    +        host = request.host.lower().split(':')[0]
    +        for pattern, handlers in self.handlers:
    +            if pattern.match(host):
    +                return handlers
    +        # Look for default host if not behind load balancer (for debugging)
    +        if "X-Real-Ip" not in request.headers:
    +            for pattern, handlers in self.handlers:
    +                if pattern.match(self.default_host):
    +                    return handlers
    +        return None
    +
    +    def _load_ui_methods(self, methods):
    +        if type(methods) is types.ModuleType:
    +            self._load_ui_methods(dict((n, getattr(methods, n))
    +                                       for n in dir(methods)))
    +        elif isinstance(methods, list):
    +            for m in list: self._load_ui_methods(m)
    +        else:
    +            for name, fn in methods.iteritems():
    +                if not name.startswith("_") and hasattr(fn, "__call__") \
    +                   and name[0].lower() == name[0]:
    +                    self.ui_methods[name] = fn
    +
    +    def _load_ui_modules(self, modules):
    +        if type(modules) is types.ModuleType:
    +            self._load_ui_modules(dict((n, getattr(modules, n))
    +                                       for n in dir(modules)))
    +        elif isinstance(modules, list):
    +            for m in list: self._load_ui_modules(m)
    +        else:
    +            assert isinstance(modules, dict)
    +            for name, cls in modules.iteritems():
    +                try:
    +                    if issubclass(cls, UIModule):
    +                        self.ui_modules[name] = cls
    +                except TypeError:
    +                    pass
    +
    +    def __call__(self, request):
    +        """Called by HTTPServer to execute the request."""
    +        transforms = [t(request) for t in self.transforms]
    +        handler = None
    +        args = []
    +        handlers = self._get_host_handlers(request)
    +        if not handlers: 
    +            handler = RedirectHandler(
    +                request, "http://" + self.default_host + "/")
    +        else:
    +            for pattern, handler_class, kwargs in handlers:
    +                match = pattern.match(request.path)
    +                if match:
    +                    handler = handler_class(self, request, **kwargs)
    +                    args = match.groups()
    +                    break
    +            if not handler:
    +                handler = ErrorHandler(self, request, 404)
    +
    +        # In debug mode, re-compile templates and reload static files on every
    +        # request so you don't need to restart to see changes
    +        if self.settings.get("debug"):
    +            RequestHandler._templates = None
    +            RequestHandler._static_hashes = {}
    +
    +        handler._execute(transforms, *args)
    +        return handler
    +
    +
    +class HTTPError(Exception):
    +    """An exception that will turn into an HTTP error response."""
    +    def __init__(self, status_code, log_message=None, *args):
    +        # log_message is a printf-style format string; args are
    +        # interpolated into it for logging and for __str__ below.
    +        self.status_code = status_code
    +        self.log_message = log_message
    +        self.args = args
    +
    +    def __str__(self):
    +        # NOTE(review): assumes status_code is a standard code listed in
    +        # httplib.responses -- an unrecognized code raises KeyError here.
    +        message = "HTTP %d: %s" % (
    +            self.status_code, httplib.responses[self.status_code])
    +        if self.log_message:
    +            return message + " (" + (self.log_message % self.args) + ")"
    +        else:
    +            return message
    +
    +
    +class ErrorHandler(RequestHandler):
    +    """Generates an error response with status_code for all requests."""
    +    def __init__(self, application, request, status_code):
    +        RequestHandler.__init__(self, application, request)
    +        self.set_status(status_code)
    +
    +    def prepare(self):
    +        # Raising in prepare() short-circuits every HTTP method uniformly.
    +        raise HTTPError(self._status_code)
    +
    +
    +class RedirectHandler(RequestHandler):
    +    """Redirects the client to the given URL for all GET requests.
    +
    +    You should provide the keyword argument "url" to the handler, e.g.:
    +
    +        application = web.Application([
    +            (r"/oldpath", web.RedirectHandler, {"url": "/newpath"}),
    +        ])
    +
    +    Pass permanent=False to issue a temporary redirect instead of the
    +    default permanent one.
    +    """
    +    def __init__(self, application, request, url, permanent=True):
    +        RequestHandler.__init__(self, application, request)
    +        self._url = url
    +        self._permanent = permanent
    +        
    +    def get(self):
    +        self.redirect(self._url, permanent=self._permanent)
    +
    +
    +class StaticFileHandler(RequestHandler):
    +    """A simple handler that can serve static content from a directory.
    +
    +    To map a path to this handler for a static data directory /var/www,
    +    you would add a line to your application like:
    +
    +        application = web.Application([
    +            (r"/static/(.*)", web.StaticFileHandler, {"path": "/var/www"}),
    +        ])
    +
    +    The local root directory of the content should be passed as the "path"
    +    argument to the handler.
    +
    +    To support aggressive browser caching, if the argument "v" is given
    +    with the path, we set an infinite HTTP expiration header. So, if you
    +    want browsers to cache a file indefinitely, send them to, e.g.,
    +    /static/images/myimage.png?v=xxx.
    +    """
    +    def __init__(self, application, request, path):
    +        RequestHandler.__init__(self, application, request)
    +        self.root = os.path.abspath(path) + "/"
    +
    +    def head(self, path):
    +        self.get(path, include_body=False)
    +
    +    def get(self, path, include_body=True):
    +        abspath = os.path.abspath(os.path.join(self.root, path))
    +        if not abspath.startswith(self.root):
    +            raise HTTPError(403, "%s is not in root static directory", path)
    +        if not os.path.exists(abspath):
    +            raise HTTPError(404)
    +        if not os.path.isfile(abspath):
    +            raise HTTPError(403, "%s is not a file", path)
    +
    +        # Check the If-Modified-Since, and don't send the result if the
    +        # content has not been modified
    +        stat_result = os.stat(abspath)
    +        modified = datetime.datetime.fromtimestamp(stat_result[stat.ST_MTIME])
    +        ims_value = self.request.headers.get("If-Modified-Since")
    +        if ims_value is not None:
    +            date_tuple = email.utils.parsedate(ims_value)
    +            if_since = datetime.datetime.fromtimestamp(time.mktime(date_tuple))
    +            if if_since >= modified:
    +                self.set_status(304)
    +                return
    +
    +        self.set_header("Last-Modified", modified)
    +        self.set_header("Content-Length", stat_result[stat.ST_SIZE])
    +        if "v" in self.request.arguments:
    +            self.set_header("Expires", datetime.datetime.utcnow() + \
    +                                       datetime.timedelta(days=365*10))
    +            self.set_header("Cache-Control", "max-age=" + str(86400*365*10))
    +        else:
    +            self.set_header("Cache-Control", "public")
    +        mime_type, encoding = mimetypes.guess_type(abspath)
    +        if mime_type:
    +            self.set_header("Content-Type", mime_type)
    +
    +        if not include_body:
    +            return
    +        file = open(abspath, "r")
    +        try:
    +            self.write(file.read())
    +        finally:
    +            file.close()
    +
    +
    +class OutputTransform(object):
    +    """A transform modifies the result of an HTTP request (e.g., GZip encoding)
    +
    +    A new transform instance is created for every request. The sequence of
    +    calls is:
    +
    +         t = Transform(request) # Constructor
    +         # Request processing
    +         headers = t.transform_headers(headers)
    +         # Write headers
    +         for block in result:
    +             write(t.transform_chunk(block))
    +         write(t.footer())
    +
    +    See the ChunkedTransferEncoding example below if you want to implement a
    +    new Transform.
    +    """
    +    def __init__(self, request):
    +        pass
    +
    +    def transform_headers(self, headers):
    +        # Default: pass the headers through unchanged.
    +        return headers
    +
    +    def transform_chunk(self, block):
    +        # Default: pass each body chunk through unchanged.
    +        return block
    +
    +    def footer(self):
    +        # Default: no trailing bytes after the body.
    +        return None
    +
    +
    +class ChunkedTransferEncoding(OutputTransform):
    +    """Applies the chunked transfer encoding to the response.
    +
    +    See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
    +    """
    +    def __init__(self, request):
    +        # Chunked encoding is only valid for HTTP/1.1 clients.
    +        self._chunking = request.supports_http_1_1()
    +
    +    def transform_headers(self, headers):
    +        if self._chunking:
    +            # No need to chunk the output if a Content-Length is specified
    +            if "Content-Length" in headers or "Transfer-Encoding" in headers:
    +                self._chunking = False
    +            else:
    +                # NOTE: mutates the headers dict in place and returns it.
    +                headers["Transfer-Encoding"] = "chunked"
    +        return headers
    +        
    +    def transform_chunk(self, block):
    +        if self._chunking:
    +            # Each chunk is: hex length, CRLF, data, CRLF.
    +            return ("%x" % len(block)) + "\r\n" + block + "\r\n"
    +        else:
    +            return block
    +
    +    def footer(self):
    +        if self._chunking:
    +            # A zero-length chunk terminates the chunked body.
    +            return "0\r\n\r\n"
    +        else:
    +            return None
    +
    +
    +def authenticated(method):
    +    """Decorate methods with this to require that the user be logged in.
    +
    +    Unauthenticated GET requests are redirected to the login URL (with a
    +    "next" parameter added when the URL has no query string); all other
    +    methods receive a 403.
    +    """
    +    @functools.wraps(method)
    +    def wrapper(self, *args, **kwargs):
    +        if not self.current_user:
    +            if self.request.method == "GET":
    +                url = self.get_login_url()
    +                if "?" not in url:
    +                    url += "?" + urllib.urlencode(dict(next=self.request.uri))
    +                self.redirect(url)
    +                return
    +            raise HTTPError(403)
    +        return method(self, *args, **kwargs)
    +    return wrapper
    +
    +
    +class UIModule(object):
    +    """A UI re-usable, modular unit on a page.
    +
    +    UI modules often execute additional queries, and they can include
    +    additional CSS and JavaScript that will be included in the output
    +    page, which is automatically inserted on page render.
    +    """
    +    def __init__(self, handler):
    +        self.handler = handler
    +        self.request = handler.request
    +        self.ui = handler.ui
    +        self.current_user = handler.current_user
    +        self.locale = handler.locale
    +
    +    def render(self, *args, **kwargs):
    +        # Subclasses must override this to produce the module's output.
    +        raise NotImplementedError()
    +
    +    def embedded_javascript(self):
    +        """Returns a JavaScript string that will be embedded in the page."""
    +        return None
    +
    +    def javascript_files(self):
    +        """Returns a list of JavaScript files required by this module."""
    +        return None
    +
    +    def embedded_css(self):
    +        """Returns a CSS string that will be embedded in the page."""
    +        return None
    +
    +    def css_files(self):
    +        """Returns a list of CSS files required by this module."""
    +        return None
    +
    +    def html_head(self):
    +        """Returns an HTML string that will be put in the <head> element."""
    +        return None
    +
    +    def render_string(self, path, **kwargs):
    +        return self.handler.render_string(path, **kwargs)
    +
    +
    +def _utf8(s):
    +    if isinstance(s, unicode):
    +        return s.encode("utf-8")
    +    assert isinstance(s, str)
    +    return s
    +
    +
    +def _unicode(s):
    +    if isinstance(s, str):
    +        try:
    +            return s.decode("utf-8")
    +        except UnicodeDecodeError:
    +            raise HTTPError(400, "Non-utf8 argument")
    +    assert isinstance(s, unicode)
    +    return s
    +
    +
    +class _O(dict):
    +    """Makes a dictionary behave like an object.
    +
    +    Attribute access is forwarded to item access, so o.name is o["name"].
    +    """
    +    def __getattr__(self, name):
    +        try:
    +            return self[name]
    +        except KeyError:
    +            # The attribute protocol expects AttributeError, not KeyError.
    +            raise AttributeError(name)
    +
    +    def __setattr__(self, name, value):
    +        self[name] = value
    diff --git a/tornado/wsgi.py b/tornado/wsgi.py
    new file mode 100644
    index 000000000..714b08ed4
    --- /dev/null
    +++ b/tornado/wsgi.py
    @@ -0,0 +1,190 @@
    +#!/usr/bin/env python
    +#
    +# Copyright 2009 Facebook
    +#
    +# Licensed under the Apache License, Version 2.0 (the "License"); you may
    +# not use this file except in compliance with the License. You may obtain
    +# a copy of the License at
    +#
    +#     http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
    +# License for the specific language governing permissions and limitations
    +# under the License.
    +
    +"""WSGI support for the Tornado web framework.
    +
    +We export WSGIApplication, which is very similar to web.Application, except
    +no asynchronous methods are supported (since WSGI does not support
    +non-blocking requests properly). If you call self.flush() or other
    +asynchronous methods in your request handlers running in a WSGIApplication,
    +we throw an exception.
    +
    +Example usage:
    +
    +    import tornado.web
    +    import tornado.wsgi
    +    import wsgiref.simple_server
    +
    +    class MainHandler(tornado.web.RequestHandler):
    +        def get(self):
    +            self.write("Hello, world")
    +
    +    if __name__ == "__main__":
    +        application = tornado.wsgi.WSGIApplication([
    +            (r"/", MainHandler),
    +        ])
    +        server = wsgiref.simple_server.make_server('', 8888, application)
    +        server.serve_forever()
    +
    +See the 'appengine' demo for an example of using this module to run
    +a Tornado app on Google AppEngine.
    +
    +Since no asynchronous methods are available for WSGI applications, the
    +httpclient and auth modules are both not available for WSGI applications.
    +"""
    +
    +import cgi
    +import httplib
    +import logging
    +import time
    +import urllib
    +import web
    +
    +
    +class WSGIApplication(web.Application):
    +    """A WSGI-equivalent of web.Application.
    +
    +    We support the same interface, but handlers running in a WSGIApplication
    +    do not support flush() or asynchronous methods.
    +    """
    +    def __init__(self, handlers=None, default_host="", **settings):
    +        # No output transforms: the WSGI server owns transfer encoding.
    +        web.Application.__init__(self, handlers, default_host, transforms=[],
    +                                 **settings)
    +        self._wsgi = True
    +
    +    def __call__(self, environ, start_response):
    +        # The handler must have finished synchronously; @asynchronous
    +        # raises when _wsgi is True, so _finished always holds here.
    +        handler = web.Application.__call__(self, HTTPRequest(environ))
    +        assert handler._finished
    +        status = str(handler._status_code) + " " + \
    +            httplib.responses[handler._status_code]
    +        headers = handler._headers.items()
    +        for cookie_dict in getattr(handler, "_new_cookies", []):
    +            for cookie in cookie_dict.values():
    +                headers.append(("Set-Cookie", cookie.OutputString(None)))
    +        start_response(status, headers)
    +        return handler._write_buffer
    +
    +
    +class HTTPRequest(object):
    +    """Mimics httpserver.HTTPRequest for WSGI applications."""
    +    def __init__(self, environ):
    +        """Parses the given WSGI environ to construct the request."""
    +        self.method = environ["REQUEST_METHOD"]
    +        self.path = urllib.quote(environ.get("SCRIPT_NAME", ""))
    +        self.path += urllib.quote(environ.get("PATH_INFO", ""))
    +        self.uri = self.path
    +        self.arguments = {}
    +        self.query = environ.get("QUERY_STRING", "")
    +        if self.query:
    +            self.uri += "?" + self.query
    +            arguments = cgi.parse_qs(self.query)
    +            for name, values in arguments.iteritems():
    +                values = [v for v in values if v]
    +                if values: self.arguments[name] = values
    +        self.version = "HTTP/1.1"
    +        self.headers = HTTPHeaders()
    +        if environ.get("CONTENT_TYPE"):
    +            self.headers["Content-Type"] = environ["CONTENT_TYPE"]
    +        if environ.get("CONTENT_LENGTH"):
    +            self.headers["Content-Length"] = int(environ["CONTENT_LENGTH"])
    +        for key in environ:
    +            if key.startswith("HTTP_"):
    +                self.headers[key[5:].replace("_", "-")] = environ[key]
    +        if self.headers.get("Content-Length"):
    +            self.body = environ["wsgi.input"].read()
    +        else:
    +            self.body = ""
    +        self.protocol = environ["wsgi.url_scheme"]
    +        self.remote_ip = environ.get("REMOTE_ADDR", "")
    +        if environ.get("HTTP_HOST"):
    +            self.host = environ["HTTP_HOST"]
    +        else:
    +            self.host = environ["SERVER_NAME"]
    +
    +        # Parse request body
    +        self.files = {}
    +        content_type = self.headers.get("Content-Type", "")
    +        if content_type.startswith("application/x-www-form-urlencoded"):
    +            for name, values in cgi.parse_qs(self.body).iteritems():
    +                self.arguments.setdefault(name, []).extend(values)
    +        elif content_type.startswith("multipart/form-data"):
    +            boundary = content_type[30:]
    +            if boundary: self._parse_mime_body(boundary, data)
    +
    +        self._start_time = time.time()
    +        self._finish_time = None
    +
    +    def supports_http_1_1(self):
    +        """Returns True if this request supports HTTP/1.1 semantics"""
    +        return self.version == "HTTP/1.1"
    +
    +    def full_url(self):
    +        """Reconstructs the full URL for this request."""
    +        return self.protocol + "://" + self.host + self.uri
    +
    +    def request_time(self):
    +        """Returns the amount of time it took for this request to execute."""
    +        if self._finish_time is None:
    +            return time.time() - self._start_time
    +        else:
    +            return self._finish_time - self._start_time
    +
    +    def _parse_mime_body(self, boundary):
    +        if self.body.endswith("\r\n"):
    +            footer_length = len(boundary) + 6
    +        else:
    +            footer_length = len(boundary) + 4
    +        parts = self.body[:-footer_length].split("--" + boundary + "\r\n")
    +        for part in parts:
    +            if not part: continue
    +            eoh = part.find("\r\n\r\n")
    +            if eoh == -1:
    +                logging.warning("multipart/form-data missing headers")
    +                continue
    +            headers = HTTPHeaders.parse(part[:eoh])
    +            name_header = headers.get("Content-Disposition", "")
    +            if not name_header.startswith("form-data;") or \
    +               not part.endswith("\r\n"):
    +                logging.warning("Invalid multipart/form-data")
    +                continue
    +            value = part[eoh + 4:-2]
    +            name_values = {}
    +            for name_part in name_header[10:].split(";"):
    +                name, name_value = name_part.strip().split("=", 1)
    +                name_values[name] = name_value.strip('"').decode("utf-8")
    +            if not name_values.get("name"):
    +                logging.warning("multipart/form-data value missing name")
    +                continue
    +            name = name_values["name"]
    +            if name_values.get("filename"):
    +                ctype = headers.get("Content-Type", "application/unknown")
    +                self.files.setdefault(name, []).append(dict(
    +                    filename=name_values["filename"], body=value,
    +                    content_type=ctype))
    +            else:
    +                self.arguments.setdefault(name, []).append(value)
    +
    +
    +class HTTPHeaders(dict):
    +    """A dictionary that maintains Http-Header-Case for all keys."""
    +    def __setitem__(self, name, value):
    +        dict.__setitem__(self, self._normalize_name(name), value)
    +
    +    def __getitem__(self, name):
    +        return dict.__getitem__(self, self._normalize_name(name))
    +
    +    def _normalize_name(self, name):
    +        return intern("-".join([w.capitalize() for w in name.split("-")]))
    diff --git a/website/app.yaml b/website/app.yaml
    new file mode 100644
    index 000000000..13f6459af
    --- /dev/null
    +++ b/website/app.yaml
    @@ -0,0 +1,19 @@
    +application: python-tornado
    +version: 1
    +runtime: python
    +api_version: 1
    +
    +handlers:
    +- url: /static/
    +  static_dir: static
    +
    +- url: /robots\.txt
    +  static_files: static/robots.txt
    +  upload: static/robots.txt
    +
    +- url: /favicon\.ico
    +  static_files: static/favicon.ico
    +  upload: static/favicon.ico
    +
    +- url: /.*
    +  script: website.py
    diff --git a/website/index.yaml b/website/index.yaml
    new file mode 100644
    index 000000000..e69de29bb
    diff --git a/website/markdown/__init__.py b/website/markdown/__init__.py
    new file mode 100644
    index 000000000..0d1c50497
    --- /dev/null
    +++ b/website/markdown/__init__.py
    @@ -0,0 +1,603 @@
    +"""
    +Python Markdown
    +===============
    +
    +Python Markdown converts Markdown to HTML and can be used as a library or
    +called from the command line.
    +
    +## Basic usage as a module:
    +
    +    import markdown
    +    md = Markdown()
    +    html = md.convert(your_text_string)
    +
    +## Basic use from the command line:
    +
    +    python markdown.py source.txt > destination.html
    +
    +Run "python markdown.py --help" to see more options.
    +
    +## Extensions
    +
See <http://www.freewisdom.org/projects/python-markdown/> for more
    +information and instructions on how to extend the functionality of
    +Python Markdown.  Read that before you try modifying this file.
    +
    +## Authors and License
    +
    +Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
    +maintained  by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
    +Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
    +
    +Contact: markdown@freewisdom.org
    +
    +Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
    +Copyright 200? Django Software Foundation (OrderedDict implementation)
    +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
    +Copyright 2004 Manfred Stienstra (the original version)
    +
    +License: BSD (see docs/LICENSE for details).
    +"""
    +
    +version = "2.0"
    +version_info = (2,0,0, "Final")
    +
    +import re
    +import codecs
    +import sys
    +import warnings
    +import logging
    +from logging import DEBUG, INFO, WARN, ERROR, CRITICAL
    +
    +
    +"""
    +CONSTANTS
    +=============================================================================
    +"""
    +
    +"""
    +Constants you might want to modify
    +-----------------------------------------------------------------------------
    +"""
    +
    +# default logging level for command-line use
    +COMMAND_LINE_LOGGING_LEVEL = CRITICAL
    +TAB_LENGTH = 4               # expand tabs to this many spaces
    +ENABLE_ATTRIBUTES = True     # @id = xyz -> <... id="xyz">
    +SMART_EMPHASIS = True        # this_or_that does not become thisorthat
    +DEFAULT_OUTPUT_FORMAT = 'xhtml1'     # xhtml or html4 output
    +HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
    +BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
    +                                  "|script|noscript|form|fieldset|iframe|math"
    +                                  "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
    +                                  "|tr|th|td")
    +DOC_TAG = "div"     # Element used to wrap document - later removed
    +
    +# Placeholders
    +STX = u'\u0002'  # Use STX ("Start of text") for start-of-placeholder
    +ETX = u'\u0003'  # Use ETX ("End of text") for end-of-placeholder
    +INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
    +INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
    +AMP_SUBSTITUTE = STX+"amp"+ETX
    +
    +
    +"""
    +Constants you probably do not need to change
    +-----------------------------------------------------------------------------
    +"""
    +
    +RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
    +                     # Hebrew (0590-05FF), Arabic (0600-06FF),
    +                     # Syriac (0700-074F), Arabic supplement (0750-077F),
    +                     # Thaana (0780-07BF), Nko (07C0-07FF).
    +                    (u'\u2D30', u'\u2D7F'), # Tifinagh
    +                    )
    +
    +
    +"""
    +AUXILIARY GLOBAL FUNCTIONS
    +=============================================================================
    +"""
    +
    +
    +def message(level, text):
    +    """ A wrapper method for logging debug messages. """
    +    logger =  logging.getLogger('MARKDOWN')
    +    if logger.handlers:
    +        # The logger is configured
    +        logger.log(level, text)
    +        if level > WARN:
    +            sys.exit(0)
    +    elif level > WARN:
    +        raise MarkdownException, text
    +    else:
    +        warnings.warn(text, MarkdownWarning)
    +
    +
    +def isBlockLevel(tag):
    +    """Check if the tag is a block level HTML tag."""
    +    return BLOCK_LEVEL_ELEMENTS.match(tag)
    +
    +"""
    +MISC AUXILIARY CLASSES
    +=============================================================================
    +"""
    +
class AtomicString(unicode):
    """A string which should not be further processed.

    NOTE(review): consumers presumably check isinstance(..., AtomicString)
    to skip inline processing -- confirm against inlinepatterns.
    """
    pass
    +
    +
class MarkdownException(Exception):
    """ A Markdown Exception.

    Raised by message() for levels above WARN when no logger is configured.
    """
    pass
    +
    +
class MarkdownWarning(Warning):
    """ A Markdown Warning.

    Emitted via warnings.warn() by message() when no logger is configured
    and the level is WARN or below.
    """
    pass
    +
    +
    +"""
    +OVERALL DESIGN
    +=============================================================================
    +
Markdown processing takes place in five steps:
    +
    +1. A bunch of "preprocessors" munge the input text.
    +2. BlockParser() parses the high-level structural elements of the
    +   pre-processed text into an ElementTree.
    +3. A bunch of "treeprocessors" are run against the ElementTree. One such
    +   treeprocessor runs InlinePatterns against the ElementTree, detecting inline
    +   markup.
    +4. Some post-processors are run against the text after the ElementTree has
    +   been serialized into text.
    +5. The output is written to a string.
    +
    +Those steps are put together by the Markdown() class.
    +
    +"""
    +
    +import preprocessors
    +import blockprocessors
    +import treeprocessors
    +import inlinepatterns
    +import postprocessors
    +import blockparser
    +import etree_loader
    +import odict
    +
    +# Extensions should use "markdown.etree" instead of "etree" (or do `from
    +# markdown import etree`).  Do not import it by yourself.
    +
    +etree = etree_loader.importETree()
    +
    +# Adds the ability to output html4
    +import html4
    +
    +
    +class Markdown:
    +    """Convert Markdown to HTML."""
    +
    +    def __init__(self,
    +                 extensions=[],
    +                 extension_configs={},
    +                 safe_mode = False, 
    +                 output_format=DEFAULT_OUTPUT_FORMAT):
    +        """
    +        Creates a new Markdown instance.
    +
    +        Keyword arguments:
    +
    +        * extensions: A list of extensions.
    +           If they are of type string, the module mdx_name.py will be loaded.
    +           If they are a subclass of markdown.Extension, they will be used
    +           as-is.
    +        * extension-configs: Configuration setting for extensions.
    +        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
    +        * output_format: Format of output. Supported formats are:
    +            * "xhtml1": Outputs XHTML 1.x. Default.
    +            * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
    +            * "html4": Outputs HTML 4
    +            * "html": Outputs latest supported version of HTML (currently HTML 4).
    +            Note that it is suggested that the more specific formats ("xhtml1" 
    +            and "html4") be used as "xhtml" or "html" may change in the future
    +            if it makes sense at that time. 
    +
    +        """
    +        
    +        self.safeMode = safe_mode
    +        self.registeredExtensions = []
    +        self.docType = ""
    +        self.stripTopLevelTags = True
    +
    +        # Preprocessors
    +        self.preprocessors = odict.OrderedDict()
    +        self.preprocessors["html_block"] = \
    +                preprocessors.HtmlBlockPreprocessor(self)
    +        self.preprocessors["reference"] = \
    +                preprocessors.ReferencePreprocessor(self)
    +        # footnote preprocessor will be inserted with "amp_substitute"
    +
    +        # Map format keys to serializers
    +        self.output_formats = {
    +            'html'  : html4.to_html_string, 
    +            'html4' : html4.to_html_string,
    +            'xhtml' : etree.tostring, 
    +            'xhtml1': etree.tostring,
    +        }
    +
    +        self.references = {}
    +        self.htmlStash = preprocessors.HtmlStash()
    +        self.registerExtensions(extensions = extensions,
    +                                configs = extension_configs)
    +        self.set_output_format(output_format)
    +        self.reset()
    +
    +    def registerExtensions(self, extensions, configs):
    +        """
    +        Register extensions with this instance of Markdown.
    +
    +        Keyword aurguments:
    +
    +        * extensions: A list of extensions, which can either
    +           be strings or objects.  See the docstring on Markdown.
    +        * configs: A dictionary mapping module names to config options.
    +
    +        """
    +        for ext in extensions:
    +            if isinstance(ext, basestring):
    +                ext = load_extension(ext, configs.get(ext, []))
    +            try:
    +                ext.extendMarkdown(self, globals())
    +            except AttributeError:
    +                message(ERROR, "Incorrect type! Extension '%s' is "
    +                               "neither a string or an Extension." %(repr(ext)))
    +            
    +
    +    def registerExtension(self, extension):
    +        """ This gets called by the extension """
    +        self.registeredExtensions.append(extension)
    +
    +    def reset(self):
    +        """
    +        Resets all state variables so that we can start with a new text.
    +        """
    +        self.htmlStash.reset()
    +        self.references.clear()
    +
    +        for extension in self.registeredExtensions:
    +            extension.reset()
    +
    +    def set_output_format(self, format):
    +        """ Set the output format for the class instance. """
    +        try:
    +            self.serializer = self.output_formats[format.lower()]
    +        except KeyError:
    +            message(CRITICAL, 'Invalid Output Format: "%s". Use one of %s.' \
    +                               % (format, self.output_formats.keys()))
    +
    +    def convert(self, source):
    +        """
    +        Convert markdown to serialized XHTML or HTML.
    +
    +        Keyword arguments:
    +
    +        * source: Source text as a Unicode string.
    +
    +        """
    +
    +        # Fixup the source text
    +        if not source.strip():
    +            return u""  # a blank unicode string
    +        try:
    +            source = unicode(source)
    +        except UnicodeDecodeError:
    +            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
    +            return u""
    +
    +        source = source.replace(STX, "").replace(ETX, "")
    +        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
    +        source = re.sub(r'\n\s+\n', '\n\n', source)
    +        source = source.expandtabs(TAB_LENGTH)
    +
    +        # Split into lines and run the line preprocessors.
    +        self.lines = source.split("\n")
    +        for prep in self.preprocessors.values():
    +            self.lines = prep.run(self.lines)
    +
    +        # Parse the high-level elements.
    +        root = self.parser.parseDocument(self.lines).getroot()
    +
    +        # Run the tree-processors
    +        for treeprocessor in self.treeprocessors.values():
    +            newRoot = treeprocessor.run(root)
    +            if newRoot:
    +                root = newRoot
    +
    +        # Serialize _properly_.  Strip top-level tags.
    +        output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf8"))
    +        if self.stripTopLevelTags:
    +            start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2
    +            end = output.rindex(''%DOC_TAG)
    +            output = output[start:end].strip()
    +
    +        # Run the text post-processors
    +        for pp in self.postprocessors.values():
    +            output = pp.run(output)
    +
    +        return output.strip()
    +
    +    def convertFile(self, input=None, output=None, encoding=None):
    +        """Converts a markdown file and returns the HTML as a unicode string.
    +
    +        Decodes the file using the provided encoding (defaults to utf-8),
    +        passes the file content to markdown, and outputs the html to either
    +        the provided stream or the file with provided name, using the same
    +        encoding as the source file.
    +
    +        **Note:** This is the only place that decoding and encoding of unicode
    +        takes place in Python-Markdown.  (All other code is unicode-in /
    +        unicode-out.)
    +
    +        Keyword arguments:
    +
    +        * input: Name of source text file.
    +        * output: Name of output file. Writes to stdout if `None`.
    +        * encoding: Encoding of input and output files. Defaults to utf-8.
    +
    +        """
    +
    +        encoding = encoding or "utf-8"
    +
    +        # Read the source
    +        input_file = codecs.open(input, mode="r", encoding=encoding)
    +        text = input_file.read()
    +        input_file.close()
    +        text = text.lstrip(u'\ufeff') # remove the byte-order mark
    +
    +        # Convert
    +        html = self.convert(text)
    +
    +        # Write to file or stdout
    +        if isinstance(output, (str, unicode)):
    +            output_file = codecs.open(output, "w", encoding=encoding)
    +            output_file.write(html)
    +            output_file.close()
    +        else:
    +            output.write(html.encode(encoding))
    +
    +
    +"""
    +Extensions
    +-----------------------------------------------------------------------------
    +"""
    +
    +class Extension:
    +    """ Base class for extensions to subclass. """
    +    def __init__(self, configs = {}):
    +        """Create an instance of an Extention.
    +
    +        Keyword arguments:
    +
    +        * configs: A dict of configuration setting used by an Extension.
    +        """
    +        self.config = configs
    +
    +    def getConfig(self, key):
    +        """ Return a setting for the given key or an empty string. """
    +        if key in self.config:
    +            return self.config[key][0]
    +        else:
    +            return ""
    +
    +    def getConfigInfo(self):
    +        """ Return all config settings as a list of tuples. """
    +        return [(key, self.config[key][1]) for key in self.config.keys()]
    +
    +    def setConfig(self, key, value):
    +        """ Set a config setting for `key` with the given `value`. """
    +        self.config[key][0] = value
    +
    +    def extendMarkdown(self, md, md_globals):
    +        """
    +        Add the various proccesors and patterns to the Markdown Instance.
    +
    +        This method must be overriden by every extension.
    +
    +        Keyword arguments:
    +
    +        * md: The Markdown instance.
    +
    +        * md_globals: Global variables in the markdown module namespace.
    +
    +        """
    +        pass
    +
    +
    +def load_extension(ext_name, configs = []):
    +    """Load extension by name, then return the module.
    +
    +    The extension name may contain arguments as part of the string in the
    +    following format: "extname(key1=value1,key2=value2)"
    +
    +    """
    +
    +    # Parse extensions config params (ignore the order)
    +    configs = dict(configs)
    +    pos = ext_name.find("(") # find the first "("
    +    if pos > 0:
    +        ext_args = ext_name[pos+1:-1]
    +        ext_name = ext_name[:pos]
    +        pairs = [x.split("=") for x in ext_args.split(",")]
    +        configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
    +
    +    # Setup the module names
    +    ext_module = 'markdown.extensions'
    +    module_name_new_style = '.'.join([ext_module, ext_name])
    +    module_name_old_style = '_'.join(['mdx', ext_name])
    +
    +    # Try loading the extention first from one place, then another
    +    try: # New style (markdown.extensons.)
    +        module = __import__(module_name_new_style, {}, {}, [ext_module])
    +    except ImportError:
    +        try: # Old style (mdx.)
    +            module = __import__(module_name_old_style)
    +        except ImportError:
    +           message(WARN, "Failed loading extension '%s' from '%s' or '%s'"
    +               % (ext_name, module_name_new_style, module_name_old_style))
    +           # Return None so we don't try to initiate none-existant extension
    +           return None
    +
    +    # If the module is loaded successfully, we expect it to define a
    +    # function called makeExtension()
    +    try:
    +        return module.makeExtension(configs.items())
    +    except AttributeError:
    +        message(CRITICAL, "Failed to initiate extension '%s'" % ext_name)
    +
    +
    +def load_extensions(ext_names):
    +    """Loads multiple extensions"""
    +    extensions = []
    +    for ext_name in ext_names:
    +        extension = load_extension(ext_name)
    +        if extension:
    +            extensions.append(extension)
    +    return extensions
    +
    +
    +"""
    +EXPORTED FUNCTIONS
    +=============================================================================
    +
    +Those are the two functions we really mean to export: markdown() and
    +markdownFromFile().
    +"""
    +
    +def markdown(text,
    +             extensions = [],
    +             safe_mode = False,
    +             output_format = DEFAULT_OUTPUT_FORMAT):
    +    """Convert a markdown string to HTML and return HTML as a unicode string.
    +
    +    This is a shortcut function for `Markdown` class to cover the most
    +    basic use case.  It initializes an instance of Markdown, loads the
    +    necessary extensions and runs the parser on the given text.
    +
    +    Keyword arguments:
    +
    +    * text: Markdown formatted text as Unicode or ASCII string.
    +    * extensions: A list of extensions or extension names (may contain config args).
    +    * safe_mode: Disallow raw html.  One of "remove", "replace" or "escape".
    +    * output_format: Format of output. Supported formats are:
    +        * "xhtml1": Outputs XHTML 1.x. Default.
    +        * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
    +        * "html4": Outputs HTML 4
    +        * "html": Outputs latest supported version of HTML (currently HTML 4).
    +        Note that it is suggested that the more specific formats ("xhtml1" 
    +        and "html4") be used as "xhtml" or "html" may change in the future
    +        if it makes sense at that time. 
    +
    +    Returns: An HTML document as a string.
    +
    +    """
    +    md = Markdown(extensions=load_extensions(extensions),
    +                  safe_mode=safe_mode, 
    +                  output_format=output_format)
    +    return md.convert(text)
    +
    +
    +def markdownFromFile(input = None,
    +                     output = None,
    +                     extensions = [],
    +                     encoding = None,
    +                     safe_mode = False,
    +                     output_format = DEFAULT_OUTPUT_FORMAT):
    +    """Read markdown code from a file and write it to a file or a stream."""
    +    md = Markdown(extensions=load_extensions(extensions), 
    +                  safe_mode=safe_mode,
    +                  output_format=output_format)
    +    md.convertFile(input, output, encoding)
    +
    +
    +
    diff --git a/website/markdown/blockparser.py b/website/markdown/blockparser.py
    new file mode 100644
    index 000000000..e18b33848
    --- /dev/null
    +++ b/website/markdown/blockparser.py
    @@ -0,0 +1,95 @@
    +
    +import markdown
    +
    +class State(list):
    +    """ Track the current and nested state of the parser. 
    +    
    +    This utility class is used to track the state of the BlockParser and 
    +    support multiple levels if nesting. It's just a simple API wrapped around
    +    a list. Each time a state is set, that state is appended to the end of the
    +    list. Each time a state is reset, that state is removed from the end of
    +    the list.
    +
    +    Therefore, each time a state is set for a nested block, that state must be 
    +    reset when we back out of that level of nesting or the state could be
    +    corrupted.
    +
    +    While all the methods of a list object are available, only the three
    +    defined below need be used.
    +
    +    """
    +
    +    def set(self, state):
    +        """ Set a new state. """
    +        self.append(state)
    +
    +    def reset(self):
    +        """ Step back one step in nested state. """
    +        self.pop()
    +
    +    def isstate(self, state):
    +        """ Test that top (current) level is of given state. """
    +        if len(self):
    +            return self[-1] == state
    +        else:
    +            return False
    +
    +class BlockParser:
    +    """ Parse Markdown blocks into an ElementTree object. 
    +    
    +    A wrapper class that stitches the various BlockProcessors together,
    +    looping through them and creating an ElementTree object.
    +    """
    +
    +    def __init__(self):
    +        self.blockprocessors = markdown.odict.OrderedDict()
    +        self.state = State()
    +
    +    def parseDocument(self, lines):
    +        """ Parse a markdown document into an ElementTree. 
    +        
    +        Given a list of lines, an ElementTree object (not just a parent Element)
    +        is created and the root element is passed to the parser as the parent.
    +        The ElementTree object is returned.
    +        
    +        This should only be called on an entire document, not pieces.
    +
    +        """
    +        # Create a ElementTree from the lines
    +        self.root = markdown.etree.Element(markdown.DOC_TAG)
    +        self.parseChunk(self.root, '\n'.join(lines))
    +        return markdown.etree.ElementTree(self.root)
    +
    +    def parseChunk(self, parent, text):
    +        """ Parse a chunk of markdown text and attach to given etree node. 
    +        
    +        While the ``text`` argument is generally assumed to contain multiple
    +        blocks which will be split on blank lines, it could contain only one
    +        block. Generally, this method would be called by extensions when
    +        block parsing is required. 
    +        
    +        The ``parent`` etree Element passed in is altered in place. 
    +        Nothing is returned.
    +
    +        """
    +        self.parseBlocks(parent, text.split('\n\n'))
    +
    +    def parseBlocks(self, parent, blocks):
    +        """ Process blocks of markdown text and attach to given etree node. 
    +        
    +        Given a list of ``blocks``, each blockprocessor is stepped through
    +        until there are no blocks left. While an extension could potentially
    +        call this method directly, it's generally expected to be used internally.
    +
    +        This is a public method as an extension may need to add/alter additional
    +        BlockProcessors which call this method to recursively parse a nested
    +        block.
    +
    +        """
    +        while blocks:
    +           for processor in self.blockprocessors.values():
    +               if processor.test(parent, blocks[0]):
    +                   processor.run(parent, blocks)
    +                   break
    +
    +
    diff --git a/website/markdown/blockprocessors.py b/website/markdown/blockprocessors.py
    new file mode 100644
    index 000000000..79f4db93b
    --- /dev/null
    +++ b/website/markdown/blockprocessors.py
    @@ -0,0 +1,460 @@
    +"""
    +CORE MARKDOWN BLOCKPARSER
    +=============================================================================
    +
    +This parser handles basic parsing of Markdown blocks.  It doesn't concern itself
    +with inline elements such as **bold** or *italics*, but rather just catches 
    +blocks, lists, quotes, etc.
    +
The BlockParser is made up of a bunch of BlockProcessors, each handling a
    +different type of block. Extensions may add/replace/remove BlockProcessors
    +as they need to alter how markdown blocks are parsed.
    +
    +"""
    +
    +import re
    +import markdown
    +
    +class BlockProcessor:
    +    """ Base class for block processors. 
    +    
    +    Each subclass will provide the methods below to work with the source and
    +    tree. Each processor will need to define it's own ``test`` and ``run``
    +    methods. The ``test`` method should return True or False, to indicate
    +    whether the current block should be processed by this processor. If the
    +    test passes, the parser will call the processors ``run`` method.
    +
    +    """
    +
    +    def __init__(self, parser=None):
    +        self.parser = parser
    +
    +    def lastChild(self, parent):
    +        """ Return the last child of an etree element. """
    +        if len(parent):
    +            return parent[-1]
    +        else:
    +            return None
    +
    +    def detab(self, text):
    +        """ Remove a tab from the front of each line of the given text. """
    +        newtext = []
    +        lines = text.split('\n')
    +        for line in lines:
    +            if line.startswith(' '*markdown.TAB_LENGTH):
    +                newtext.append(line[markdown.TAB_LENGTH:])
    +            elif not line.strip():
    +                newtext.append('')
    +            else:
    +                break
    +        return '\n'.join(newtext), '\n'.join(lines[len(newtext):])
    +
    +    def looseDetab(self, text, level=1):
    +        """ Remove a tab from front of lines but allowing dedented lines. """
    +        lines = text.split('\n')
    +        for i in range(len(lines)):
    +            if lines[i].startswith(' '*markdown.TAB_LENGTH*level):
    +                lines[i] = lines[i][markdown.TAB_LENGTH*level:]
    +        return '\n'.join(lines)
    +
    +    def test(self, parent, block):
    +        """ Test for block type. Must be overridden by subclasses. 
    +        
    +        As the parser loops through processors, it will call the ``test`` method
    +        on each to determine if the given block of text is of that type. This
    +        method must return a boolean ``True`` or ``False``. The actual method of
    +        testing is left to the needs of that particular block type. It could 
    +        be as simple as ``block.startswith(some_string)`` or a complex regular
    +        expression. As the block type may be different depending on the parent
    +        of the block (i.e. inside a list), the parent etree element is also 
    +        provided and may be used as part of the test.
    +
    +        Keywords:
    +        
    +        * ``parent``: A etree element which will be the parent of the block.
    +        * ``block``: A block of text from the source which has been split at 
    +            blank lines.
    +        """
    +        pass
    +
    def run(self, parent, blocks):
        """Process the block. Must be overridden by subclasses.

        Invoked once ``test`` has identified this processor as the right
        one. Implementations parse the block's lines and attach the result
        to the etree *in place*: both ``parent`` and the ``blocks`` list
        are shared mutable objects, and there is no mechanism for
        returning replacements. A typical implementation adds SubElements
        or text to ``parent``, ``pop``s the consumed block(s) from
        ``blocks``, and may ``insert`` leftover text back for later
        parsing.

        Keyword arguments:

        * ``parent``: etree element which is the parent of the current block.
        * ``blocks``: list of all remaining blocks of the document.
        """
        pass
    +
    +
class ListIndentProcessor(BlockProcessor):
    """ Process children of list items. 
    
    Example:
        * a list item
            process this part

            or this part

    """

    # One or more full TAB_LENGTH indents at the start of the block;
    # group(1) captures the whole indent so its length gives the depth.
    INDENT_RE = re.compile(r'^(([ ]{%s})+)'% markdown.TAB_LENGTH)
    # Tags recognized as a list item and as a list container, respectively.
    ITEM_TYPES = ['li']
    LIST_TYPES = ['ul', 'ol']

    def test(self, parent, block):
        # Claim a block only when it is indented by at least one tab,
        # we are not already re-parsing detabbed content (guards against
        # recursion via run()), and the context is list-like: the parent
        # is itself an item, or the parent's last child is a list.
        return block.startswith(' '*markdown.TAB_LENGTH) and \
                not self.parser.state.isstate('detabbed') and  \
                (parent.tag in self.ITEM_TYPES or \
                    (len(parent) and parent[-1] and \
                        (parent[-1].tag in self.LIST_TYPES)
                    )
                )

    def run(self, parent, blocks):
        block = blocks.pop(0)
        # Find how deep this block nests and which element should own it,
        # then strip that many levels of indentation.
        level, sibling = self.get_level(parent, block)
        block = self.looseDetab(block, level)

        # Flag state so test() skips this (now dedented) content on the
        # recursive parse below.
        self.parser.state.set('detabbed')
        if parent.tag in self.ITEM_TYPES:
            # The parent is already a li. Just parse the child block.
            self.parser.parseBlocks(parent, [block])
        elif sibling.tag in self.ITEM_TYPES:
            # The sibling is a li. Use it as parent.
            self.parser.parseBlocks(sibling, [block])
        elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES:
            # The parent is a list (``ol`` or ``ul``) which has children.
            # Assume the last child li is the parent of this block.
            if sibling[-1].text:
                # If the parent li has text, that text needs to be moved to a p
                block = '%s\n\n%s' % (sibling[-1].text, block)
                sibling[-1].text = ''
            self.parser.parseChunk(sibling[-1], block)
        else:
            # No suitable item found: start a fresh li under the sibling.
            self.create_item(sibling, block)
        self.parser.state.reset()

    def create_item(self, parent, block):
        """ Create a new li and parse the block with it as the parent. """
        li = markdown.etree.SubElement(parent, 'li')
        self.parser.parseBlocks(li, [block])

    def get_level(self, parent, block):
        """ Get level of indent based on list level. """
        # Get indent level
        m = self.INDENT_RE.match(block)
        if m:
            # Python 2 integer division: whole tabs of indentation.
            indent_level = len(m.group(1))/markdown.TAB_LENGTH
        else:
            indent_level = 0
        if self.parser.state.isstate('list'):
            # We're in a tightlist - so we already are at correct parent.
            level = 1
        else:
            # We're in a looselist - so we need to find parent.
            level = 0
        # Step through children of tree to find matching indent level,
        # descending into the last child at each step. Only crossing a
        # list container (ul/ol) counts as a level; a bare li does not.
        while indent_level > level:
            child = self.lastChild(parent)
            if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES):
                if child.tag in self.LIST_TYPES:
                    level += 1
                parent = child
            else:
                # No more child levels. If we're short of indent_level,
                # we have a code block. So we stop here.
                break
        return level, parent
    +
    +
class CodeBlockProcessor(BlockProcessor):
    """ Process code blocks (blocks indented by one tab).

    Builds ``<pre><code>`` elements, continuing an immediately preceding
    code block rather than starting a new one, so that blank lines inside
    indented code do not split it.
    """

    def test(self, parent, block):
        # A code block is simply any block indented by a full tab.
        return block.startswith(' '*markdown.TAB_LENGTH)
    
    def run(self, parent, blocks):
        sibling = self.lastChild(parent)
        block = blocks.pop(0)
        theRest = ''
        if sibling and sibling.tag == "pre" and len(sibling) \
                    and sibling[0].tag == "code":
            # The previous block was a code block. As blank lines do not start
            # new code blocks, append this block to the previous, adding back
            # linebreaks removed from the split into a list.
            code = sibling[0]
            block, theRest = self.detab(block)
            # AtomicString keeps inline patterns from processing code text.
            code.text = markdown.AtomicString('%s\n%s\n' % (code.text, block.rstrip()))
        else:
            # This is a new codeblock. Create the elements and insert text.
            pre = markdown.etree.SubElement(parent, 'pre')
            code = markdown.etree.SubElement(pre, 'code')
            block, theRest = self.detab(block)
            code.text = markdown.AtomicString('%s\n' % block.rstrip())
        if theRest:
            # This block contained unindented line(s) after the first indented 
            # line. Insert these lines as the first block of the master blocks
            # list for future processing.
            blocks.insert(0, theRest)
    +
    +
class BlockQuoteProcessor(BlockProcessor):
    """ Process blockquotes: lines starting with ``>`` (up to 3 leading
    spaces allowed). Consecutive quoted blocks merge into one
    ``<blockquote>`` element, whose content is parsed recursively. """

    # Matches a quote marker at the start of any line in the block;
    # group(2) is the line's content after the marker.
    RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')

    def test(self, parent, block):
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            before = block[:m.start()] # Lines before blockquote
            # Pass lines before blockquote in recursively for parsing first.
            self.parser.parseBlocks(parent, [before])
            # Remove ``> `` from beginning of each line.
            block = '\n'.join([self.clean(line) for line in 
                            block[m.start():].split('\n')])
        sibling = self.lastChild(parent)
        if sibling and sibling.tag == "blockquote":
            # Previous block was a blockquote so set that as this block's parent
            quote = sibling
        else:
            # This is a new blockquote. Create a new parent element.
            quote = markdown.etree.SubElement(parent, 'blockquote')
        # Recursively parse block with blockquote as parent.
        self.parser.parseChunk(quote, block)

    def clean(self, line):
        """ Remove ``>`` from beginning of a line. """
        m = self.RE.match(line)
        if line.strip() == ">":
            # A bare marker becomes an empty line inside the quote.
            return ""
        elif m:
            return m.group(2)
        else:
            # Lazy continuation line: no marker, keep as-is.
            return line
    +
class OListProcessor(BlockProcessor):
    """ Process ordered list blocks.

    Also serves as the base class for ``UListProcessor``, which overrides
    only ``TAG`` and ``RE``; the shared regexes below accept items of
    either list type on continuation lines.
    """

    TAG = 'ol'
    # Detect an item (``1. item``). ``group(1)`` contains contents of item.
    RE = re.compile(r'^[ ]{0,3}\d+\.[ ](.*)')
    # Detect items on secondary lines. they can be of either list type.
    CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ](.*)')
    # Detect indented (nested) items of either type
    INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ].*')

    def test(self, parent, block):
        return bool(self.RE.match(block))

    def run(self, parent, blocks):
        # Check for multiple items in one block.
        items = self.get_items(blocks.pop(0))
        sibling = self.lastChild(parent)
        if sibling and sibling.tag in ['ol', 'ul']:
            # Previous block was a list item, so set that as parent
            # (this is a loose list: items separated by blank lines).
            lst = sibling
            # make sure previous item is in a p.
            if len(lst) and lst[-1].text and not len(lst[-1]):
                p = markdown.etree.SubElement(lst[-1], 'p')
                p.text = lst[-1].text
                lst[-1].text = ''
            # parse first block differently as it gets wrapped in a p.
            li = markdown.etree.SubElement(lst, 'li')
            self.parser.state.set('looselist')
            firstitem = items.pop(0)
            self.parser.parseBlocks(li, [firstitem])
            self.parser.state.reset()
        else:
            # This is a new list so create parent with appropriate tag.
            lst = markdown.etree.SubElement(parent, self.TAG)
        self.parser.state.set('list')
        # Loop through items in block, recursively parsing each with the
        # appropriate parent.
        for item in items:
            if item.startswith(' '*markdown.TAB_LENGTH):
                # Item is indented. Parse with last item as parent
                self.parser.parseBlocks(lst[-1], [item])
            else:
                # New item. Create li and parse with it as parent
                li = markdown.etree.SubElement(lst, 'li')
                self.parser.parseBlocks(li, [item])
        self.parser.state.reset()

    def get_items(self, block):
        """ Break a block into list items. """
        items = []
        for line in block.split('\n'):
            m = self.CHILD_RE.match(line)
            if m:
                # This is a new item. Append
                items.append(m.group(3))
            elif self.INDENT_RE.match(line):
                # This is an indented (possibly nested) item.
                if items[-1].startswith(' '*markdown.TAB_LENGTH):
                    # Previous item was indented. Append to that item.
                    items[-1] = '%s\n%s' % (items[-1], line)
                else:
                    items.append(line)
            else:
                # This is another line of previous item. Append to that item.
                items[-1] = '%s\n%s' % (items[-1], line)
        return items
    +
    +
class UListProcessor(OListProcessor):
    """ Process unordered list blocks.

    Inherits all parsing logic from ``OListProcessor``; only the output
    tag and the item-detection pattern differ.
    """

    TAG = 'ul'
    # Detect an item (``* item``, ``+ item`` or ``- item``);
    # ``group(1)`` contains contents of item.
    RE = re.compile(r'^[ ]{0,3}[*+-][ ](.*)')
    +
    +
    +class HashHeaderProcessor(BlockProcessor):
    +    """ Process Hash Headers. """
    +
    +    # Detect a header at start of any line in block
    +    RE = re.compile(r'(^|\n)(?P#{1,6})(?P
    .*?)#*(\n|$)') + + def test(self, parent, block): + return bool(self.RE.search(block)) + + def run(self, parent, blocks): + block = blocks.pop(0) + m = self.RE.search(block) + if m: + before = block[:m.start()] # All lines before header + after = block[m.end():] # All lines after header + if before: + # As the header was not the first line of the block and the + # lines before the header must be parsed first, + # recursively parse this lines as a block. + self.parser.parseBlocks(parent, [before]) + # Create header using named groups from RE + h = markdown.etree.SubElement(parent, 'h%d' % len(m.group('level'))) + h.text = m.group('header').strip() + if after: + # Insert remaining lines as first block for future parsing. + blocks.insert(0, after) + else: + # This should never happen, but just in case... + message(CRITICAL, "We've got a problem header!") + + +class SetextHeaderProcessor(BlockProcessor): + """ Process Setext-style Headers. """ + + # Detect Setext-style header. Must be first 2 lines of block. + RE = re.compile(r'^.*?\n[=-]{3,}', re.MULTILINE) + + def test(self, parent, block): + return bool(self.RE.match(block)) + + def run(self, parent, blocks): + lines = blocks.pop(0).split('\n') + # Determine level. ``=`` is 1 and ``-`` is 2. + if lines[1].startswith('='): + level = 1 + else: + level = 2 + h = markdown.etree.SubElement(parent, 'h%d' % level) + h.text = lines[0].strip() + if len(lines) > 2: + # Block contains additional lines. Add to master blocks for later. + blocks.insert(0, '\n'.join(lines[2:])) + + +class HRProcessor(BlockProcessor): + """ Process Horizontal Rules. """ + + RE = r'[ ]{0,3}(?P[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*' + # Detect hr on any line of a block. + SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE) + # Match a hr on a single line of text. 
+ MATCH_RE = re.compile(r'^%s$' % RE) + + def test(self, parent, block): + return bool(self.SEARCH_RE.search(block)) + + def run(self, parent, blocks): + lines = blocks.pop(0).split('\n') + prelines = [] + # Check for lines in block before hr. + for line in lines: + m = self.MATCH_RE.match(line) + if m: + break + else: + prelines.append(line) + if len(prelines): + # Recursively parse lines before hr so they get parsed first. + self.parser.parseBlocks(parent, ['\n'.join(prelines)]) + # create hr + hr = markdown.etree.SubElement(parent, 'hr') + # check for lines in block after hr. + lines = lines[len(prelines)+1:] + if len(lines): + # Add lines after hr to master blocks for later parsing. + blocks.insert(0, '\n'.join(lines)) + + +class EmptyBlockProcessor(BlockProcessor): + """ Process blocks and start with an empty line. """ + + # Detect a block that only contains whitespace + # or only whitespace on the first line. + RE = re.compile(r'^\s*\n') + + def test(self, parent, block): + return bool(self.RE.match(block)) + + def run(self, parent, blocks): + block = blocks.pop(0) + m = self.RE.match(block) + if m: + # Add remaining line to master blocks for later. + blocks.insert(0, block[m.end():]) + sibling = self.lastChild(parent) + if sibling and sibling.tag == 'pre' and sibling[0] and \ + sibling[0].tag == 'code': + # Last block is a codeblock. Append to preserve whitespace. + sibling[0].text = markdown.AtomicString('%s/n/n/n' % sibling[0].text ) + + +class ParagraphProcessor(BlockProcessor): + """ Process Paragraph blocks. """ + + def test(self, parent, block): + return True + + def run(self, parent, blocks): + block = blocks.pop(0) + if block.strip(): + # Not a blank block. Add to parent, otherwise throw it away. + if self.parser.state.isstate('list'): + # The parent is a tight-list. 
Append to parent.text + if parent.text: + parent.text = '%s\n%s' % (parent.text, block) + else: + parent.text = block.lstrip() + else: + # Create a regular paragraph + p = markdown.etree.SubElement(parent, 'p') + p.text = block.lstrip() diff --git a/website/markdown/commandline.py b/website/markdown/commandline.py new file mode 100644 index 000000000..1eedc6dbb --- /dev/null +++ b/website/markdown/commandline.py @@ -0,0 +1,96 @@ +""" +COMMAND-LINE SPECIFIC STUFF +============================================================================= + +The rest of the code is specifically for handling the case where Python +Markdown is called from the command line. +""" + +import markdown +import sys +import logging +from logging import DEBUG, INFO, WARN, ERROR, CRITICAL + +EXECUTABLE_NAME_FOR_USAGE = "python markdown.py" +""" The name used in the usage statement displayed for python versions < 2.3. +(With python 2.3 and higher the usage statement is generated by optparse +and uses the actual name of the executable called.) """ + +OPTPARSE_WARNING = """ +Python 2.3 or higher required for advanced command line options. +For lower versions of Python use: + + %s INPUT_FILE > OUTPUT_FILE + +""" % EXECUTABLE_NAME_FOR_USAGE + +def parse_options(): + """ + Define and parse `optparse` options for command-line usage. 
+ """ + + try: + optparse = __import__("optparse") + except: + if len(sys.argv) == 2: + return {'input': sys.argv[1], + 'output': None, + 'safe': False, + 'extensions': [], + 'encoding': None }, CRITICAL + else: + print OPTPARSE_WARNING + return None, None + + parser = optparse.OptionParser(usage="%prog INPUTFILE [options]") + parser.add_option("-f", "--file", dest="filename", default=sys.stdout, + help="write output to OUTPUT_FILE", + metavar="OUTPUT_FILE") + parser.add_option("-e", "--encoding", dest="encoding", + help="encoding for input and output files",) + parser.add_option("-q", "--quiet", default = CRITICAL, + action="store_const", const=CRITICAL+10, dest="verbose", + help="suppress all messages") + parser.add_option("-v", "--verbose", + action="store_const", const=INFO, dest="verbose", + help="print info messages") + parser.add_option("-s", "--safe", dest="safe", default=False, + metavar="SAFE_MODE", + help="safe mode ('replace', 'remove' or 'escape' user's HTML tag)") + parser.add_option("-o", "--output_format", dest="output_format", + default='xhtml1', metavar="OUTPUT_FORMAT", + help="Format of output. 
One of 'xhtml1' (default) or 'html4'.") + parser.add_option("--noisy", + action="store_const", const=DEBUG, dest="verbose", + help="print debug messages") + parser.add_option("-x", "--extension", action="append", dest="extensions", + help = "load extension EXTENSION", metavar="EXTENSION") + + (options, args) = parser.parse_args() + + if not len(args) == 1: + parser.print_help() + return None, None + else: + input_file = args[0] + + if not options.extensions: + options.extensions = [] + + return {'input': input_file, + 'output': options.filename, + 'safe_mode': options.safe, + 'extensions': options.extensions, + 'encoding': options.encoding, + 'output_format': options.output_format}, options.verbose + +def run(): + """Run Markdown from the command line.""" + + # Parse options and adjust logging level if necessary + options, logging_level = parse_options() + if not options: sys.exit(0) + if logging_level: logging.getLogger('MARKDOWN').setLevel(logging_level) + + # Run + markdown.markdownFromFile(**options) diff --git a/website/markdown/etree_loader.py b/website/markdown/etree_loader.py new file mode 100644 index 000000000..e2599b2cb --- /dev/null +++ b/website/markdown/etree_loader.py @@ -0,0 +1,33 @@ + +from markdown import message, CRITICAL +import sys + +## Import +def importETree(): + """Import the best implementation of ElementTree, return a module object.""" + etree_in_c = None + try: # Is it Python 2.5+ with C implemenation of ElementTree installed? + import xml.etree.cElementTree as etree_in_c + except ImportError: + try: # Is it Python 2.5+ with Python implementation of ElementTree? + import xml.etree.ElementTree as etree + except ImportError: + try: # An earlier version of Python with cElementTree installed? + import cElementTree as etree_in_c + except ImportError: + try: # An earlier version of Python with Python ElementTree? 
+ import elementtree.ElementTree as etree + except ImportError: + message(CRITICAL, "Failed to import ElementTree") + sys.exit(1) + if etree_in_c and etree_in_c.VERSION < "1.0": + message(CRITICAL, "For cElementTree version 1.0 or higher is required.") + sys.exit(1) + elif etree_in_c : + return etree_in_c + elif etree.VERSION < "1.1": + message(CRITICAL, "For ElementTree version 1.1 or higher is required") + sys.exit(1) + else : + return etree + diff --git a/website/markdown/extensions/__init__.py b/website/markdown/extensions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/website/markdown/extensions/toc.py b/website/markdown/extensions/toc.py new file mode 100644 index 000000000..1624ccf64 --- /dev/null +++ b/website/markdown/extensions/toc.py @@ -0,0 +1,140 @@ +""" +Table of Contents Extension for Python-Markdown +* * * + +(c) 2008 [Jack Miller](http://codezen.org) + +Dependencies: +* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/) + +""" +import markdown +from markdown import etree +import re + +class TocTreeprocessor(markdown.treeprocessors.Treeprocessor): + # Iterator wrapper to get parent and child all at once + def iterparent(self, root): + for parent in root.getiterator(): + for child in parent: + yield parent, child + + def run(self, doc): + div = etree.Element("div") + div.attrib["class"] = "toc" + last_li = None + + # Add title to the div + if self.config["title"][0]: + header = etree.SubElement(div, "span") + header.attrib["class"] = "toctitle" + header.text = self.config["title"][0] + + level = 0 + list_stack=[div] + header_rgx = re.compile("[Hh][123456]") + + # Get a list of id attributes + used_ids = [] + for c in doc.getiterator(): + if "id" in c.attrib: + used_ids.append(c.attrib["id"]) + + for (p, c) in self.iterparent(doc): + if not c.text: + continue + + # To keep the output from screwing up the + # validation by putting a
    inside of a

    + # we actually replace the

    in its entirety. + # We do not allow the marker inside a header as that + # would causes an enless loop of placing a new TOC + # inside previously generated TOC. + + if c.text.find(self.config["marker"][0]) > -1 and not header_rgx.match(c.tag): + for i in range(len(p)): + if p[i] == c: + p[i] = div + break + + if header_rgx.match(c.tag): + tag_level = int(c.tag[-1]) + + # Regardless of how many levels we jumped + # only one list should be created, since + # empty lists containing lists are illegal. + + if tag_level < level: + list_stack.pop() + level = tag_level + + if tag_level > level: + newlist = etree.Element("ul") + if last_li: + last_li.append(newlist) + else: + list_stack[-1].append(newlist) + list_stack.append(newlist) + level = tag_level + + # Do not override pre-existing ids + if not "id" in c.attrib: + id = self.config["slugify"][0](c.text) + if id in used_ids: + ctr = 1 + while "%s_%d" % (id, ctr) in used_ids: + ctr += 1 + id = "%s_%d" % (id, ctr) + used_ids.append(id) + c.attrib["id"] = id + else: + id = c.attrib["id"] + + # List item link, to be inserted into the toc div + last_li = etree.Element("li") + link = etree.SubElement(last_li, "a") + link.text = c.text + link.attrib["href"] = '#' + id + + if int(self.config["anchorlink"][0]): + anchor = etree.SubElement(c, "a") + anchor.text = c.text + anchor.attrib["href"] = "#" + id + anchor.attrib["class"] = "toclink" + c.text = "" + + list_stack[-1].append(last_li) + +class TocExtension(markdown.Extension): + def __init__(self, configs): + self.config = { "marker" : ["[TOC]", + "Text to find and replace with Table of Contents -" + "Defaults to \"[TOC]\""], + "slugify" : [self.slugify, + "Function to generate anchors based on header text-" + "Defaults to a built in slugify function."], + "title" : [None, + "Title to insert into TOC

    - " + "Defaults to None"], + "anchorlink" : [0, + "1 if header should be a self link" + "Defaults to 0"]} + + for key, value in configs: + self.setConfig(key, value) + + # This is exactly the same as Django's slugify + def slugify(self, value): + """ Slugify a string, to make it URL friendly. """ + import unicodedata + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') + value = unicode(re.sub('[^\w\s-]', '', value).strip().lower()) + return re.sub('[-\s]+','-',value) + + def extendMarkdown(self, md, md_globals): + tocext = TocTreeprocessor(md) + tocext.config = self.config + md.treeprocessors.add("toc", tocext, "_begin") + +def makeExtension(configs={}): + return TocExtension(configs=configs) diff --git a/website/markdown/html4.py b/website/markdown/html4.py new file mode 100644 index 000000000..08f241d57 --- /dev/null +++ b/website/markdown/html4.py @@ -0,0 +1,274 @@ +# markdown/html4.py +# +# Add html4 serialization to older versions of Elementree +# Taken from ElementTree 1.3 preview with slight modifications +# +# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. 
+# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2007 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. 
+# -------------------------------------------------------------------- + + +import markdown +ElementTree = markdown.etree.ElementTree +QName = markdown.etree.QName +Comment = markdown.etree.Comment +PI = markdown.etree.PI +ProcessingInstruction = markdown.etree.ProcessingInstruction + +HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", + "img", "input", "isindex", "link", "meta" "param") + +try: + HTML_EMPTY = set(HTML_EMPTY) +except NameError: + pass + +_namespace_map = { + # "well-known" namespace prefixes + "http://www.w3.org/XML/1998/namespace": "xml", + "http://www.w3.org/1999/xhtml": "html", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", + "http://schemas.xmlsoap.org/wsdl/": "wsdl", + # xml schema + "http://www.w3.org/2001/XMLSchema": "xs", + "http://www.w3.org/2001/XMLSchema-instance": "xsi", + # dublic core + "http://purl.org/dc/elements/1.1/": "dc", +} + + +def _raise_serialization_error(text): + raise TypeError( + "cannot serialize %r (type %s)" % (text, type(text).__name__) + ) + +def _encode(text, encoding): + try: + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_cdata(text, encoding): + # escape character data + try: + # it's worth avoiding do-nothing calls for strings that are + # shorter than 500 character, or so. assume that's, by far, + # the most common case in most applications. 
+ if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + + +def _escape_attrib(text, encoding): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + if "\n" in text: + text = text.replace("\n", " ") + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_attrib_html(text, encoding): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + + +def _serialize_html(write, elem, encoding, qnames, namespaces): + tag = elem.tag + text = elem.text + if tag is Comment: + write("" % _escape_cdata(text, encoding)) + elif tag is ProcessingInstruction: + write("" % _escape_cdata(text, encoding)) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_html(write, e, encoding, qnames, None) + else: + write("<" + tag) + items = elem.items() + if items or namespaces: + items.sort() # lexical order + for k, v in items: + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib_html(v, encoding) + # FIXME: handle boolean attributes + write(" %s=\"%s\"" % (qnames[k], v)) + if namespaces: + items = namespaces.items() + items.sort(key=lambda x: x[1]) # sort on prefix + for v, k in items: + if k: + k = ":" + k + write(" 
xmlns%s=\"%s\"" % ( + k.encode(encoding), + _escape_attrib(v, encoding) + )) + write(">") + tag = tag.lower() + if text: + if tag == "script" or tag == "style": + write(_encode(text, encoding)) + else: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_html(write, e, encoding, qnames, None) + if tag not in HTML_EMPTY: + write("") + if elem.tail: + write(_escape_cdata(elem.tail, encoding)) + +def write_html(root, f, + # keyword arguments + encoding="us-ascii", + default_namespace=None): + assert root is not None + if not hasattr(f, "write"): + f = open(f, "wb") + write = f.write + if not encoding: + encoding = "us-ascii" + qnames, namespaces = _namespaces( + root, encoding, default_namespace + ) + _serialize_html( + write, root, encoding, qnames, namespaces + ) + +# -------------------------------------------------------------------- +# serialization support + +def _namespaces(elem, encoding, default_namespace=None): + # identify namespaces used in this tree + + # maps qnames to *encoded* prefix:local names + qnames = {None: None} + + # maps uri:s to prefixes + namespaces = {} + if default_namespace: + namespaces[default_namespace] = "" + + def encode(text): + return text.encode(encoding) + + def add_qname(qname): + # calculate serialized qname representation + try: + if qname[:1] == "{": + uri, tag = qname[1:].split("}", 1) + prefix = namespaces.get(uri) + if prefix is None: + prefix = _namespace_map.get(uri) + if prefix is None: + prefix = "ns%d" % len(namespaces) + if prefix != "xml": + namespaces[uri] = prefix + if prefix: + qnames[qname] = encode("%s:%s" % (prefix, tag)) + else: + qnames[qname] = encode(tag) # default element + else: + if default_namespace: + # FIXME: can this be handled in XML 1.0? 
+ raise ValueError( + "cannot use non-qualified names with " + "default_namespace option" + ) + qnames[qname] = encode(qname) + except TypeError: + _raise_serialization_error(qname) + + # populate qname and namespaces table + try: + iterate = elem.iter + except AttributeError: + iterate = elem.getiterator # cET compatibility + for elem in iterate(): + tag = elem.tag + if isinstance(tag, QName) and tag.text not in qnames: + add_qname(tag.text) + elif isinstance(tag, basestring): + if tag not in qnames: + add_qname(tag) + elif tag is not None and tag is not Comment and tag is not PI: + _raise_serialization_error(tag) + for key, value in elem.items(): + if isinstance(key, QName): + key = key.text + if key not in qnames: + add_qname(key) + if isinstance(value, QName) and value.text not in qnames: + add_qname(value.text) + text = elem.text + if isinstance(text, QName) and text.text not in qnames: + add_qname(text.text) + return qnames, namespaces + +def to_html_string(element, encoding=None): + class dummy: + pass + data = [] + file = dummy() + file.write = data.append + write_html(ElementTree(element).getroot(),file,encoding) + return "".join(data) diff --git a/website/markdown/inlinepatterns.py b/website/markdown/inlinepatterns.py new file mode 100644 index 000000000..89fa3b2ef --- /dev/null +++ b/website/markdown/inlinepatterns.py @@ -0,0 +1,371 @@ +""" +INLINE PATTERNS +============================================================================= + +Inline patterns such as *emphasis* are handled by means of auxiliary +objects, one per pattern. Pattern objects must be instances of classes +that extend markdown.Pattern. 
Each pattern object uses a single regular +expression and needs support the following methods: + + pattern.getCompiledRegExp() # returns a regular expression + + pattern.handleMatch(m) # takes a match object and returns + # an ElementTree element or just plain text + +All of python markdown's built-in patterns subclass from Pattern, +but you can add additional patterns that don't. + +Also note that all the regular expressions used by inline must +capture the whole block. For this reason, they all start with +'^(.*)' and end with '(.*)!'. In case with built-in expression +Pattern takes care of adding the "^(.*)" and "(.*)!". + +Finally, the order in which regular expressions are applied is very +important - e.g. if we first replace http://.../ links with tags +and _then_ try to replace inline html, we would end up with a mess. +So, we apply the expressions in the following order: + +* escape and backticks have to go before everything else, so + that we can preempt any markdown patterns by escaping them. + +* then we handle auto-links (must be done before inline html) + +* then we handle inline HTML. At this point we will simply + replace all inline HTML strings with a placeholder and add + the actual HTML to a hash. + +* then inline images (must be done before links) + +* then bracketed links, first regular then reference-style + +* finally we apply strong and emphasis +""" + +import markdown +import re +from urlparse import urlparse, urlunparse +import sys +if sys.version >= "3.0": + from html import entities as htmlentitydefs +else: + import htmlentitydefs + +""" +The actual regular expressions for patterns +----------------------------------------------------------------------------- +""" + +NOBRACKET = r'[^\]\[]*' +BRK = ( r'\[(' + + (NOBRACKET + r'(\[')*6 + + (NOBRACKET+ r'\])*')*6 + + NOBRACKET + r')\]' ) +NOIMG = r'(?|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*)\12)?\)''' +# [text](url) or [text]() + +IMAGE_LINK_RE = r'\!' 
+ BRK + r'\s*\((<.*?>|([^\)]*))\)' +# ![alttxt](http://x.com/) or ![alttxt]() +REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]' # [Google][3] +IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2] +NOT_STRONG_RE = r'( \* )' # stand-alone * or _ +AUTOLINK_RE = r'<((?:f|ht)tps?://[^>]*)>' # +AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # + +HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # <...> +ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # & +LINE_BREAK_RE = r' \n' # two spaces at end of line +LINE_BREAK_2_RE = r' $' # two spaces at end of text + + +def dequote(string): + """Remove quotes from around a string.""" + if ( ( string.startswith('"') and string.endswith('"')) + or (string.startswith("'") and string.endswith("'")) ): + return string[1:-1] + else: + return string + +ATTR_RE = re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123} + +def handleAttributes(text, parent): + """Set values of an element based on attribute definitions ({@id=123}).""" + def attributeCallback(match): + parent.set(match.group(1), match.group(2).replace('\n', ' ')) + return ATTR_RE.sub(attributeCallback, text) + + +""" +The pattern classes +----------------------------------------------------------------------------- +""" + +class Pattern: + """Base class that inline patterns subclass. """ + + def __init__ (self, pattern, markdown_instance=None): + """ + Create an instant of an inline pattern. + + Keyword arguments: + + * pattern: A regular expression that matches a pattern + + """ + self.pattern = pattern + self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern, re.DOTALL) + + # Api for Markdown to pass safe_mode into instance + self.safe_mode = False + if markdown_instance: + self.markdown = markdown_instance + + def getCompiledRegExp (self): + """ Return a compiled regular expression. """ + return self.compiled_re + + def handleMatch(self, m): + """Return a ElementTree element from the given match. + + Subclasses should override this method. 
+ + Keyword arguments: + + * m: A re match object containing a match of the pattern. + + """ + pass + + def type(self): + """ Return class name, to define pattern type """ + return self.__class__.__name__ + +BasePattern = Pattern # for backward compatibility + +class SimpleTextPattern (Pattern): + """ Return a simple text of group(2) of a Pattern. """ + def handleMatch(self, m): + text = m.group(2) + if text == markdown.INLINE_PLACEHOLDER_PREFIX: + return None + return text + +class SimpleTagPattern (Pattern): + """ + Return element of type `tag` with a text attribute of group(3) + of a Pattern. + + """ + def __init__ (self, pattern, tag): + Pattern.__init__(self, pattern) + self.tag = tag + + def handleMatch(self, m): + el = markdown.etree.Element(self.tag) + el.text = m.group(3) + return el + + +class SubstituteTagPattern (SimpleTagPattern): + """ Return a eLement of type `tag` with no children. """ + def handleMatch (self, m): + return markdown.etree.Element(self.tag) + + +class BacktickPattern (Pattern): + """ Return a `` element containing the matching text. """ + def __init__ (self, pattern): + Pattern.__init__(self, pattern) + self.tag = "code" + + def handleMatch(self, m): + el = markdown.etree.Element(self.tag) + el.text = markdown.AtomicString(m.group(3).strip()) + return el + + +class DoubleTagPattern (SimpleTagPattern): + """Return a ElementTree element nested in tag2 nested in tag1. + + Useful for strong emphasis etc. + + """ + def handleMatch(self, m): + tag1, tag2 = self.tag.split(",") + el1 = markdown.etree.Element(tag1) + el2 = markdown.etree.SubElement(el1, tag2) + el2.text = m.group(3) + return el1 + + +class HtmlPattern (Pattern): + """ Store raw inline html and return a placeholder. """ + def handleMatch (self, m): + rawhtml = m.group(2) + inline = True + place_holder = self.markdown.htmlStash.store(rawhtml) + return place_holder + + +class LinkPattern (Pattern): + """ Return a link element from the given match. 
""" + def handleMatch(self, m): + el = markdown.etree.Element("a") + el.text = m.group(2) + title = m.group(11) + href = m.group(9) + + if href: + if href[0] == "<": + href = href[1:-1] + el.set("href", self.sanitize_url(href.strip())) + else: + el.set("href", "") + + if title: + title = dequote(title) #.replace('"', """) + el.set("title", title) + return el + + def sanitize_url(self, url): + """ + Sanitize a url against xss attacks in "safe_mode". + + Rather than specifically blacklisting `javascript:alert("XSS")` and all + its aliases (see ), we whitelist known + safe url formats. Most urls contain a network location, however some + are known not to (i.e.: mailto links). Script urls do not contain a + location. Additionally, for `javascript:...`, the scheme would be + "javascript" but some aliases will appear to `urlparse()` to have no + scheme. On top of that relative links (i.e.: "foo/bar.html") have no + scheme. Therefore we must check "path", "parameters", "query" and + "fragment" for any literal colons. We don't check "scheme" for colons + because it *should* never have any and "netloc" must allow the form: + `username:password@host:port`. + + """ + locless_schemes = ['', 'mailto', 'news'] + scheme, netloc, path, params, query, fragment = url = urlparse(url) + safe_url = False + if netloc != '' or scheme in locless_schemes: + safe_url = True + + for part in url[2:]: + if ":" in part: + safe_url = False + + if self.markdown.safeMode and not safe_url: + return '' + else: + return urlunparse(url) + +class ImagePattern(LinkPattern): + """ Return a img element from the given match. 
""" + def handleMatch(self, m): + el = markdown.etree.Element("img") + src_parts = m.group(9).split() + if src_parts: + src = src_parts[0] + if src[0] == "<" and src[-1] == ">": + src = src[1:-1] + el.set('src', self.sanitize_url(src)) + else: + el.set('src', "") + if len(src_parts) > 1: + el.set('title', dequote(" ".join(src_parts[1:]))) + + if markdown.ENABLE_ATTRIBUTES: + truealt = handleAttributes(m.group(2), el) + else: + truealt = m.group(2) + + el.set('alt', truealt) + return el + +class ReferencePattern(LinkPattern): + """ Match to a stored reference and return link element. """ + def handleMatch(self, m): + if m.group(9): + id = m.group(9).lower() + else: + # if we got something like "[Google][]" + # we'll use "google" as the id + id = m.group(2).lower() + + if not id in self.markdown.references: # ignore undefined refs + return None + href, title = self.markdown.references[id] + + text = m.group(2) + return self.makeTag(href, title, text) + + def makeTag(self, href, title, text): + el = markdown.etree.Element('a') + + el.set('href', self.sanitize_url(href)) + if title: + el.set('title', title) + + el.text = text + return el + + +class ImageReferencePattern (ReferencePattern): + """ Match to a stored reference and return img element. """ + def makeTag(self, href, title, text): + el = markdown.etree.Element("img") + el.set("src", self.sanitize_url(href)) + if title: + el.set("title", title) + el.set("alt", text) + return el + + +class AutolinkPattern (Pattern): + """ Return a link Element given an autolink (``). """ + def handleMatch(self, m): + el = markdown.etree.Element("a") + el.set('href', m.group(2)) + el.text = markdown.AtomicString(m.group(2)) + return el + +class AutomailPattern (Pattern): + """ + Return a mailto link Element given an automail link (``). 
+ """ + def handleMatch(self, m): + el = markdown.etree.Element('a') + email = m.group(2) + if email.startswith("mailto:"): + email = email[len("mailto:"):] + + def codepoint2name(code): + """Return entity definition by code, or the code if not defined.""" + entity = htmlentitydefs.codepoint2name.get(code) + if entity: + return "%s%s;" % (markdown.AMP_SUBSTITUTE, entity) + else: + return "%s#%d;" % (markdown.AMP_SUBSTITUTE, code) + + letters = [codepoint2name(ord(letter)) for letter in email] + el.text = markdown.AtomicString(''.join(letters)) + + mailto = "mailto:" + email + mailto = "".join([markdown.AMP_SUBSTITUTE + '#%d;' % + ord(letter) for letter in mailto]) + el.set('href', mailto) + return el + diff --git a/website/markdown/odict.py b/website/markdown/odict.py new file mode 100644 index 000000000..bf3ef0718 --- /dev/null +++ b/website/markdown/odict.py @@ -0,0 +1,162 @@ +class OrderedDict(dict): + """ + A dictionary that keeps its keys in the order in which they're inserted. + + Copied from Django's SortedDict with some modifications. 
+ + """ + def __new__(cls, *args, **kwargs): + instance = super(OrderedDict, cls).__new__(cls, *args, **kwargs) + instance.keyOrder = [] + return instance + + def __init__(self, data=None): + if data is None: + data = {} + super(OrderedDict, self).__init__(data) + if isinstance(data, dict): + self.keyOrder = data.keys() + else: + self.keyOrder = [] + for key, value in data: + if key not in self.keyOrder: + self.keyOrder.append(key) + + def __deepcopy__(self, memo): + from copy import deepcopy + return self.__class__([(key, deepcopy(value, memo)) + for key, value in self.iteritems()]) + + def __setitem__(self, key, value): + super(OrderedDict, self).__setitem__(key, value) + if key not in self.keyOrder: + self.keyOrder.append(key) + + def __delitem__(self, key): + super(OrderedDict, self).__delitem__(key) + self.keyOrder.remove(key) + + def __iter__(self): + for k in self.keyOrder: + yield k + + def pop(self, k, *args): + result = super(OrderedDict, self).pop(k, *args) + try: + self.keyOrder.remove(k) + except ValueError: + # Key wasn't in the dictionary in the first place. No problem. 
+ pass + return result + + def popitem(self): + result = super(OrderedDict, self).popitem() + self.keyOrder.remove(result[0]) + return result + + def items(self): + return zip(self.keyOrder, self.values()) + + def iteritems(self): + for key in self.keyOrder: + yield key, super(OrderedDict, self).__getitem__(key) + + def keys(self): + return self.keyOrder[:] + + def iterkeys(self): + return iter(self.keyOrder) + + def values(self): + return [super(OrderedDict, self).__getitem__(k) for k in self.keyOrder] + + def itervalues(self): + for key in self.keyOrder: + yield super(OrderedDict, self).__getitem__(key) + + def update(self, dict_): + for k, v in dict_.items(): + self.__setitem__(k, v) + + def setdefault(self, key, default): + if key not in self.keyOrder: + self.keyOrder.append(key) + return super(OrderedDict, self).setdefault(key, default) + + def value_for_index(self, index): + """Return the value of the item at the given zero-based index.""" + return self[self.keyOrder[index]] + + def insert(self, index, key, value): + """Insert the key, value pair before the item with the given index.""" + if key in self.keyOrder: + n = self.keyOrder.index(key) + del self.keyOrder[n] + if n < index: + index -= 1 + self.keyOrder.insert(index, key) + super(OrderedDict, self).__setitem__(key, value) + + def copy(self): + """Return a copy of this object.""" + # This way of initializing the copy means it works for subclasses, too. + obj = self.__class__(self) + obj.keyOrder = self.keyOrder[:] + return obj + + def __repr__(self): + """ + Replace the normal dict.__repr__ with a version that returns the keys + in their sorted order. + """ + return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in self.items()]) + + def clear(self): + super(OrderedDict, self).clear() + self.keyOrder = [] + + def index(self, key): + """ Return the index of a given key. """ + return self.keyOrder.index(key) + + def index_for_location(self, location): + """ Return index or None for a given location. 
""" + if location == '_begin': + i = 0 + elif location == '_end': + i = None + elif location.startswith('<') or location.startswith('>'): + i = self.index(location[1:]) + if location.startswith('>'): + if i >= len(self): + # last item + i = None + else: + i += 1 + else: + raise ValueError('Not a valid location: "%s". Location key ' + 'must start with a ">" or "<".' % location) + return i + + def add(self, key, value, location): + """ Insert by key location. """ + i = self.index_for_location(location) + if i is not None: + self.insert(i, key, value) + else: + self.__setitem__(key, value) + + def link(self, key, location): + """ Change location of an existing item. """ + n = self.keyOrder.index(key) + del self.keyOrder[n] + i = self.index_for_location(location) + try: + if i is not None: + self.keyOrder.insert(i, key) + else: + self.keyOrder.append(key) + except Error: + # restore to prevent data loss and reraise + self.keyOrder.insert(n, key) + raise Error diff --git a/website/markdown/postprocessors.py b/website/markdown/postprocessors.py new file mode 100644 index 000000000..80227bb90 --- /dev/null +++ b/website/markdown/postprocessors.py @@ -0,0 +1,77 @@ +""" +POST-PROCESSORS +============================================================================= + +Markdown also allows post-processors, which are similar to preprocessors in +that they need to implement a "run" method. However, they are run after core +processing. + +""" + + +import markdown + +class Processor: + def __init__(self, markdown_instance=None): + if markdown_instance: + self.markdown = markdown_instance + +class Postprocessor(Processor): + """ + Postprocessors are run after the ElementTree it converted back into text. + + Each Postprocessor implements a "run" method that takes a pointer to a + text string, modifies it as necessary and returns a text string. + + Postprocessors must extend markdown.Postprocessor. 
+ + """ + + def run(self, text): + """ + Subclasses of Postprocessor should implement a `run` method, which + takes the html document as a single text string and returns a + (possibly modified) string. + + """ + pass + + +class RawHtmlPostprocessor(Postprocessor): + """ Restore raw html to the document. """ + + def run(self, text): + """ Iterate over html stash and restore "safe" html. """ + for i in range(self.markdown.htmlStash.html_counter): + html, safe = self.markdown.htmlStash.rawHtmlBlocks[i] + if self.markdown.safeMode and not safe: + if str(self.markdown.safeMode).lower() == 'escape': + html = self.escape(html) + elif str(self.markdown.safeMode).lower() == 'remove': + html = '' + else: + html = markdown.HTML_REMOVED_TEXT + if safe or not self.markdown.safeMode: + text = text.replace("

    %s

    " % + (markdown.preprocessors.HTML_PLACEHOLDER % i), + html + "\n") + text = text.replace(markdown.preprocessors.HTML_PLACEHOLDER % i, + html) + return text + + def escape(self, html): + """ Basic html escaping """ + html = html.replace('&', '&') + html = html.replace('<', '<') + html = html.replace('>', '>') + return html.replace('"', '"') + + +class AndSubstitutePostprocessor(Postprocessor): + """ Restore valid entities """ + def __init__(self): + pass + + def run(self, text): + text = text.replace(markdown.AMP_SUBSTITUTE, "&") + return text diff --git a/website/markdown/preprocessors.py b/website/markdown/preprocessors.py new file mode 100644 index 000000000..712a1e875 --- /dev/null +++ b/website/markdown/preprocessors.py @@ -0,0 +1,214 @@ + +""" +PRE-PROCESSORS +============================================================================= + +Preprocessors work on source text before we start doing anything too +complicated. +""" + +import re +import markdown + +HTML_PLACEHOLDER_PREFIX = markdown.STX+"wzxhzdk:" +HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + markdown.ETX + +class Processor: + def __init__(self, markdown_instance=None): + if markdown_instance: + self.markdown = markdown_instance + +class Preprocessor (Processor): + """ + Preprocessors are run after the text is broken into lines. + + Each preprocessor implements a "run" method that takes a pointer to a + list of lines of the document, modifies it as necessary and returns + either the same pointer or a pointer to a new list. + + Preprocessors must extend markdown.Preprocessor. + + """ + def run(self, lines): + """ + Each subclass of Preprocessor should override the `run` method, which + takes the document as a list of strings split by newlines and returns + the (possibly modified) list of lines. + + """ + pass + +class HtmlStash: + """ + This class is used for stashing HTML objects that we extract + in the beginning and replace with place-holders. 
+ """ + + def __init__ (self): + """ Create a HtmlStash. """ + self.html_counter = 0 # for counting inline html segments + self.rawHtmlBlocks=[] + + def store(self, html, safe=False): + """ + Saves an HTML segment for later reinsertion. Returns a + placeholder string that needs to be inserted into the + document. + + Keyword arguments: + + * html: an html segment + * safe: label an html segment as safe for safemode + + Returns : a placeholder string + + """ + self.rawHtmlBlocks.append((html, safe)) + placeholder = HTML_PLACEHOLDER % self.html_counter + self.html_counter += 1 + return placeholder + + def reset(self): + self.html_counter = 0 + self.rawHtmlBlocks = [] + + +class HtmlBlockPreprocessor(Preprocessor): + """Remove html blocks from the text and store them for later retrieval.""" + + right_tag_patterns = ["", "%s>"] + + def _get_left_tag(self, block): + return block[1:].replace(">", " ", 1).split()[0].lower() + + def _get_right_tag(self, left_tag, block): + for p in self.right_tag_patterns: + tag = p % left_tag + i = block.rfind(tag) + if i > 2: + return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag) + return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block) + + def _equal_tags(self, left_tag, right_tag): + if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc. 
+ return True + if ("/" + left_tag) == right_tag: + return True + if (right_tag == "--" and left_tag == "--"): + return True + elif left_tag == right_tag[1:] \ + and right_tag[0] != "<": + return True + else: + return False + + def _is_oneliner(self, tag): + return (tag in ['hr', 'hr/']) + + def run(self, lines): + text = "\n".join(lines) + new_blocks = [] + text = text.split("\n\n") + items = [] + left_tag = '' + right_tag = '' + in_tag = False # flag + + while text: + block = text[0] + if block.startswith("\n"): + block = block[1:] + text = text[1:] + + if block.startswith("\n"): + block = block[1:] + + if not in_tag: + if block.startswith("<"): + left_tag = self._get_left_tag(block) + right_tag, data_index = self._get_right_tag(left_tag, block) + + if data_index < len(block): + text.insert(0, block[data_index:]) + block = block[:data_index] + + if not (markdown.isBlockLevel(left_tag) \ + or block[1] in ["!", "?", "@", "%"]): + new_blocks.append(block) + continue + + if self._is_oneliner(left_tag): + new_blocks.append(block.strip()) + continue + + if block[1] == "!": + # is a comment block + left_tag = "--" + right_tag, data_index = self._get_right_tag(left_tag, block) + # keep checking conditions below and maybe just append + + if block.rstrip().endswith(">") \ + and self._equal_tags(left_tag, right_tag): + new_blocks.append( + self.markdown.htmlStash.store(block.strip())) + continue + else: #if not block[1] == "!": + # if is block level tag and is not complete + + if markdown.isBlockLevel(left_tag) or left_tag == "--" \ + and not block.rstrip().endswith(">"): + items.append(block.strip()) + in_tag = True + else: + new_blocks.append( + self.markdown.htmlStash.store(block.strip())) + + continue + + new_blocks.append(block) + + else: + items.append(block.strip()) + + right_tag, data_index = self._get_right_tag(left_tag, block) + + if self._equal_tags(left_tag, right_tag): + # if find closing tag + in_tag = False + new_blocks.append( + 
self.markdown.htmlStash.store('\n\n'.join(items))) + items = [] + + if items: + new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items))) + new_blocks.append('\n') + + new_text = "\n\n".join(new_blocks) + return new_text.split("\n") + + +class ReferencePreprocessor(Preprocessor): + """ Remove reference definitions from text and store for later use. """ + + RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL) + + def run (self, lines): + new_text = []; + for line in lines: + m = self.RE.match(line) + if m: + id = m.group(2).strip().lower() + t = m.group(4).strip() # potential title + if not t: + self.markdown.references[id] = (m.group(3), t) + elif (len(t) >= 2 + and (t[0] == t[-1] == "\"" + or t[0] == t[-1] == "\'" + or (t[0] == "(" and t[-1] == ")") ) ): + self.markdown.references[id] = (m.group(3), t[1:-1]) + else: + new_text.append(line) + else: + new_text.append(line) + + return new_text #+ "\n" diff --git a/website/markdown/treeprocessors.py b/website/markdown/treeprocessors.py new file mode 100644 index 000000000..1dc612a95 --- /dev/null +++ b/website/markdown/treeprocessors.py @@ -0,0 +1,329 @@ +import markdown +import re + +def isString(s): + """ Check if it's string """ + return isinstance(s, unicode) or isinstance(s, str) + +class Processor: + def __init__(self, markdown_instance=None): + if markdown_instance: + self.markdown = markdown_instance + +class Treeprocessor(Processor): + """ + Treeprocessors are run on the ElementTree object before serialization. + + Each Treeprocessor implements a "run" method that takes a pointer to an + ElementTree, modifies it as necessary and returns an ElementTree + object. + + Treeprocessors must extend markdown.Treeprocessor. + + """ + def run(self, root): + """ + Subclasses of Treeprocessor should implement a `run` method, which + takes a root ElementTree. 
This method can return another ElementTree + object, and the existing root ElementTree will be replaced, or it can + modify the current tree and return None. + """ + pass + + +class InlineProcessor(Treeprocessor): + """ + A Treeprocessor that traverses a tree, applying inline patterns. + """ + + def __init__ (self, md): + self.__placeholder_prefix = markdown.INLINE_PLACEHOLDER_PREFIX + self.__placeholder_suffix = markdown.ETX + self.__placeholder_length = 4 + len(self.__placeholder_prefix) \ + + len(self.__placeholder_suffix) + self.__placeholder_re = re.compile(markdown.INLINE_PLACEHOLDER % r'([0-9]{4})') + self.markdown = md + + def __makePlaceholder(self, type): + """ Generate a placeholder """ + id = "%04d" % len(self.stashed_nodes) + hash = markdown.INLINE_PLACEHOLDER % id + return hash, id + + def __findPlaceholder(self, data, index): + """ + Extract id from data string, start from index + + Keyword arguments: + + * data: string + * index: index, from which we start search + + Returns: placeholder id and string index, after the found placeholder. + """ + + m = self.__placeholder_re.search(data, index) + if m: + return m.group(1), m.end() + else: + return None, index + 1 + + def __stashNode(self, node, type): + """ Add node to stash """ + placeholder, id = self.__makePlaceholder(type) + self.stashed_nodes[id] = node + return placeholder + + def __handleInline(self, data, patternIndex=0): + """ + Process string with inline patterns and replace it + with placeholders + + Keyword arguments: + + * data: A line of Markdown text + * patternIndex: The index of the inlinePattern to start with + + Returns: String with placeholders. 
+ + """ + if not isinstance(data, markdown.AtomicString): + startIndex = 0 + while patternIndex < len(self.markdown.inlinePatterns): + data, matched, startIndex = self.__applyPattern( + self.markdown.inlinePatterns.value_for_index(patternIndex), + data, patternIndex, startIndex) + if not matched: + patternIndex += 1 + return data + + def __processElementText(self, node, subnode, isText=True): + """ + Process placeholders in Element.text or Element.tail + of Elements popped from self.stashed_nodes. + + Keywords arguments: + + * node: parent node + * subnode: processing node + * isText: bool variable, True - it's text, False - it's tail + + Returns: None + + """ + if isText: + text = subnode.text + subnode.text = None + else: + text = subnode.tail + subnode.tail = None + + childResult = self.__processPlaceholders(text, subnode) + + if not isText and node is not subnode: + pos = node.getchildren().index(subnode) + node.remove(subnode) + else: + pos = 0 + + childResult.reverse() + for newChild in childResult: + node.insert(pos, newChild) + + def __processPlaceholders(self, data, parent): + """ + Process string with placeholders and generate ElementTree tree. + + Keyword arguments: + + * data: string with placeholders instead of ElementTree elements. + * parent: Element, which contains processing inline data + + Returns: list with ElementTree elements with applied inline patterns. 
+ """ + def linkText(text): + if text: + if result: + if result[-1].tail: + result[-1].tail += text + else: + result[-1].tail = text + else: + if parent.text: + parent.text += text + else: + parent.text = text + + result = [] + strartIndex = 0 + while data: + index = data.find(self.__placeholder_prefix, strartIndex) + if index != -1: + id, phEndIndex = self.__findPlaceholder(data, index) + + if id in self.stashed_nodes: + node = self.stashed_nodes.get(id) + + if index > 0: + text = data[strartIndex:index] + linkText(text) + + if not isString(node): # it's Element + for child in [node] + node.getchildren(): + if child.tail: + if child.tail.strip(): + self.__processElementText(node, child, False) + if child.text: + if child.text.strip(): + self.__processElementText(child, child) + else: # it's just a string + linkText(node) + strartIndex = phEndIndex + continue + + strartIndex = phEndIndex + result.append(node) + + else: # wrong placeholder + end = index + len(prefix) + linkText(data[strartIndex:end]) + strartIndex = end + else: + text = data[strartIndex:] + linkText(text) + data = "" + + return result + + def __applyPattern(self, pattern, data, patternIndex, startIndex=0): + """ + Check if the line fits the pattern, create the necessary + elements, add it to stashed_nodes. + + Keyword arguments: + + * data: the text to be processed + * pattern: the pattern to be checked + * patternIndex: index of current pattern + * startIndex: string index, from which we starting search + + Returns: String with placeholders instead of ElementTree elements. 
+ + """ + match = pattern.getCompiledRegExp().match(data[startIndex:]) + leftData = data[:startIndex] + + if not match: + return data, False, 0 + + node = pattern.handleMatch(match) + + if node is None: + return data, True, len(leftData) + match.span(len(match.groups()))[0] + + if not isString(node): + if not isinstance(node.text, markdown.AtomicString): + # We need to process current node too + for child in [node] + node.getchildren(): + if not isString(node): + if child.text: + child.text = self.__handleInline(child.text, + patternIndex + 1) + if child.tail: + child.tail = self.__handleInline(child.tail, + patternIndex) + + placeholder = self.__stashNode(node, pattern.type()) + + return "%s%s%s%s" % (leftData, + match.group(1), + placeholder, match.groups()[-1]), True, 0 + + def run(self, tree): + """Apply inline patterns to a parsed Markdown tree. + + Iterate over ElementTree, find elements with inline tag, apply inline + patterns and append newly created Elements to tree. If you don't + want process your data with inline paterns, instead of normal string, + use subclass AtomicString: + + node.text = markdown.AtomicString("data won't be processed with inline patterns") + + Arguments: + + * markdownTree: ElementTree object, representing Markdown tree. + + Returns: ElementTree object with applied inline patterns. 
+ + """ + self.stashed_nodes = {} + + stack = [tree] + + while stack: + currElement = stack.pop() + insertQueue = [] + for child in currElement.getchildren(): + if child.text and not isinstance(child.text, markdown.AtomicString): + text = child.text + child.text = None + lst = self.__processPlaceholders(self.__handleInline( + text), child) + stack += lst + insertQueue.append((child, lst)) + + if child.getchildren(): + stack.append(child) + + for element, lst in insertQueue: + if element.text: + element.text = \ + markdown.inlinepatterns.handleAttributes(element.text, + element) + i = 0 + for newChild in lst: + # Processing attributes + if newChild.tail: + newChild.tail = \ + markdown.inlinepatterns.handleAttributes(newChild.tail, + element) + if newChild.text: + newChild.text = \ + markdown.inlinepatterns.handleAttributes(newChild.text, + newChild) + element.insert(i, newChild) + i += 1 + return tree + + +class PrettifyTreeprocessor(Treeprocessor): + """ Add linebreaks to the html document. """ + + def _prettifyETree(self, elem): + """ Recursively add linebreaks to ElementTree children. """ + + i = "\n" + if markdown.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: + if (not elem.text or not elem.text.strip()) \ + and len(elem) and markdown.isBlockLevel(elem[0].tag): + elem.text = i + for e in elem: + if markdown.isBlockLevel(e.tag): + self._prettifyETree(e) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + if not elem.tail or not elem.tail.strip(): + elem.tail = i + + def run(self, root): + """ Add linebreaks to ElementTree root object. """ + + self._prettifyETree(root) + # Do
    's seperately as they are often in the middle of + # inline content and missed by _prettifyETree. + brs = root.getiterator('br') + for br in brs: + if not br.tail or not br.tail.strip(): + br.tail = '\n' + else: + br.tail = '\n%s' % br.tail diff --git a/website/static/base.css b/website/static/base.css new file mode 100644 index 000000000..543d6f24c --- /dev/null +++ b/website/static/base.css @@ -0,0 +1,120 @@ +body { + background: white; + color: black; + font-family: Georgia, serif; + font-size: 11pt; + margin: 10px; + margin-top: 15px; + margin-bottom: 15px; +} + +h1, +h2, +h3, +h4 { + font-family: Calibri, sans-serif; + margin: 0; +} + +img { + border: 0; +} + +pre, +code { + color: #060; +} + +a, +a code { + color: #216093; +} + +table { + border-collapse: collapse; + border: 0; +} + +td { + border: 0; + padding: 0; +} + +#body { + margin: auto; + max-width: 850px; +} + +#header { + margin-bottom: 15px; + margin-right: 30px; +} + +#content, +#footer { + margin-left: 31px; + margin-right: 31px; +} + +#content p, +#content li, +#footer { + line-height: 16pt; +} + +#content pre { + line-height: 14pt; + margin: 17pt; + padding-left: 1em; + border-left: 1px solid #ccc; +} + +#footer { + margin-top: 5em; +} + +#header .logo { + line-height: 0; + padding-bottom: 5px; + padding-right: 15px; +} + +#header .logo img { + width: 286px; + height: 72px; +} + +#header .title { + vertical-align: bottom; +} + +#header .title h1 { + font-size: 35px; + font-weight: normal; +} + +#header .title h1, +#header .title h1 a { + color: #666; +} + +#content h1, +#content h2, +#content h3 { + color: #4d8cbf; + margin-bottom: 2pt; + margin-top: 17pt; +} + +#content h2 { + font-size: 19pt; +} + +#content h3 { + font-size: 15pt; +} + +#content p { + margin: 0; + margin-bottom: 1em; +} diff --git a/website/static/robots.txt b/website/static/robots.txt new file mode 100644 index 000000000..0ad279c73 --- /dev/null +++ b/website/static/robots.txt @@ -0,0 +1,2 @@ +User-Agent: * +Disallow: 
diff --git a/website/static/tornado-0.1.tar.gz b/website/static/tornado-0.1.tar.gz new file mode 100644 index 000000000..f282add88 Binary files /dev/null and b/website/static/tornado-0.1.tar.gz differ diff --git a/website/static/tornado.png b/website/static/tornado.png new file mode 100644 index 000000000..a920aa566 Binary files /dev/null and b/website/static/tornado.png differ diff --git a/website/templates/base.html b/website/templates/base.html new file mode 100644 index 000000000..02aef4898 --- /dev/null +++ b/website/templates/base.html @@ -0,0 +1,27 @@ + + + + + {% block title %}Tornado Web Server{% end %} + + {% block head %}{% end %} + + +
    + {% block bottom %}{% end %} + + diff --git a/website/templates/documentation.html b/website/templates/documentation.html new file mode 100644 index 000000000..8c2874087 --- /dev/null +++ b/website/templates/documentation.html @@ -0,0 +1,9 @@ +{% extends "base.html" %} + +{% block title %}Tornado Web Server Documentation{% end %} + +{% block headertitle %}

    documentation

    {% end %} + +{% block body %} + {{ markdown("documentation.txt", toc=True) }} +{% end %} diff --git a/website/templates/documentation.txt b/website/templates/documentation.txt new file mode 100644 index 000000000..f7c0a0f0d --- /dev/null +++ b/website/templates/documentation.txt @@ -0,0 +1,856 @@ +Overview +-------- +[FriendFeed](http://friendfeed.com/)'s web server is a relatively simple, +non-blocking web server written in Python. The FriendFeed application is +written using a web framework that looks a bit like +[web.py](http://webpy.org/) or Google's +[webapp](http://code.google.com/appengine/docs/python/tools/webapp/), +but with additional tools and optimizations to take advantage of the +non-blocking web server and tools. + +[Tornado](http://github.com/facebook/tornado) is an open source +version of this web server and some of the tools we use most often at +FriendFeed. The framework is distinct from most mainstream web server +frameworks (and certainly most Python frameworks) because it is +non-blocking and reasonably fast. Because it is non-blocking +and uses [epoll](http://www.kernel.org/doc/man-pages/online/pages/man4/epoll.4.html), it can handle 1000s of simultaneous standing connections, +which means the framework is ideal for real-time web services. We built the +web server specifically to handle FriendFeed's real-time features — +every active user of FriendFeed maintains an open connection to the +FriendFeed servers. (For more information on scaling servers to support +thousands of clients, see +[The C10K problem](http://www.kegel.com/c10k.html).) 
+ +Here is the canonical "Hello, world" example app: + + import tornado.httpserver + import tornado.ioloop + import tornado.web + + class MainHandler(tornado.web.RequestHandler): + def get(self): + self.write("Hello, world") + + application = tornado.web.Application([ + (r"/", MainHandler), + ]) + + if __name__ == "__main__": + http_server = tornado.httpserver.HTTPServer(application) + http_server.listen(8888) + tornado.ioloop.IOLoop.instance().start() + +See [Tornado walkthrough](#tornado-walkthrough) below for a detailed +walkthrough of the `tornado.web` package. + +We attempted to clean up the code base to reduce interdependencies between +modules, so you should (theoretically) be able to use any of the modules +independently in your project without using the whole package. + + +Download +-------- +Download the most recent version of Tornado from GitHub: + +> [tornado-0.1.tar.gz](/static/tornado-0.1.tar.gz) + +You can also [browse the source](http://github.com/facebook/tornado) on GitHub. + +To install Tornado: + + tar xvzf tornado-1.0.tar.gz + cd tornado-1.0 + python setup.py build + sudo python setup.py install + +After installation, you should be able to run any of the demos in the `demos` +directory included with the Tornado package. + + ./demos/helloworld/helloworld.py + + +Module index +------------ +The most important module is [`web`](http://github.com/facebook/tornado/blob/master/tornado/web.py), which is the web framework +that includes most of the meat of the Tornado package. The other modules +are tools that make `web` more useful. See +[Tornado walkthrough](#tornado-walkthrough) below for a detailed +walkthrough of the `web` package. + +### Main modules + * [`web`](http://github.com/facebook/tornado/blob/master/tornado/web.py) - The web framework on which FriendFeed is built. 
`web` incorporates most of the important features of Tornado + * [`escape`](http://github.com/facebook/tornado/blob/master/tornado/escape.py) - XHTML, JSON, and URL encoding/decoding methods + * [`database`](http://github.com/facebook/tornado/blob/master/tornado/database.py) - A simple wrapper around `MySQLdb` to make MySQL easier to use + * [`template`](http://github.com/facebook/tornado/blob/master/tornado/template.py) - A Python-based web templating language + * [`httpclient`](http://github.com/facebook/tornado/blob/master/tornado/httpclient.py) - A non-blocking HTTP client designed to work with `web` and `httpserver` + * [`auth`](http://github.com/facebook/tornado/blob/master/tornado/auth.py) - Implementation of third party authentication and authorization schemes (Google OpenID/OAuth, Facebook Platform, Yahoo BBAuth, FriendFeed OpenID/OAuth, Twitter OAuth) + * [`locale`](http://github.com/facebook/tornado/blob/master/tornado/locale.py) - Localization/translation support + * [`options`](http://github.com/facebook/tornado/blob/master/tornado/options.py) - Command line and config file parsing, optimized for server environments + +### Low-level modules + * [`httpserver`](http://github.com/facebook/tornado/blob/master/tornado/httpserver.py) - A very simple HTTP server built on which `web` is built + * [`iostream`](http://github.com/facebook/tornado/blob/master/tornado/iostream.py) - A simple wrapper around non-blocking sockets to aide common reading and writing patterns + * [`ioloop`](http://github.com/facebook/tornado/blob/master/tornado/ioloop.py) - Core I/O loop + +### Random modules + * [`s3server`](http://github.com/facebook/tornado/blob/master/tornado/s3server.py) - A web server that implements most of the [Amazon S3](http://aws.amazon.com/s3/) interface, backed by local file storage + + +Tornado walkthrough +------------------- + +### Request handlers and request arguments + +A Tornado web application maps URLs or URL patterns to subclasses of 
+`tornado.web.RequestHandler`. Those classes define `get()` or `post()` +methods to handle HTTP `GET` or `POST` requests to that URL. + +This code maps the root URL `/` to `MainHandler` and the URL pattern +`/story/([0-9]+)` to `StoryHandler`. Regular expression groups are passed +as arguments to the `RequestHandler` methods: + + class MainHandler(tornado.web.RequestHandler): + def get(self): + self.write("You requested the main page") + + class StoryHandler(tornado.web.RequestHandler): + def get(self, story_id): + self.write("You requested the story " + story_id) + + application = tornado.web.Application([ + (r"/", MainHandler), + (r"/story/([0-9]+)", StoryHandler), + ]) + +You can get query string arguments and parse `POST` bodies with the +`get_argument()` method: + + class MainHandler(tornado.web.RequestHandler): + def get(self): + self.write('
    ' + '' + '' + '
    ') + + def post(self): + self.set_header("Content-Type", "text/plain") + self.write("You wrote " + self.get_argument("message")) + +If you want to send an error response to the client, e.g., 403 Unauthorized, +you can just raise a `tornado.web.HTTPError` exception: + + if not self.user_is_logged_in(): + raise tornado.web.HTTPError(403) + +The request handler can access the object representing the current request +with `self.request`. The `HTTPRequest` object includes a number of useful +attribute, including: + + * `arguments` - all of the `GET` and `POST` arguments + * `files` - all of the uploaded files (via `multipart/form-data` POST requests) + * `path` - the request path (everything before the `?`) + * `headers` - the request headers + +See the class definition for `HTTPRequest` in `httpserver` for a complete list +of attributes. + + +### Templates + +You can use any template language supported by Python, but Tornado ships +with its own templating language that is a lot faster and more flexible +than many of the most popular templating systems out there. See the +[`template`](http://github.com/facebook/tornado/blob/master/tornado/template.py) module documentation for complete documentation. + +A Tornado template is just HTML (or any other text-based format) with +Python control sequences and expressions embedded within the markup: + + + + {{ title }} + + +
      + {% for item in items %} +
    • {{ escape(item) }}
    • + {% end %} +
    + + + +If you saved this template as "template.html" and put it in the same +directory as your Python file, you could render this template with: + + class MainHandler(tornado.web.RequestHandler): + def get(self): + items = ["Item 1", "Item 2", "Item 3"] + self.render("template.html", title="My title", items=items) + +Tornado templates support *control statements* and *expressions*. Control +statements are surronded by `{%` and `%}`, e.g., `{% if len(items) > 2 %}`. +Expressions are surrounded by `{{` and `}}`, e.g., `{{ items[0] }}`. + +Control statements more or less map exactly to Python statements. We support +`if`, `for`, `while`, and `try`, all of which are terminated with `{% end %}`. +We also support *template inheritance* using the `extends` and `block` +statements, which are described in detail in the documentation for the +[`template` module](http://github.com/facebook/tornado/blob/master/tornado/template.py). + +Expressions can be any Python expression, including function calls. We +support the functions `escape`, `url_escape`, and `json_encode` by default, +and you can pass other functions into the template simply by passing them +as keyword arguments to the template render function: + + class MainHandler(tornado.web.RequestHandler): + def get(self): + self.render("template.html", add=self.add) + + def add(self, x, y): + return x + y + +When you are building a real application, you are going to want to use +all of the features of Tornado templates, especially template inheritance. +Read all about those features in the [`template` module](http://github.com/facebook/tornado/blob/master/tornado/template.py) +section. + +Under the hood, Tornado templates are translated directly to Python. +The expressions you include in your template are copied verbatim into +a Python function representing your template. 
We don't try to prevent +anything in the template language; we created it explicitly to provide +the flexibility that other, stricter templating systems prevent. +Consequently, if you write random stuff inside of your template expressions, +you will get random Python errors when you execute the template. + + +### Cookies and secure cookies + +You can set cookies in the user's browser with the `set_cookie` method: + + class MainHandler(tornado.web.RequestHandler): + def get(self): + if not self.get_cookie("mycookie"): + self.set_cookie("mycookie", "myvalue") + self.write("Your cookie was not set yet!") + else: + self.write("Your cookie was set!") + +Cookies are easily forged by malicious clients. If you need to set cookies +to, e.g., save the user ID of the currently logged in user, you need to +sign your cookies to prevent forgery. Tornado supports this out of the +box with the `set_secure_cookie` and `get_secure_cookie` methods. To use +these methods, you need to specify a secret key named `cookie_secret` when +you create your application. You can pass in application settings as keyword +arguments to your application: + + application = tornado.web.Application([ + (r"/", MainHandler), + ], cookie_secret="61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=") + +Signed cookies contain the encoded value of the cookie in addition to a +timestamp and an [HMAC](http://en.wikipedia.org/wiki/HMAC) signature. If the +cookie is old or if the signature doesn't match, `get_secure_cookie` will +return `None` just as if the cookie isn't set. 
The secure version of the +example above: + + class MainHandler(tornado.web.RequestHandler): + def get(self): + if not self.get_secure_cookie("mycookie"): + self.set_secure_cookie("mycookie", "myvalue") + self.write("Your cookie was not set yet!") + else: + self.write("Your cookie was set!") + + +### User authentication + +The currently authenticated user is available in every request handler +as `self.current_user`, and in every template as `current_user`. By +default, `current_user` is `None`. + +To implement user authentication in your application, you need to +override the `get_current_user()` method in your request handlers to +determine the current user based on, e.g., the value of a cookie. +Here is an example that lets users log into the application simply +by specifying a nickname, which is then saved in a cookie: + + class BaseHandler(tornado.web.RequestHandler): + def get_current_user(self): + return self.get_secure_cookie("user") + + class MainHandler(BaseHandler): + def get(self): + if not self.current_user: + self.redirect("/login") + return + name = tornado.escape.xhtml_escape(self.current_user) + self.write("Hello, " + name) + + class LoginHandler(BaseHandler): + def get(self): + self.write('
    ' + 'Name: ' + '' + '
    ') + + def post(self): + self.set_secure_cookie("user", self.get_argument("name")) + self.redirect("/") + + application = tornado.web.Application([ + (r"/", MainHandler), + (r"/login", LoginHandler), + ], cookie_secret="61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=") + +You can require that the user be logged in using the +[Python decorator](http://www.python.org/dev/peps/pep-0318/) +`tornado.web.authenticated`. If a request goes to a method with this +decorator, and the user is not logged in, they will be redirected to +`login_url` (another application setting). The example above could +be rewritten: + + class MainHandler(BaseHandler): + @tornado.web.authenticated + def get(self): + name = tornado.escape.xhtml_escape(self.current_user) + self.write("Hello, " + name) + + settings = { + "cookie_secret": "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + "login_url": "/login", + } + application = tornado.web.Application([ + (r"/", MainHandler), + (r"/login", LoginHandler), + ], **settings) + +If you decorate `post()` methods with the `authenticated` decorator, and +the user is not logged in, the server will send a `403` response. + +Tornado comes with built-in support for third-party authentication schemes +like Google OAuth. See the [`auth` module](http://github.com/facebook/tornado/blob/master/tornado/auth.py) for more details. Check +out the Tornado Blog example application for a complete example that +uses authentication (and stores user data in a MySQL database). + + +### Cross-site request forgery protection + +[Cross-site request forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery), or XSRF, is a common problem for personalized web applcations. See the +[Wikipedia article](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +for more information on how XSRF works. 
+ +The generally accepted solution to prevent XSRF is to cookie every user +with an unpredictable value and include that value as an additional +argument with every form submission on your site. If the cookie and the +value in the form submission do not match, then the request is likely +forged. + +Tornado comes with built-in XSRF protection. To include it in your site, +include the application setting `xsrf_cookies`: + + settings = { + "cookie_secret": "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + "login_url": "/login", + "xsrf_cookies": True, + } + application = tornado.web.Application([ + (r"/", MainHandler), + (r"/login", LoginHandler), + ], **settings) + +If `xsrf_cookies` is set, the Tornado web application will set the `_xsrf` +cookie for all users and reject all `POST` requests hat do not contain a +correct `_xsrf` value. If you turn this setting on, you need to instrument +all forms that submit via `POST` to contain this field. You can do this with +the special function `xsrf_form_html()`, available in all templates: + +
    + {{ xsrf_form_html() }} +
    Username:
    +
    Password:
    +
    +
    + +If you submit AJAX `POST` requests, you will also need to instrument your +JavaScript to include the `_xsrf` value with each request. This is the +[jQuery](http://jquery.com/) function we use at FriendFeed for AJAX `POST` +requests that automatically adds the `_xsrf` value to all requests: + + function getCookie(name) { + var r = document.cookie.match("\\b" + name + "=([^;]*)\\b"); + return r ? r[1] : undefined; + } + + jQuery.postJSON = function(url, args, callback) { + args._xsrf = getCookie("_xsrf"); + $.ajax({url: url, data: $.param(args), dataType: "text", type: "POST", + success: function(response) { + callback(eval("(" + response + ")")); + }}); + }; + + +### Static files and aggressive file caching + +You can serve static files from Tornado by specifying the `static_path` +setting in your application: + + settings = { + "static_path": os.path.join(os.path.dirname(__file__), "static"), + "cookie_secret": "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + "login_url": "/login", + "xsrf_cookies": True, + } + application = tornado.web.Application([ + (r"/", MainHandler), + (r"/login", LoginHandler), + ], **settings) + +This setting will automatically make all requests that start with `/static/` +serve from that static directory, e.g., [http://localhost:8888/static/foo.png](http://localhost:8888/static/foo.png) +will serve the file `foo.png` from the specified static directory. We +also automatically serve `/robots.txt` and `/favicon.ico` from the static +directory (even though they don't start with the `/static/` prefix). + +To improve performance, it is generally a good idea for browsers to +cache static resources aggressively so browsers won't send unnecessary +`If-Modified-Since` or `Etag` requests that might block the rendering of +the page. Tornado supports this out of the box with *static content +versioning*. 
+ +To use this feature, use the `static_url()` method in your templates rather +than typing the URL of the static file directly in your HTML: + + + + FriendFeed - {{ _("Home") }} + + +
    + + + +The `static_url()` function will translate that relative path to a URI +that looks like `/static/images/logo.png?v=aae54`. The `v` argument is +a hash of the content in `logo.png`, and its presence makes the Tornado +server send cache headers to the user's browser that will make the browser +cache the content indefinitely. + +Since the `v` argument is based on the content of the file, if you update +a file and restart your server, it will start sending a new `v` value, +so the user's browser will automatically fetch the new file. If the file's +contents don't change, the browser will continue to use a locally cached +copy without ever checking for updates on the server, significantly +improving rendering performance. + +In production, you probably want to serve static files from a more +optimized static file server like [nginx](http://nginx.net/). You can +configure most any web server to support these caching semantics. Here +is the nginx configuration we use at FriendFeed: + + location /static/ { + root /var/friendfeed/static; + if ($query_string) { + expires max; + } + } + + +### Localization + +The locale of the current user (whether they are logged in or not) is +always available as `self.locale` in the request handler and as `locale` +in templates. The name of the locale (e.g., `en_US`) is available as +`locale.name`, and you can translate strings with the `locale.translate` +method. Templates also have the global function call `_()` available +for string translation. The translate function has two forms: + + _("Translate this string") + +which translates the string directly based on the current locale, and + + _("A person liked this", "%(num)d people liked this", len(people)) % {"num": len(people)} + +which translates a string that can be singular or plural based on the value +of the third argument. 
In the example above, a translation of the first +string will be returned if `len(people)` is `1`, or a translation of the +second string will be returned otherwise. + +The most common pattern for translations is to use Python named placeholders +for variables (the `%(num)d` in the example above) since placeholders can +move around on translation. + +Here is a properly localized template: + + + + FriendFeed - {{ _("Sign in") }} + + +
    +
    {{ _("Username") }}
    +
    {{ _("Password") }}
    +
    + {{ xsrf_form_html() }} +
    + + + +By default, we detect the user's locale using the `Accept-Language` header +sent by the user's browser. We choose `en_US` if we can't find an appropriate +`Accept-Language` value. If you let user's set their locale as a preference, +you can override this default locale selection by overriding `get_user_locale` +in your request handler: + + class BaseHandler(tornado.web.RequestHandler): + def get_current_user(self): + user_id = self.get_secure_cookie("user") + if not user_id: return None + return self.backend.get_user_by_id(user_id) + + def get_user_locale(self): + if "locale" not in self.current_user.prefs: + # Use the Accept-Language header + return None + return self.current_user.prefs["locale"] + +If `get_user_locale` returns `None`, we fall back on the `Accept-Language` +header. + +You can load all the translations for your application using the +`tornado.locale.load_translations` method. It takes in the name of the +directory which should contain CSV files named after the locales whose +translations they contain, e.g., `es_GT.csv` or `fr_CA.csv`. The method +loads all the translations from those CSV files and infers the list of +supported locales based on the presence of each CSV file. You typically +call this method once in the `main()` method of your server: + + def main(): + tornado.locale.load_translations( + os.path.join(os.path.dirname(__file__), "translations")) + start_server() + +You can get the list of supported locales in your application with +`tornado.locale.get_supported_locales()`. The user's locale is chosen to +be the closest match based on the supported locales. For example, if the +user's locale is `es_GT`, and the `es` locale is supported, `self.locale` +will be `es` for that request. We fall back on `en_US` if no close match +can be found. + +See the [`locale` module](http://github.com/facebook/tornado/blob/master/tornado/locale.py) documentation for detailed information +on the CSV format and other localization methods. 
+ + +### UI modules + +Tornado supports *UI modules* to make it easy to support standard, reusable +UI widgets across your application. UI modules are like special functional +calls to render components of your page, and they can come packaged with +their own CSS and JavaScript. + +For example, if you are implementing a blog, and you want to have +blog entries appear on both the blog home page and on each blog entry page, +you can make an `Entry` module to render them on both pages. First, create +a Python module for your UI modules, e.g., `uimodules.py`: + + class Entry(tornado.web.UIModule): + def render(self, entry, show_comments=False): + return self.render_string( + "module-entry.html", show_comments=show_comments) + +Tell Tornado to use `uimodules.py` using the `ui_modules` setting in your +application: + + class HomeHandler(tornado.web.RequestHandler): + def get(self): + entries = self.db.query("SELECT * FROM entries ORDER BY date DESC") + self.render("home.html", entries=entries) + + class EntryHandler(tornado.web.RequestHandler): + def get(self, entry_id): + entry = self.db.get("SELECT * FROM entries WHERE id = %s", entry_id) + if not entry: raise tornado.web.HTTPError(404) + self.render("entry.html", entry=entry) + + settings = { + "ui_modules": uimodules, + } + application = tornado.web.Application([ + (r"/", HomeHandler), + (r"/entry/([0-9]+)", EntryHandler), + ], **settings) + +Within `home.html`, you reference the `Entry` module rather than printing +the HTML directly: + + {% for entry in entries %} + {{ modules.Entry(entry) }} + {% end %} + +Within `entry.html`, you reference the `Entry` module with the +`show_comments` argument to show the expanded form of the entry: + + {{ modules.Entry(entry, show_comments=True) }} + +Modules can include custom CSS and JavaScript functions by overriding +the `embedded_css`, `embedded_javascript`, `javascript_file`, or +`css_file` methods: + + class Entry(tornado.web.UIModule): + def embedded_css(self): + return 
".entry { margin-bottom: 1em; }" + + def render(self, entry, show_comments=False): + return self.render_string( + "module-entry.html", show_comments=show_comments) + +Module CSS and JavaScript will be included once no matter how many times +a module is used on a page. CSS is always included in the `` of the +page, and JavaScript is always included just before the `` tag +at the end of the page. + + +### Non-blocking, asynchronous requests + +When a request handler is executed, the request is automatically finished. +Since Tornado uses a non-blocking I/O style, you can override this default +behavior if you want a request to remain open after the main request handler +method returns using the `tonado.web.asynchronous` decorator. + +When you use this decorator, it is your responsibility to call +`self.finish()` to finish the HTTP request, or the user's browser +will simply hang: + + class MainHandler(tornado.web.RequestHandler): + @tonado.web.asynchronous + def get(self): + self.write("Hello, world") + self.finish() + +Here is a real example that makes a call to the FriendFeed API using +Tornado's built-in asynchronous HTTP client: + + class MainHandler(tornado.web.RequestHandler): + @tornado.web.asynchronous + def get(self): + http = tornado.httpclient.AsyncHTTPClient() + http.fetch("http://friendfeed-api.com/v2/feed/bret", + callback=self.async_callback(self.on_response)) + + def on_response(self, response): + if response.error: raise tornado.web.HTTPError(500) + json = tornado.escape.json_decode(response.body) + self.write("Fetched " + str(len(json["entries"])) + " entries " + "from the FriendFeed API") + self.finish() + +When `get()` returns, the request has not finished. When the HTTP client +eventually calls `on_response()`, the request is still open, and the response +is finally flushed to the client with the call to `self.finish()`. 
+ +If you make calls to asynchronous library functions that require a callback +(like the HTTP `fetch` function above), you should always wrap your +callbacks with `self.async_callback`. This simple wrapper ensures that if +your callback function raises an exception or has a programming error, +a proper HTTP error response will be sent to the browser, and the connection +will be properly closed. + +For a more advanced asynchronous example, take a look at the `chat` example +application, which implements an AJAX chat room using +[long polling](http://en.wikipedia.org/wiki/Push_technology#Long_polling). + + +### Third party authentication + +Tornado's `auth` module implements the authentication and authorization +protocols for a number of the most popular sites on the web, including +Google/Gmail, Facebook, Twitter, Yahoo, and FriendFeed. The module includes +methods to log users in via these sites and, where applicable, methods to +authorize access to the service so you can, e.g., download a user's address +book or publish a Twitter message on their behalf. + +Here is an example handler that uses Google for authentication, saving +the Google credentials in a cookie for later access: + + class GoogleHandler(tornado.web.RequestHandler, tornado.auth.GoogleMixin): + @tornado.web.asynchronous + def get(self): + if self.get_argument("openid.mode", None): + self.get_authenticated_user(self.async_callback(self._on_auth)) + return + self.authenticate_redirect() + + def _on_auth(self, user): + if not user: + self.authenticate_redirect() + return + # Save the user with, e.g., set_secure_cookie() + +See the `auth` module documentation for more details. + + +Performance +----------- +Web application performance is generally bound by architecture, not frontend +performance. That said, Tornado is pretty fast relative to most popular +Python web frameworks. 
+ +We ran a few remedial load tests on a simple "Hello, world" application +in each of the most popular Python web frameworks +([Django](http://www.djangoproject.com/), [web.py](http://webpy.org/), and +[CherryPy](http://www.cherrypy.org/)) to get the baseline performance of +each relative to Tornado. We used Apache/mod_wsgi for Django and web.py +and ran CherryPy as a standalone server, which was our impression of how +each framework is typically run in production environments. We ran 4 +single-threaded Tornado frontends behind an [nginx](http://nginx.net/) +reverse proxy, which is how we recommend running Tornado in production +(our load test machine had four cores, and we recommend 1 frontend per +core). + +We load tested each with Apache Benchmark (`ab`) on the a separate machine +with the command + + ab -n 100000 -c 25 http://10.0.1.x/ + +The results (requests per second) on a 2.4GHz AMD Opteron processor with +4 cores: + +
    + +In our tests, Tornado consistently had 4X the throughput of the next fastest +framework, and even a single standalone Tornado frontend got 33% more +throughput even though it only used one of the four cores. + +Not very scientific, but at a high level, it should give you a sense that we +have cared about performance as we built Tornado, and it shouldn't add too +much latency to your apps relative to most Python web development frameworks. + + +Running Tornado in production +----------------------------- +At FriendFeed, we use [nginx](http://nginx.net/) as a load balancer +and static file server. We run multiple instances of the Tornado web +server on multiple frontend machines. We typically run one Tornado frontend +per core on the machine (sometimes more depending on utilization). + +This is a barebones nginx config file that is structurally similar to the +one we use at FriendFeed. It assumes nginx and the Tornado servers +are running on the same machine, and the four Tornado servers +are running on ports 8000 - 8003: + + user nginx; + worker_processes 1; + + error_log /var/log/nginx/error.log; + pid /var/run/nginx.pid; + + events { + worker_connections 1024; + use epoll; + } + + http { + # Enumerate all the Tornado servers here + upstream frontends { + server 127.0.0.1:8000; + server 127.0.0.1:8001; + server 127.0.0.1:8002; + server 127.0.0.1:8003; + } + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + access_log /var/log/nginx/access.log; + + keepalive_timeout 65; + proxy_read_timeout 200; + sendfile on; + tcp_nopush on; + tcp_nodelay on; + gzip on; + gzip_min_length 1000; + gzip_proxied any; + gzip_types text/plain text/html text/css text/xml + application/x-javascript application/xml + application/atom+xml text/javascript; + + # Only retry if there was a communication error, not a timeout + # on the Tornado server (to avoid propagating "queries of death" + # to all frontends) + proxy_next_upstream error; + + server { + 
listen 80; + + # Allow file uploads + client_max_body_size 50M; + + location ^~ /static/ { + root /var/www; + if ($query_string) { + expires max; + } + } + location = /favicon.ico { + rewrite (.*) /static/favicon.ico; + } + location = /robots.txt { + rewrite (.*) /static/robots.txt; + } + + location / { + proxy_pass_header Server; + proxy_set_header Host $http_host; + proxy_redirect false; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Scheme $scheme; + proxy_pass http://frontends; + } + } + } + + +WSGI and Google AppEngine +------------------------- +Tornado comes with limited support for [WSGI](http://wsgi.org/). However, +since WSGI does not support non-blocking requests, you cannot use any +of the asynchronous/non-blocking features of Tornado in your application +if you choose to use WSGI instead of Tornado's HTTP server. Some of the +features that are not available in WSGI applications: +`@tornado.web.asynchronous`, the `httpclient` module, and the `auth` module. + +You can create a valid WSGI application from your Tornado request handlers +by using `WSGIApplication` in the `wsgi` module instead of using +`tornado.web.Application`. Here is an example that uses the built-in WSGI +`CGIHandler` to make a valid +[Google AppEngine](http://code.google.com/appengine/) application: + + import tornado.web + import tornado.wsgi + import wsgiref.handlers + + class MainHandler(tornado.web.RequestHandler): + def get(self): + self.write("Hello, world") + + if __name__ == "__main__": + application = tornado.wsgi.WSGIApplication([ + (r"/", MainHandler), + ]) + wsgiref.handlers.CGIHandler().run(application) + +See the `appengine` example application for a full-featured AppEngine +app built on Tornado. + + +Caveats and support +------------------- +Tornado was refactored from the [FriendFeed](http://friendfeed.com/) +code base to reduce dependencies. This refactoring may have introduced bugs. 
+Likewise, because the FriendFeed servers have always run +[behind nginx](#running-tornado-in-production), Tornado has not been +extensively tested with HTTP/1.1 clients beyond Firefox. Tornado currently +does not attempt to handle multi-line headers and some types of malformed +input. + +You can discuss Tornado and report bugs on [the Tornado developer mailing list](http://groups.google.com/group/python-tornado). diff --git a/website/templates/index.html b/website/templates/index.html new file mode 100644 index 000000000..d30cd2463 --- /dev/null +++ b/website/templates/index.html @@ -0,0 +1,48 @@ +{% extends "base.html" %} + +{% block body %} +

    Tornado is an open source version of the scalable, non-blocking web server and tools that power FriendFeed. The FriendFeed application is written using a web framework that looks a bit like web.py or Google's webapp, but with additional tools and optimizations to take advantage of the underlying non-blocking infrastructure.

    +

    The framework is distinct from most mainstream web server frameworks (and certainly most Python frameworks) because it is non-blocking and reasonably fast. Because it is non-blocking and uses epoll, it can handle thousands of simultaneous standing connections, which means it is ideal for real-time web services. We built the web server specifically to handle FriendFeed's real-time features — every active user of FriendFeed maintains an open connection to the FriendFeed servers. (For more information on scaling servers to support thousands of clients, see The C10K problem.)

    +

    See the Tornado documentation for a detailed walkthrough of the framework.

    + +

    Download and install

    +

    Download: tornado-0.1.tar.gz

    +
    tar xvzf tornado-0.1.tar.gz
    +cd tornado-0.1
    +python setup.py build
    +sudo python setup.py install
    +

    The Tornado source code is hosted on GitHub.

    + +

    Prerequisites

    +

    Tornado has been tested on Python 2.5 and 2.6. To use all of the features of Tornado, you need to have PycURL and a JSON library like simplejson installed. Complete installation instructions for Mac OS X and Ubuntu are included below for convenience.

    +

    Mac OS X 10.5/10.6

    +
    sudo easy_install setuptools pycurl==7.16.2.1 simplejson
    + +

    Ubuntu Linux

    +
    sudo apt-get install python-pycurl python-simplejson
    + +

    Hello, world

    +

    Here is the canonical "Hello, world" example app for Tornado:

    +
    import tornado.httpserver
    +import tornado.ioloop
    +import tornado.web
    +
    +class MainHandler(tornado.web.RequestHandler):
    +    def get(self):
    +        self.write("Hello, world")
    +
    +application = tornado.web.Application([
    +    (r"/", MainHandler),
    +])
    +
    +if __name__ == "__main__":
    +    http_server = tornado.httpserver.HTTPServer(application)
    +    http_server.listen(8888)
    +    tornado.ioloop.IOLoop.instance().start()
    +

    See the Tornado documentation for a detailed walkthrough of the framework.

    + +

    Discussion and support

    +

    You can discuss Tornado and report bugs on the Tornado developer mailing list. + +{% end %} + diff --git a/website/website.py b/website/website.py new file mode 100644 index 000000000..f073b67e6 --- /dev/null +++ b/website/website.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# +# Copyright 2009 Bret Taylor +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import markdown +import os +import os.path +import time +import tornado.web +import tornado.wsgi +import wsgiref.handlers + + +class ContentHandler(tornado.web.RequestHandler): + def get(self, path): + paths = ("documentation", "index") + if not path: path = "index" + if path not in paths: + raise tornado.web.HTTPError(404) + self.render(path + ".html", markdown=self.markdown) + + def markdown(self, path, toc=False): + if not hasattr(ContentHandler, "_md") or self.settings.get("debug"): + ContentHandler._md = {} + if path not in ContentHandler._md: + full_path = os.path.join(self.settings["template_path"], path) + f = open(full_path, "r") + contents = f.read().decode("utf-8") + f.close() + if toc: contents = u"[TOC]\n\n" + contents + md = markdown.Markdown(extensions=["toc"] if toc else []) + ContentHandler._md[path] = md.convert(contents).encode("utf-8") + return ContentHandler._md[path] + + +settings = { + "template_path": os.path.join(os.path.dirname(__file__), "templates"), + "xsrf_cookies": True, + "debug": os.environ.get("SERVER_SOFTWARE", "").startswith("Development/"), +} +application = 
tornado.wsgi.WSGIApplication([ + (r"/([a-z]*)", ContentHandler), +], **settings) + + +def main(): + wsgiref.handlers.CGIHandler().run(application) + + +if __name__ == "__main__": + main()