From: Ben Darnell
Date: Sun, 15 May 2011 23:58:29 +0000 (-0700)
Subject: Add tornado.escape.native_str() for dealing with python 2 vs 3 issues.
X-Git-Tag: v2.0.0~73
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bd1c8ca6ca4f8f1fe5a15b791937c479ec1a3fba;p=thirdparty%2Ftornado.git

Add tornado.escape.native_str() for dealing with python 2 vs 3 issues.

Rename _unicode() to to_unicode() so it doesn't look private,
make its implementation match utf8(), and document both functions.
---

diff --git a/tornado/escape.py b/tornado/escape.py
index 42c5ad764..5bd627abd 100644
--- a/tornado/escape.py
+++ b/tornado/escape.py
@@ -94,11 +94,39 @@ def url_unescape(value):
 
 _UTF8_TYPES = (bytes, type(None))
 def utf8(value):
+    """Converts a string argument to a byte string.
+
+    If the argument is already a byte string or None, it is returned unchanged.
+    Otherwise it must be a unicode string and is encoded as utf8.
+    """
     if isinstance(value, _UTF8_TYPES):
         return value
     assert isinstance(value, unicode)
     return value.encode("utf-8")
 
+_TO_UNICODE_TYPES = (unicode, type(None))
+def to_unicode(value):
+    """Converts a string argument to a unicode string.
+
+    If the argument is already a unicode string or None, it is returned
+    unchanged. Otherwise it must be a byte string and is decoded as utf8.
+    """
+    if isinstance(value, _TO_UNICODE_TYPES):
+        return value
+    assert isinstance(value, bytes)
+    return value.decode("utf-8")
+
+# to_unicode was previously named _unicode not because it was private,
+# but to avoid conflicts with the built-in unicode() function/type
+_unicode = to_unicode
+
+# When dealing with the standard library across python 2 and 3 it is
+# sometimes useful to have a direct conversion to the native string type
+if str is unicode:
+    native_str = to_unicode
+else:
+    native_str = utf8
+
 
 # I originally used the regex from
 # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
@@ -187,13 +215,6 @@ def linkify(text, shorten=False, extra_params="",
     return _URL_RE.sub(make_link, text)
 
 
-def _unicode(value):
-    if isinstance(value, bytes):
-        return value.decode("utf-8")
-    assert isinstance(value, unicode)
-    return value
-
-
 def _convert_entity(m):
     if m.group(1) == "#":
         try: