record.message = record.getMessage()
except Exception, e:
record.message = "Bad message (%r): %r" % (e, record.__dict__)
+ assert isinstance(record.message, basestring) # guaranteed by logging
record.asctime = time.strftime(
"%y%m%d %H:%M:%S", self.converter(record.created))
prefix = '[%(levelname)1.1s %(asctime)s %(module)s:%(lineno)d]' % \
if self._color:
prefix = (self._colors.get(record.levelno, self._normal) +
prefix + self._normal)
- formatted = prefix + " " + record.message
+
+ # Encoding notes: The logging module prefers to work with character
+ # strings, but only enforces that log messages are instances of
+ # basestring. In python 2, non-ascii bytestrings will make
+ # their way through the logging framework until they blow up with
+ # an unhelpful decoding error (with this formatter it happens
+ # when we attach the prefix, but there are other opportunities for
+ # exceptions further along in the framework).
+ #
+ # If a byte string makes it this far, convert it to unicode to
+ # ensure it will make it out to the logs. Use repr() as a fallback
+ # to ensure that all byte strings can be converted successfully,
+ # but don't do it by default so we don't add extra quotes to ascii
+ # bytestrings. This is a bit of a hacky place to do this, but
+ # it's worth it since the encoding errors that would otherwise
+ # result are so useless (and tornado is fond of using utf8-encoded
+ # byte strings whereever possible).
+ try:
+ message = _unicode(record.message)
+ except UnicodeDecodeError:
+ message = repr(record.message)
+
+ formatted = prefix + " " + message
if record.exc_info:
if not record.exc_text:
record.exc_text = self.formatException(record.exc_info)
from __future__ import absolute_import, division, with_statement
+import logging
+import os
+import re
+import tempfile
import unittest
-from tornado.options import _Options
+from tornado.escape import utf8
+from tornado.options import _Options, _LogFormatter
+from tornado.util import b, bytes_type
class OptionsTest(unittest.TestCase):
def test_parse_command_line(self):
self.options.parse_command_line(["main.py", "--port=443"])
self.assertEqual(self.options.port, 443)
+
+
+class LogFormatterTest(unittest.TestCase):
+ LINE_RE = re.compile(b("\x01\\[E [0-9]{6} [0-9]{2}:[0-9]{2}:[0-9]{2} options_test:[0-9]+\\]\x02 (.*)"))
+
+ def setUp(self):
+ self.formatter = _LogFormatter(color=False)
+ # Fake color support. We can't guarantee anything about the $TERM
+ # variable when the tests are run, so just patch in some values
+ # for testing. (testing with color off fails to expose some potential
+ # encoding issues from the control characters)
+ self.formatter._colors = {
+ logging.ERROR: u"\u0001",
+ }
+ self.formatter._normal = u"\u0002"
+ self.formatter._color = True
+ # construct a Logger directly to bypass getLogger's caching
+ self.logger = logging.Logger('LogFormatterTest')
+ self.logger.propagate = False
+ self.tempdir = tempfile.mkdtemp()
+ self.filename = os.path.join(self.tempdir, 'log.out')
+ self.handler = self.make_handler(self.filename)
+ self.handler.setFormatter(self.formatter)
+ self.logger.addHandler(self.handler)
+
+ def tearDown(self):
+ os.unlink(self.filename)
+ os.rmdir(self.tempdir)
+
+ def make_handler(self, filename):
+ # Base case: default setup without explicit encoding.
+ # In python 2, supports arbitrary byte strings and unicode objects
+ # that contain only ascii. In python 3, supports ascii-only unicode
+ # strings (but byte strings will be repr'd automatically.
+ return logging.FileHandler(filename)
+
+ def get_output(self):
+ with open(self.filename, "rb") as f:
+ line = f.read().strip()
+ m = LogFormatterTest.LINE_RE.match(line)
+ if m:
+ return m.group(1)
+ else:
+ raise Exception("output didn't match regex: %r" % line)
+
+ def test_basic_logging(self):
+ self.logger.error("foo")
+ self.assertEqual(self.get_output(), b("foo"))
+
+ def test_bytes_logging(self):
+ self.logger.error(b("\xe9"))
+ # This will be "\xe9" on python 2 or "b'\xe9'" on python 3
+ self.assertEqual(self.get_output(), utf8(repr(b("\xe9"))))
+
+ def test_utf8_logging(self):
+ self.logger.error(u"\u00e9".encode("utf8"))
+ if issubclass(bytes_type, basestring):
+ # on python 2, utf8 byte strings (and by extension ascii byte
+ # strings) are passed through as-is.
+ self.assertEqual(self.get_output(), utf8(u"\u00e9"))
+ else:
+ # on python 3, byte strings always get repr'd even if
+ # they're ascii-only, so this degenerates into another
+ # copy of test_bytes_logging.
+ self.assertEqual(self.get_output(), utf8(repr(utf8(u"\u00e9"))))
+
+
+class UnicodeLogFormatterTest(LogFormatterTest):
+ def make_handler(self, filename):
+ # Adding an explicit encoding configuration allows non-ascii unicode
+ # strings in both python 2 and 3, without changing the behavior
+ # for byte strings.
+ return logging.FileHandler(filename, encoding="utf8")
+
+ def test_unicode_logging(self):
+ self.logger.error(u"\u00e9")
+ self.assertEqual(self.get_output(), utf8(u"\u00e9"))