from __future__ import absolute_import, division, print_function, with_statement
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line
-from tornado.escape import utf8
+from tornado.escape import utf8, native_str
from tornado.log import gen_log
from tornado.testing import ExpectLog
from tornado.test.util import unittest
("Foo", "bar baz"),
("Foo", "even more lines")])
+ def test_unicode_newlines(self):
+ # Ensure that only \r\n is recognized as a header separator, and not
+ # the other newline-like unicode characters.
+ # Characters that are likely to be problematic can be found in
+ # http://unicode.org/standard/reports/tr13/tr13-5.html
+ # and cpython's unicodeobject.c (which defines the implementation
+ # of unicode_type.splitlines(), and uses a different list than TR13).
+ newlines = [
+ u'\u001b', # VERTICAL TAB
+ u'\u001c', # FILE SEPARATOR
+ u'\u001d', # GROUP SEPARATOR
+ u'\u001e', # RECORD SEPARATOR
+ u'\u0085', # NEXT LINE
+ u'\u2028', # LINE SEPARATOR
+ u'\u2029', # PARAGRAPH SEPARATOR
+ ]
+ for newline in newlines:
+ # Try the utf8 and latin1 representations of each newline
+ for encoding in ['utf8', 'latin1']:
+ try:
+ try:
+ encoded = newline.encode(encoding)
+ except UnicodeEncodeError:
+ # Some chars cannot be represented in latin1
+ continue
+ data = b'Cookie: foo=' + encoded + b'bar'
+ # parse() wants a native_str, so decode through latin1
+ # in the same way the real parser does.
+ headers = HTTPHeaders.parse(
+ native_str(data.decode('latin1')))
+ expected = [('Cookie', 'foo=' +
+ native_str(encoded.decode('latin1')) + 'bar')]
+ self.assertEqual(
+ expected, list(headers.get_all()))
+ except Exception:
+ gen_log.warning("failed while trying %r in %s",
+ newline, encoding)
+ raise
+
+ def test_optional_cr(self):
+ # Both CRLF and LF should be accepted as separators. CR should not be
+ # part of the data when followed by LF, but it is a normal char
+ # otherwise (or should bare CR be an error?)
+ headers = HTTPHeaders.parse(
+ 'CRLF: crlf\r\nLF: lf\nCR: cr\rMore: more\r\n')
+ self.assertEqual(sorted(headers.get_all()),
+ [('Cr', 'cr\rMore: more'),
+ ('Crlf', 'crlf'),
+ ('Lf', 'lf'),
+ ])
+
class FormatTimestampTest(unittest.TestCase):
# Make sure that all the input types are supported.