From: Ben Darnell Date: Fri, 30 Sep 2016 15:39:29 +0000 (+0800) Subject: httputil: Rewrite cookie parsing X-Git-Tag: v4.4.2^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cb247cb8db7903fda0ca26531c1526e895e10800;p=thirdparty%2Ftornado.git httputil: Rewrite cookie parsing Move from the python standard library to a parsing function copied from Django. This parser more closely matches browser behavior. The primary motivation is that differences between server-side and browser cookie parsing can lead to an XSRF bypass, as in https://hackerone.com/reports/26647. A secondary benefit is that this makes it possible to work with cookie headers containing cookies that are invalid according to the spec, which is a surprisingly common request. Closes #1851 Closes #633 Closes #1434 Closes #1176 --- diff --git a/tornado/httputil.py b/tornado/httputil.py index 9ca840db6..21842caa2 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -379,10 +379,18 @@ class HTTPServerRequest(object): self._cookies = Cookie.SimpleCookie() if "Cookie" in self.headers: try: - self._cookies.load( - native_str(self.headers["Cookie"])) + parsed = parse_cookie(self.headers["Cookie"]) except Exception: - self._cookies = {} + pass + else: + for k, v in parsed.items(): + try: + self._cookies[k] = v + except Exception: + # SimpleCookie imposes some restrictions on keys; + # parse_cookie does not. Discard any cookies + # with disallowed keys. + pass return self._cookies def write(self, chunk, callback=None): @@ -909,3 +917,82 @@ def split_host_and_port(netloc): host = netloc port = None return (host, port) + +_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +_QuotePatt = re.compile(r"[\\].") +_nulljoin = ''.join + +def _unquote_cookie(str): + """Handle double quotes and escaping in cookie values. + + This method is copied verbatim from the Python 3.5 standard + library (http.cookies._unquote) so we don't have to depend on + non-public interfaces. + """ + # If there aren't any doublequotes, + # then there can't be any special characters. See RFC 2109. + if str is None or len(str) < 2: + return str + if str[0] != '"' or str[-1] != '"': + return str + + # We have to assume that we must decode this string. + # Down to work. + + # Remove the "s + str = str[1:-1] + + # Check for special sequences. Examples: + # \012 --> \n + # \" --> " + # + i = 0 + n = len(str) + res = [] + while 0 <= i < n: + o_match = _OctalPatt.search(str, i) + q_match = _QuotePatt.search(str, i) + if not o_match and not q_match: # Neither matched + res.append(str[i:]) + break + # else: + j = k = -1 + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): # QuotePatt matched + res.append(str[i:k]) + res.append(str[k+1]) + i = k + 2 + else: # OctalPatt matched + res.append(str[i:j]) + res.append(chr(int(str[j+1:j+4], 8))) + i = j + 4 + return _nulljoin(res) + + +def parse_cookie(cookie): + """Parse a ``Cookie`` HTTP header into a dict of name/value pairs. + + This function attempts to mimic browser cookie parsing behavior; + it specifically does not follow any of the cookie-related RFCs + (because browsers don't either). + + The algorithm used is identical to that used by Django version 1.9.10. + + .. versionadded:: 4.4.2 + """ + cookiedict = {} + for chunk in cookie.split(str(';')): + if str('=') in chunk: + key, val = chunk.split(str('='), 1) + else: + # Assume an empty name per + # https://bugzilla.mozilla.org/show_bug.cgi?id=169091 + key, val = str(''), chunk + key, val = key.strip(), val.strip() + if key or val: + # unquote using Python's algorithm. + cookiedict[key] = _unquote_cookie(val) + return cookiedict diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py index 62b8c6d76..3eb104d1c 100644 --- a/tornado/test/httputil_test.py +++ b/tornado/test/httputil_test.py @@ -1,8 +1,9 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, with_statement -from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line +from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line, parse_cookie from tornado.escape import utf8, native_str from tornado.log import gen_log from tornado.testing import ExpectLog @@ -378,3 +379,53 @@ class ParseRequestStartLineTest(unittest.TestCase): self.assertEqual(parsed_start_line.method, self.METHOD) self.assertEqual(parsed_start_line.path, self.PATH) self.assertEqual(parsed_start_line.version, self.VERSION) + + +class ParseCookieTest(unittest.TestCase): + # These tests copied from Django: + # https://github.com/django/django/pull/6277/commits/da810901ada1cae9fc1f018f879f11a7fb467b28 + def test_python_cookies(self): + """ + Test cases copied from Python's Lib/test/test_http_cookies.py + """ + self.assertEqual(parse_cookie('chips=ahoy; vienna=finger'), {'chips': 'ahoy', 'vienna': 'finger'}) + # Here parse_cookie() differs from Python's cookie parsing in that it + # treats all semicolons as delimiters, even within quotes. + self.assertEqual( + parse_cookie('keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"'), + {'keebler': '"E=mc2', 'L': '\\"Loves\\"', 'fudge': '\\012', '': '"'} + ) + # Illegal cookies that have an '=' char in an unquoted value. + self.assertEqual(parse_cookie('keebler=E=mc2'), {'keebler': 'E=mc2'}) + # Cookies with ':' character in their name. + self.assertEqual(parse_cookie('key:term=value:term'), {'key:term': 'value:term'}) + # Cookies with '[' and ']'. + self.assertEqual(parse_cookie('a=b; c=[; d=r; f=h'), {'a': 'b', 'c': '[', 'd': 'r', 'f': 'h'}) + + def test_cookie_edgecases(self): + # Cookies that RFC6265 allows. + self.assertEqual(parse_cookie('a=b; Domain=example.com'), {'a': 'b', 'Domain': 'example.com'}) + # parse_cookie() has historically kept only the last cookie with the + # same name. + self.assertEqual(parse_cookie('a=b; h=i; a=c'), {'a': 'c', 'h': 'i'}) + + def test_invalid_cookies(self): + """ + Cookie strings that go against RFC6265 but browsers will send if set + via document.cookie. + """ + # Chunks without an equals sign appear as unnamed values per + # https://bugzilla.mozilla.org/show_bug.cgi?id=169091 + self.assertIn('django_language', parse_cookie('abc=def; unnamed; django_language=en').keys()) + # Even a double quote may be an unamed value. + self.assertEqual(parse_cookie('a=b; "; c=d'), {'a': 'b', '': '"', 'c': 'd'}) + # Spaces in names and values, and an equals sign in values. + self.assertEqual(parse_cookie('a b c=d e = f; gh=i'), {'a b c': 'd e = f', 'gh': 'i'}) + # More characters the spec forbids. + self.assertEqual(parse_cookie('a b,c<>@:/[]?{}=d " =e,f g'), {'a b,c<>@:/[]?{}': 'd " =e,f g'}) + # Unicode characters. The spec only allows ASCII. + self.assertEqual(parse_cookie('saint=André Bessette'), {'saint': native_str('André Bessette')}) + # Browsers don't send extra whitespace or semicolons in Cookie headers, + # but parse_cookie() should parse whitespace the same way + # document.cookie parses whitespace. + self.assertEqual(parse_cookie(' = b ; ; = ; c = ; '), {'': 'b', 'c': ''}) diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py index fdd1797cc..14f6904aa 100644 --- a/tornado/test/web_test.py +++ b/tornado/test/web_test.py @@ -279,8 +279,8 @@ class CookieTest(WebTestCase): data = [('foo=a=b', 'a=b'), ('foo="a=b"', 'a=b'), - ('foo="a;b"', 'a;b'), - # ('foo=a\\073b', 'a;b'), # even encoded, ";" is a delimiter + ('foo="a;b"', '"a'), # even quoted, ";" is a delimiter + ('foo=a\\073b', 'a\\073b'), # escapes only decoded in quotes ('foo="a\\073b"', 'a;b'), ('foo="a\\"b"', 'a"b'), ]