From: Ben Darnell Date: Wed, 5 Jun 2024 19:43:45 +0000 (-0400) Subject: curl_httpclient,http1connection: Prohibit CR and LF in headers X-Git-Tag: v6.4.1~2^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F3386%2Fhead;p=thirdparty%2Ftornado.git curl_httpclient,http1connection: Prohibit CR and LF in headers libcurl does not check for CR and LF in headers, making this the application's responsibility. However, Tornado's other HTTP interfaces check for linefeeds so we should do the same here so that switching between the simple and curl http clients does not introduce header injection vulnerabilties. http1connection previously checked only for LF in headers (alone or in a CRLF pair). It now prohibits bare CR as well, following the requirement in RFC 9112. --- diff --git a/tornado/curl_httpclient.py b/tornado/curl_httpclient.py index 23320e48..397c3a97 100644 --- a/tornado/curl_httpclient.py +++ b/tornado/curl_httpclient.py @@ -19,6 +19,7 @@ import collections import functools import logging import pycurl +import re import threading import time from io import BytesIO @@ -44,6 +45,8 @@ if typing.TYPE_CHECKING: curl_log = logging.getLogger("tornado.curl_httpclient") +CR_OR_LF_RE = re.compile(b"\r|\n") + class CurlAsyncHTTPClient(AsyncHTTPClient): def initialize( # type: ignore @@ -347,14 +350,15 @@ class CurlAsyncHTTPClient(AsyncHTTPClient): if "Pragma" not in request.headers: request.headers["Pragma"] = "" - curl.setopt( - pycurl.HTTPHEADER, - [ - b"%s: %s" - % (native_str(k).encode("ASCII"), native_str(v).encode("ISO8859-1")) - for k, v in request.headers.get_all() - ], - ) + encoded_headers = [ + b"%s: %s" + % (native_str(k).encode("ASCII"), native_str(v).encode("ISO8859-1")) + for k, v in request.headers.get_all() + ] + for line in encoded_headers: + if CR_OR_LF_RE.search(line): + raise ValueError("Illegal characters in header (CR or LF): %r" % line) + curl.setopt(pycurl.HTTPHEADER, encoded_headers) curl.setopt( pycurl.HEADERFUNCTION, diff --git a/tornado/http1connection.py b/tornado/http1connection.py index ca50e8ff..3bdffac1 100644 --- a/tornado/http1connection.py +++ b/tornado/http1connection.py @@ -38,6 +38,8 @@ from tornado.util import GzipDecompressor from typing import cast, Optional, Type, Awaitable, Callable, Union, Tuple +CR_OR_LF_RE = re.compile(b"\r|\n") + class _QuietException(Exception): def __init__(self) -> None: @@ -453,8 +455,8 @@ class HTTP1Connection(httputil.HTTPConnection): ) lines.extend(line.encode("latin1") for line in header_lines) for line in lines: - if b"\n" in line: - raise ValueError("Newline in header: " + repr(line)) + if CR_OR_LF_RE.search(line): + raise ValueError("Illegal characters (CR or LF) in header: %r" % line) future = None if self.stream.closed(): future = self._write_future = Future() diff --git a/tornado/test/httpclient_test.py b/tornado/test/httpclient_test.py index 31a19161..17291f8f 100644 --- a/tornado/test/httpclient_test.py +++ b/tornado/test/httpclient_test.py @@ -725,6 +725,22 @@ X-XSS-Protection: 1; if el.logged_stack: break + def test_header_crlf(self): + # Ensure that the client doesn't allow CRLF injection in headers. RFC 9112 section 2.2 + # prohibits a bare CR specifically and "a recipient MAY recognize a single LF as a line + # terminator" so we check each character separately as well as the (redundant) CRLF pair. + for header, name in [ + ("foo\rbar:", "cr"), + ("foo\nbar:", "lf"), + ("foo\r\nbar:", "crlf"), + ]: + with self.subTest(name=name, position="value"): + with self.assertRaises(ValueError): + self.fetch("/hello", headers={"foo": header}) + with self.subTest(name=name, position="key"): + with self.assertRaises(ValueError): + self.fetch("/hello", headers={header: "foo"}) + class RequestProxyTest(unittest.TestCase): def test_request_set(self):