From: Ben Darnell Date: Thu, 27 Mar 2025 20:30:08 +0000 (-0400) Subject: httputil: Make parse_request_start_line stricter X-Git-Tag: v6.5.0b1~16^2~1 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=88344aef46861c41202ec361bdf9640afadfb95d;p=thirdparty%2Ftornado.git httputil: Make parse_request_start_line stricter The method is now restricted to being valid token characters as defined in RFC 9110, allowing us to correctly issue status code 400 or 405 as appropriate (this can make a difference with some caching proxies). The request-target no longer allows control characters. This is less strict than the RFC (which does not allow non-ascii characters), but prioritizes backwards compatibility. Fixes #3415 Closes #3338 --- diff --git a/tornado/httputil.py b/tornado/httputil.py index f0c4ff10..2f1a9ba8 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -86,13 +86,22 @@ class _ABNF: # RFC 9110 (HTTP Semantics) obs_text = re.compile(r"[\x80-\xFF]") + field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})") tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]") token = re.compile(rf"{tchar.pattern}+") field_name = token + method = token # RFC 9112 (HTTP/1.1) HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]") reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+") + # request_target delegates to the URI RFC 3986, which is complex and may be + # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII + # characters). Instead, we allow everything but control chars and whitespace. + request_target = re.compile(rf"{field_vchar.pattern}+") + request_line = re.compile( + rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})" + ) status_code = re.compile(r"[0-9]{3}") status_line = re.compile( rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?" @@ -170,7 +179,7 @@ class HTTPHeaders(StrMutableMapping): def add(self, name: str, value: str) -> None: """Adds a new value for the given key.""" - if not _ABNF.token.fullmatch(name): + if not _ABNF.field_name.fullmatch(name): raise HTTPInputError("Invalid header name %r" % name) norm_name = _normalize_header(name) self._last_key = norm_name @@ -925,22 +934,18 @@ def parse_request_start_line(line: str) -> RequestStartLine: >>> parse_request_start_line("GET /foo HTTP/1.1") RequestStartLine(method='GET', path='/foo', version='HTTP/1.1') """ - try: - method, path, version = line.split(" ") - except ValueError: + match = _ABNF.request_line.fullmatch(line) + if not match: # https://tools.ietf.org/html/rfc7230#section-3.1.1 # invalid request-line SHOULD respond with a 400 (Bad Request) raise HTTPInputError("Malformed HTTP request line") - if not _ABNF.HTTP_version.fullmatch(version): - raise HTTPInputError( - "Malformed HTTP version in HTTP Request-Line: %r" % version - ) - if not version.startswith("HTTP/1"): + r = RequestStartLine(match.group(1), match.group(2), match.group(3)) + if not r.version.startswith("HTTP/1"): # HTTP/2 and above doesn't use parse_request_start_line. # This could be folded into the regex but we don't want to deviate # from the ABNF in the RFCs. - raise HTTPInputError("Unexpected HTTP version %r" % version) - return RequestStartLine(method, path, version) + raise HTTPInputError("Unexpected HTTP version %r" % r.version) + return r class ResponseStartLine(typing.NamedTuple):