# To be used with str.strip() and related methods.
HTTP_WHITESPACE = " \t"
-HTTP_TOKEN_RE = re.compile(r"^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$")
+
+class _ABNF:
+ """Class that holds a subset of ABNF rules from RFC 9110 and friends.
+
+ Class attributes are re.Pattern objects, with the same name as in the RFC
+ (with hyphens changed to underscores). Currently contains only the subset
+ we use (which is why this class is not public). Unfortunately the fields
+ cannot be alphabetized as they are in the RFCs because of dependencies.
+ """
+
+ # RFC 5234 (ABNF)
+ VCHAR = re.compile(r"[\x21-\x7E]")
+
+ # RFC 9110 (HTTP Semantics)
+ obs_text = re.compile(r"[\x80-\xFF]")
+ tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
+ token = re.compile(rf"{tchar.pattern}+")
+ field_name = token
+
+ # RFC 9112 (HTTP/1.1)
+ HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
+ reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
+ status_code = re.compile(r"[0-9]{3}")
+ status_line = re.compile(
+ rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
+ )
@lru_cache(1000)
def add(self, name: str, value: str) -> None:
"""Adds a new value for the given key."""
- if not HTTP_TOKEN_RE.match(name):
+ if not _ABNF.token.fullmatch(name):
raise HTTPInputError("Invalid header name %r" % name)
norm_name = _normalize_header(name)
self._last_key = norm_name
version: str
-_http_version_re = re.compile(r"^HTTP/1\.[0-9]$")
-
-
def parse_request_start_line(line: str) -> RequestStartLine:
"""Returns a (method, path, version) tuple for an HTTP 1.x request line.
# https://tools.ietf.org/html/rfc7230#section-3.1.1
# invalid request-line SHOULD respond with a 400 (Bad Request)
raise HTTPInputError("Malformed HTTP request line")
- if not _http_version_re.match(version):
+ if not _ABNF.HTTP_version.fullmatch(version):
raise HTTPInputError(
"Malformed HTTP version in HTTP Request-Line: %r" % version
)
+ if not version.startswith("HTTP/1"):
+ # HTTP/2 and above doesn't use parse_request_start_line.
+ # This could be folded into the regex but we don't want to deviate
+ # from the ABNF in the RFCs.
+ raise HTTPInputError("Unexpected HTTP version %r" % version)
return RequestStartLine(method, path, version)
reason: str
-_http_response_line_re = re.compile(r"(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)")
-
-
def parse_response_start_line(line: str) -> ResponseStartLine:
"""Returns a (version, code, reason) tuple for an HTTP 1.x response line.
>>> parse_response_start_line("HTTP/1.1 200 OK")
ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
"""
- line = native_str(line)
- match = _http_response_line_re.match(line)
+ match = _ABNF.status_line.fullmatch(line)
if not match:
raise HTTPInputError("Error parsing response start line")
- return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
+ r = ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
+ if not r.version.startswith("HTTP/1"):
+ # HTTP/2 and above doesn't use parse_response_start_line.
+ raise HTTPInputError("Unexpected HTTP version %r" % r.version)
+ return r
# _parseparam and _parse_header are copied and modified from python2.7's cgi.py