# It has also been modified to support valueless parameters as seen in
# websocket extension negotiations, and to support non-ascii values in
# RFC 2231/5987 format.
+#
+# _parseparam has been further modified with the logic from
+# https://github.com/python/cpython/pull/136072/files
+# to avoid quadratic behavior when parsing semicolons in quoted strings.
+#
+# TODO: See if we can switch to email.message.Message for this functionality.
+# This is the suggested replacement for the cgi.py module now that cgi has
+# been removed from recent versions of Python. We need to verify that
+# the email module is consistent with our existing behavior (and all relevant
+# RFCs for multipart/form-data) before making this change.
def _parseparam(s: str) -> Generator[str, None, None]:
- while s[:1] == ";":
- s = s[1:]
- end = s.find(";")
- while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
- end = s.find(";", end + 1)
+ start = 0
+ while s.find(";", start) == start:
+ start += 1
+ end = s.find(";", start)
+ ind, diff = start, 0
+ while end > 0:
+ diff += s.count('"', ind, end) - s.count('\\"', ind, end)
+ if diff % 2 == 0:
+ break
+ end, ind = ind, s.find(";", end + 1)
if end < 0:
end = len(s)
- f = s[:end]
+ f = s[start:end]
yield f.strip()
- s = s[end:]
+ start = end
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
self.assertEqual(file["filename"], "ab.txt")
self.assertEqual(file["body"], b"Foo")
+ def test_disposition_param_linear_performance(self):
+ # This is a regression test for performance of parsing parameters
+ # to the content-disposition header, specifically for semicolons within
+ # quoted strings.
+ def f(n):
+ start = time.time()
+ message = (
+ b"--1234\r\nContent-Disposition: form-data; "
+ + b'x="'
+ + b";" * n
+ + b'"; '
+ + b'name="files"; filename="a.txt"\r\n\r\nFoo\r\n--1234--\r\n'
+ )
+ args: dict[str, list[bytes]] = {}
+ files: dict[str, list[HTTPFile]] = {}
+ parse_multipart_form_data(b"1234", message, args, files)
+ return time.time() - start
+
+ d1 = f(1_000)
+ d2 = f(10_000)
+ if d2 / d1 > 20:
+ self.fail(f"Disposition param parsing is not linear: {d1=} vs {d2=}")
+
class HTTPHeadersTest(unittest.TestCase):
def test_multi_line(self):