From: Nik Date: Mon, 29 Jul 2019 03:16:37 +0000 (+0300) Subject: Fixed multipart header params encoding (#167) X-Git-Tag: 0.7.0~34 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fb17459335b7e703f1895c4186a4fe421a54d815;p=thirdparty%2Fhttpx.git Fixed multipart header params encoding (#167) --- diff --git a/httpx/multipart.py b/httpx/multipart.py index 74805d14..a0e727ba 100644 --- a/httpx/multipart.py +++ b/httpx/multipart.py @@ -1,9 +1,17 @@ import binascii import mimetypes import os +import re import typing from io import BytesIO -from urllib.parse import quote + +_HTML5_FORM_ENCODING_REPLACEMENTS = {'"': "%22", "\\": "\\\\"} +_HTML5_FORM_ENCODING_REPLACEMENTS.update( + {chr(c): "%{:02X}".format(c) for c in range(0x00, 0x1F + 1) if c != 0x1B} +) +_HTML5_FORM_ENCODING_RE = re.compile( + r"|".join([re.escape(c) for c in _HTML5_FORM_ENCODING_REPLACEMENTS.keys()]) +) class Field: @@ -24,10 +32,8 @@ class DataField(Field): self.value = value def render_headers(self) -> bytes: - name = quote(self.name, encoding="utf-8").encode("ascii") - return b"".join( - [b'Content-Disposition: form-data; name="', name, b'"\r\n' b"\r\n"] - ) + name = _format_param("name", self.name) + return b"".join([b"Content-Disposition: form-data; ", name, b"\r\n\r\n"]) def render_data(self) -> bytes: return ( @@ -55,20 +61,18 @@ class FileField(Field): return mimetypes.guess_type(self.filename)[0] or "application/octet-stream" def render_headers(self) -> bytes: - name = quote(self.name, encoding="utf-8").encode("ascii") - filename = quote(self.filename, encoding="utf-8").encode("ascii") - content_type = self.content_type.encode("ascii") + name = _format_param("name", self.name) + filename = _format_param("filename", self.filename) + content_type = self.content_type.encode() return b"".join( [ - b'Content-Disposition: form-data; name="', + b"Content-Disposition: form-data; ", name, - b'"; filename="', + b"; ", filename, - b'"\r\n', - b"Content-Type: ", + b"\r\nContent-Type: ", content_type, - b"\r\n", - b"\r\n", + b"\r\n\r\n", ] ) @@ -104,3 +108,14 @@ def multipart_encode(data: dict, files: dict) -> typing.Tuple[bytes, str]: content_type = "multipart/form-data; boundary=%s" % boundary.decode("ascii") return body.getvalue(), content_type + + +def _format_param(name: str, value: typing.Union[str, bytes]) -> bytes: + if isinstance(value, bytes): + value = value.decode() + + def replacer(match: typing.Match[str]) -> str: + return _HTML5_FORM_ENCODING_REPLACEMENTS[match.group(0)] + + value = _HTML5_FORM_ENCODING_RE.sub(replacer, value) + return f'{name}="{value}"'.encode() diff --git a/tests/test_multipart.py b/tests/test_multipart.py index a3e6eb59..097adbdb 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -123,3 +123,21 @@ def test_multipart_encode(): "--{0}--\r\n" "".format(boundary).encode("ascii") ) + + +class TestHeaderParamHTML5Formatting: + def test_unicode(self): + param = multipart._format_param("filename", "n\u00e4me") + assert param == b'filename="n\xc3\xa4me"' + + def test_ascii(self): + param = multipart._format_param("filename", b"name") + assert param == b'filename="name"' + + def test_unicode_escape(self): + param = multipart._format_param("filename", "hello\\world\u0022") + assert param == b'filename="hello\\\\world%22"' + + def test_unicode_with_control_character(self): + param = multipart._format_param("filename", "hello\x1A\x1B\x1C") + assert param == b'filename="hello%1A\x1B%1C"'