git.ipfire.org Git - thirdparty/httpx.git/commitdiff
Fixed multipart header params encoding (#167)
author Nik <snive2013@yandex.ru>
Mon, 29 Jul 2019 03:16:37 +0000 (06:16 +0300)
committer Seth Michael Larson <sethmichaellarson@gmail.com>
Mon, 29 Jul 2019 03:16:37 +0000 (22:16 -0500)
httpx/multipart.py
tests/test_multipart.py

index 74805d14396a9da1e0311d8967221a1ff5b48910..a0e727ba71d1e35cc31f65a41e2e7f23989a3098 100644 (file)
@@ -1,9 +1,17 @@
 import binascii
 import mimetypes
 import os
+import re
 import typing
 from io import BytesIO
-from urllib.parse import quote
+
+_HTML5_FORM_ENCODING_REPLACEMENTS = {'"': "%22", "\\": "\\\\"}
+_HTML5_FORM_ENCODING_REPLACEMENTS.update(
+    {chr(c): "%{:02X}".format(c) for c in range(0x00, 0x1F + 1) if c != 0x1B}
+)
+_HTML5_FORM_ENCODING_RE = re.compile(
+    r"|".join([re.escape(c) for c in _HTML5_FORM_ENCODING_REPLACEMENTS.keys()])
+)
 
 
 class Field:
@@ -24,10 +32,8 @@ class DataField(Field):
         self.value = value
 
     def render_headers(self) -> bytes:
-        name = quote(self.name, encoding="utf-8").encode("ascii")
-        return b"".join(
-            [b'Content-Disposition: form-data; name="', name, b'"\r\n' b"\r\n"]
-        )
+        name = _format_param("name", self.name)
+        return b"".join([b"Content-Disposition: form-data; ", name, b"\r\n\r\n"])
 
     def render_data(self) -> bytes:
         return (
@@ -55,20 +61,18 @@ class FileField(Field):
         return mimetypes.guess_type(self.filename)[0] or "application/octet-stream"
 
     def render_headers(self) -> bytes:
-        name = quote(self.name, encoding="utf-8").encode("ascii")
-        filename = quote(self.filename, encoding="utf-8").encode("ascii")
-        content_type = self.content_type.encode("ascii")
+        name = _format_param("name", self.name)
+        filename = _format_param("filename", self.filename)
+        content_type = self.content_type.encode()
         return b"".join(
             [
-                b'Content-Disposition: form-data; name="',
+                b"Content-Disposition: form-data; ",
                 name,
-                b'"; filename="',
+                b"; ",
                 filename,
-                b'"\r\n',
-                b"Content-Type: ",
+                b"\r\nContent-Type: ",
                 content_type,
-                b"\r\n",
-                b"\r\n",
+                b"\r\n\r\n",
             ]
         )
 
@@ -104,3 +108,14 @@ def multipart_encode(data: dict, files: dict) -> typing.Tuple[bytes, str]:
     content_type = "multipart/form-data; boundary=%s" % boundary.decode("ascii")
 
     return body.getvalue(), content_type
+
+
+def _format_param(name: str, value: typing.Union[str, bytes]) -> bytes:
+    if isinstance(value, bytes):
+        value = value.decode()
+        
+    def replacer(match: typing.Match[str]) -> str:
+        return _HTML5_FORM_ENCODING_REPLACEMENTS[match.group(0)]
+
+    value = _HTML5_FORM_ENCODING_RE.sub(replacer, value)
+    return f'{name}="{value}"'.encode()
index a3e6eb59eb75c56ea6892a2f163560121b1428c3..097adbdb26e392876b012892b7e6ea7b6dc79811 100644 (file)
@@ -123,3 +123,21 @@ def test_multipart_encode():
             "--{0}--\r\n"
             "".format(boundary).encode("ascii")
         )
+
+
+class TestHeaderParamHTML5Formatting:
+    def test_unicode(self):
+        param = multipart._format_param("filename", "n\u00e4me")
+        assert param == b'filename="n\xc3\xa4me"'
+
+    def test_ascii(self):
+        param = multipart._format_param("filename", b"name")
+        assert param == b'filename="name"'
+
+    def test_unicode_escape(self):
+        param = multipart._format_param("filename", "hello\\world\u0022")
+        assert param == b'filename="hello\\\\world%22"'
+
+    def test_unicode_with_control_character(self):
+        param = multipart._format_param("filename", "hello\x1A\x1B\x1C")
+        assert param == b'filename="hello%1A\x1B%1C"'