def parse_qs_bytes(
- qs: str, keep_blank_values: bool = False, strict_parsing: bool = False
+ qs: Union[str, bytes], keep_blank_values: bool = False, strict_parsing: bool = False
) -> Dict[str, List[bytes]]:
- """Parses a query string like urlparse.parse_qs, but returns the
- values as byte strings.
+ """Parses a query string like urlparse.parse_qs,
+ but takes bytes and returns the values as byte strings.
Keys still become type str (interpreted as latin1 in python3!)
because it's too painful to keep them as byte strings in
"""
# This is gross, but python3 doesn't give us another way.
# Latin1 is the universal donor of character encodings.
+ if isinstance(qs, bytes):
+ qs = qs.decode("latin1")
result = urllib.parse.parse_qs(
qs, keep_blank_values, strict_parsing, encoding="latin1", errors="strict"
)
import sys
import tempfile
import unittest
+import urllib.parse
from io import BytesIO
import typing
self.errors[name] = "expected %s, got %s" % (expected_type, actual_type)
+class PostEchoHandler(RequestHandler):
+ def post(self, *path_args):
+ self.write(dict(echo=self.get_argument("data")))
+
+
+class PostEchoGBKHandler(PostEchoHandler):
+ def decode_argument(self, value, name=None):
+ try:
+ return value.decode("gbk")
+ except Exception:
+ raise HTTPError(400, "invalid gbk bytes: %r" % value)
+
+
class HTTPServerTest(AsyncHTTPTestCase):
def get_app(self):
return Application(
("/echo", EchoHandler),
("/typecheck", TypeCheckHandler),
("//doubleslash", EchoHandler),
+ ("/post_utf8", PostEchoHandler),
+ ("/post_gbk", PostEchoGBKHandler),
]
)
self.assertEqual(200, response.code)
self.assertEqual(json_decode(response.body), {})
- def test_malformed_body(self):
- # parse_qs is pretty forgiving, but it will fail on python 3
- # if the data is not utf8.
- with ExpectLog(gen_log, "Invalid x-www-form-urlencoded body"):
- response = self.fetch(
- "/echo",
- method="POST",
- headers={"Content-Type": "application/x-www-form-urlencoded"},
- body=b"\xe9",
- )
- self.assertEqual(200, response.code)
- self.assertEqual(b"{}", response.body)
+ def test_post_encodings(self):
+ headers = {"Content-Type": "application/x-www-form-urlencoded"}
+ uni_text = "chinese: \u5f20\u4e09"
+ for enc in ("utf8", "gbk"):
+ for quote in (True, False):
+ with self.subTest(enc=enc, quote=quote):
+ bin_text = uni_text.encode(enc)
+ if quote:
+ bin_text = urllib.parse.quote(bin_text).encode("ascii")
+ response = self.fetch(
+ "/post_" + enc,
+ method="POST",
+ headers=headers,
+ body=(b"data=" + bin_text),
+ )
+ self.assertEqual(json_decode(response.body), {"echo": uni_text})
class HTTPServerRawTest(AsyncHTTPTestCase):