)
from .multipart import multipart_encode
from .status_codes import StatusCode
-from .utils import is_known_encoding, normalize_header_key, normalize_header_value
+from .utils import (
+ guess_json_utf,
+ is_known_encoding,
+ normalize_header_key,
+ normalize_header_value,
+)
URLTypes = typing.Union["URL", str]
if message:
raise HttpError(message)
- def json(self) -> typing.Any:
- return jsonlib.loads(self.content.decode("utf-8"))
+ def json(self, **kwargs: typing.Any) -> typing.Union[dict, list]:
+ # Parse the response body as JSON; **kwargs are forwarded to json.loads.
+ # When no charset was declared and there are enough bytes to inspect,
+ # sniff a UTF family from the leading byte pattern (JSON text must be
+ # UTF-encoded, so NUL placement/BOMs identify the codec).
+ if self.charset_encoding is None and self.content and len(self.content) > 3:
+ encoding = guess_json_utf(self.content)
+ if encoding is not None:
+ try:
+ return jsonlib.loads(self.content.decode(encoding), **kwargs)
+ except UnicodeDecodeError:
+ # The guess was wrong; fall back to text-based decoding below.
+ pass
+ return jsonlib.loads(self.text, **kwargs)
@property
def cookies(self) -> "Cookies":
except LookupError:
return False
return True
+
+
+# Null bytes; no need to recreate these on each call to guess_json_utf
+_null = "\x00".encode("ascii") # encoding to ASCII for Python 3
+_null2 = _null * 2
+_null3 = _null * 3
+
+
+def guess_json_utf(data: bytes) -> typing.Optional[str]:
+ """Return the name of the UTF codec *data* appears to use, or ``None``.
+
+ Inspects at most the first four bytes: explicit BOMs are recognised
+ first, then the count and position of NUL bytes distinguishes the
+ BOM-less UTF-8/16/32 encodings (JSON text starts with two ASCII
+ characters, so NUL placement is unambiguous).
+ """
+ # JSON always starts with two ASCII characters, so detection is as
+ # easy as counting the nulls and from their location and count
+ # determine the encoding. Also detect a BOM, if present.
+ sample = data[:4]
+ if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
+ return "utf-32" # BOM included
+ if sample[:3] == codecs.BOM_UTF8:
+ return "utf-8-sig" # BOM included, MS style (discouraged)
+ if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
+ return "utf-16" # BOM included
+ nullcount = sample.count(_null)
+ if nullcount == 0:
+ return "utf-8"
+ if nullcount == 2:
+ if sample[::2] == _null2: # 1st and 3rd are null
+ return "utf-16-be"
+ if sample[1::2] == _null2: # 2nd and 4th are null
+ return "utf-16-le"
+ # Did not detect 2 valid UTF-16 ascii-range characters
+ if nullcount == 3:
+ if sample[:3] == _null3:
+ return "utf-32-be"
+ if sample[1:] == _null3:
+ return "utf-32-le"
+ # Did not detect a valid UTF-32 ascii-range character
+ return None
+import json
+from unittest import mock
+
import pytest
import http3
assert response.status_code == 600
assert response.reason_phrase == ""
assert response.text == ""
+
+
+def test_json_with_specified_encoding():
+ data = dict(greeting="hello", recipient="world")
+ content = json.dumps(data).encode("utf-16")
+ headers = {"Content-Type": "application/json, charset=utf-16"}
+ response = http3.Response(200, content=content, headers=headers)
+ assert response.json() == data
+
+
+def test_json_with_options():
+ data = dict(greeting="hello", recipient="world", amount=1)
+ content = json.dumps(data).encode("utf-16")
+ headers = {"Content-Type": "application/json, charset=utf-16"}
+ response = http3.Response(200, content=content, headers=headers)
+ assert response.json(parse_int=str)["amount"] == "1"
+
+
+def test_json_without_specified_encoding():
+ # No charset in the Content-Type: .json() must sniff utf-32-be from
+ # the leading NUL-byte pattern via guess_json_utf.
+ data = dict(greeting="hello", recipient="world")
+ content = json.dumps(data).encode("utf-32-be")
+ headers = {"Content-Type": "application/json"}
+ response = http3.Response(200, content=content, headers=headers)
+ assert response.json() == data
+
+
+def test_json_without_specified_encoding_decode_error():
+ # A wrong guess ("utf-32" implies little-endian without a BOM) makes
+ # the decode fail; .json() must then fall back to self.text, which
+ # yields garbage and raises JSONDecodeError rather than crashing.
+ data = dict(greeting="hello", recipient="world")
+ content = json.dumps(data).encode("utf-32-be")
+ headers = {"Content-Type": "application/json"}
+ # force incorrect guess from `guess_json_utf` to trigger error
+ with mock.patch("http3.models.guess_json_utf", return_value="utf-32"):
+ response = http3.Response(200, content=content, headers=headers)
+ with pytest.raises(json.JSONDecodeError):
+ response.json()
--- /dev/null
+import pytest
+
+from http3.utils import guess_json_utf
+
+
+@pytest.mark.parametrize(
+ "encoding",
+ (
+ "utf-32",
+ "utf-8-sig",
+ "utf-16",
+ "utf-8",
+ "utf-16-be",
+ "utf-16-le",
+ "utf-32-be",
+ "utf-32-le",
+ ),
+)
+def test_encoded(encoding):
+ # Round-trip: encoding "{}" in each UTF variant must be detected as
+ # that same codec name (BOM-bearing codecs report the generic name).
+ data = "{}".encode(encoding)
+ assert guess_json_utf(data) == encoding
+
+
+def test_bad_utf_like_encoding():
+ # Four NUL bytes match no valid UTF pattern: detection must give up.
+ assert guess_json_utf(b"\x00\x00\x00\x00") is None
+
+
+@pytest.mark.parametrize(
+ ("encoding", "expected"),
+ (
+ ("utf-16-be", "utf-16"),
+ ("utf-16-le", "utf-16"),
+ ("utf-32-be", "utf-32"),
+ ("utf-32-le", "utf-32"),
+ ),
+)
+def test_guess_by_bom(encoding, expected):
+ # A leading U+FEFF BOM collapses endian-specific codecs to the
+ # generic "utf-16"/"utf-32" name, since the BOM carries endianness.
+ data = u"\ufeff{}".encode(encoding)
+ assert guess_json_utf(data) == expected