From: Tom Christie Date: Mon, 28 Oct 2024 17:19:40 +0000 (+0000) Subject: Merge branch 'master' into limit-supported-codecs X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=292cfe7c3fd9dfc9bc6c162b681fdfb4a5af8e0e;p=thirdparty%2Fhttpx.git Merge branch 'master' into limit-supported-codecs --- 292cfe7c3fd9dfc9bc6c162b681fdfb4a5af8e0e diff --cc httpx/_utils.py index f95658b6,fcf4b64c..af89d8fc --- a/httpx/_utils.py +++ b/httpx/_utils.py @@@ -25,57 -23,11 +23,57 @@@ _HTML5_FORM_ENCODING_RE = re.compile r"|".join([re.escape(c) for c in _HTML5_FORM_ENCODING_REPLACEMENTS.keys()]) ) +# For our supported text codecs, we start with the text codecs as supported by Chromium, Oct. 2023. +# https://chromium.googlesource.com/chromium/chromium/+/refs/heads/trunk/chrome/browser/character_encoding.cc#36 +# +# Then limit them to only include codecs which are documented as included by cpython. +# https://docs.python.org/3/library/codecs.html#standard-encodings +# +# We're referencing them with the canonical name as used by the Python codecs. +# The alias given in the chromium source is included as a comment for comparison.
+SUPPORTED_CODECS = { + "big5", # big5 + "big5hkscs", # big5-hkscs + "cp1250", # windows-1250 + "cp1251", # windows-1251 + "cp1252", # windows-1252 + "cp1253", # windows-1253 + "cp1254", # windows-1254 + "cp1255", # windows-1255 + "cp1256", # windows-1256 + "cp1257", # windows-1257 + "cp1258", # windows-1258 + "euc_jp", # euc-jp + "euc_kr", # euc-kr + "gb18030", # gb18030 + "gbk", # gbk + "iso2022_jp", # iso-2022-jp + "iso8859-1", # iso-8859-1 + "iso8859-2", # iso-8859-2 + "iso8859-3", # iso-8859-3 + "iso8859-4", # iso-8859-4 + "iso8859-5", # iso-8859-5 + "iso8859-6", # iso-8859-6 + "iso8859-7", # iso-8859-7 + "iso8859-8", # iso-8859-8 + "iso8859-10", # iso-8859-10 + "iso8859-13", # iso-8859-13 + "iso8859-14", # iso-8859-14 + "iso8859-15", # iso-8859-15 + "iso8859-16", # iso-8859-16 + "koi8-r", # koi8-r + "koi8-u", # koi8-u + "mac-roman", # macintosh + "shift_jis", # shift-jis + "utf-8", # utf-8 + "utf-16-le", # utf-16le +} + def normalize_header_key( - value: typing.Union[str, bytes], + value: str | bytes, lower: bool, - encoding: typing.Optional[str] = None, + encoding: str | None = None, ) -> bytes: """ Coerce str/bytes into a strictly byte-wise HTTP header key.