from http.cookiejar import Cookie, CookieJar
from urllib.parse import parse_qsl, quote, unquote, urlencode
+import idna
import rfc3986
import rfc3986.exceptions
class URL:
"""
- url = httpx.URL("HTTPS://jo%40email.com:a%20secret@example.com:1234/pa%20th?search=ab#anchorlink")
+ url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")
assert url.scheme == "https"
assert url.username == "jo@email.com"
assert url.password == "a secret"
assert url.userinfo == b"jo%40email.com:a%20secret"
- assert url.host == "example.com"
+ assert url.host == "müller.de"
+ assert url.raw_host == b"xn--mller-kva.de"
assert url.port == 1234
- assert url.netloc == "example.com:1234"
+ assert url.netloc == b"xn--mller-kva.de:1234"
assert url.path == "/pa th"
assert url.query == b"?search=ab"
assert url.raw_path == b"/pa%20th?search=ab"
The components of a URL are broken down like this:
- https://jo%40email.com:a%20secret@example.com:1234/pa%20th?search=ab#anchorlink
- [scheme][  username  ] [password] [  host  ][port][ path ] [ query ] [fragment]
-         [       userinfo        ] [    netloc    ][    raw_path    ]
+   https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink
+ [scheme]  [  username  ] [password] [ host ][port][ path ] [ query ] [fragment]
+           [       userinfo        ] [   netloc   ][    raw_path    ]
Note that:
* `url.scheme` is normalized to always be lowercased.
- * `url.host` is normalized to always be lowercased, and is IDNA encoded. For instance:
- url = httpx.URL("http://中国.icom.museum")
- assert url.host == "xn--fiqs8s.icom.museum"
+ * `url.host` is normalized to always be lowercased. Internationalized domain
+ names are represented in unicode, without IDNA encoding applied. For instance:
+
+ url = httpx.URL("http://中国.icom.museum")
+ assert url.host == "中国.icom.museum"
+ url = httpx.URL("http://xn--fiqs8s.icom.museum")
+ assert url.host == "中国.icom.museum"
+
+ * `url.raw_host` is normalized to always be lowercased, and is IDNA encoded.
+
+ url = httpx.URL("http://中国.icom.museum")
+ assert url.raw_host == b"xn--fiqs8s.icom.museum"
+ url = httpx.URL("http://xn--fiqs8s.icom.museum")
+ assert url.raw_host == b"xn--fiqs8s.icom.museum"
* `url.userinfo` is raw bytes, without URL escaping. Usually you'll want to work with
`url.username` and `url.password` instead, which handle the URL escaping.
"""
return self._uri_reference.scheme or ""
+ @property
+ def raw_scheme(self) -> bytes:
+     """
+     The URL scheme as raw ASCII bytes, for example b"http" or b"https".
+     Always normalized to lowercase.
+     """
+     scheme_text: str = self.scheme
+     return scheme_text.encode("ascii")
+
@property
def userinfo(self) -> bytes:
"""
def host(self) -> str:
"""
The URL host as a string.
- Always normlized to lowercase, and IDNA encoded.
+ Always normalized to lowercase, with IDNA hosts decoded into unicode.
Examples:
assert url.host == "www.example.org"
url = httpx.URL("http://中国.icom.museum")
- assert url.host == "xn--fiqs8s.icom.museum"
+ assert url.host == "中国.icom.museum"
+
+ url = httpx.URL("http://xn--fiqs8s.icom.museum")
+ assert url.host == "中国.icom.museum"
url = httpx.URL("https://[::ffff:192.168.0.1]")
assert url.host == "::ffff:192.168.0.1"
"""
- host: str = self._uri_reference.host
+     host: str = self._uri_reference.host or ""
+
+     if host and ":" in host and host[0] == "[":
+         # it's an IPv6 address
+         host = host.lstrip("[").rstrip("]")
+
+     # A punycode ("xn--") label may appear anywhere in the name, not only
+     # first (e.g. "www.xn--fiqs8s.icom.museum"), so inspect every label
+     # rather than just the start of the host.
+     if any(label.startswith("xn--") for label in host.split(".")):
+         host = idna.decode(host)
+
+     return host
+
+ @property
+ def raw_host(self) -> bytes:
+ """
+ The raw bytes representation of the URL host.
+ Always normalized to lowercase, and IDNA encoded.
+
+ Examples:
+
+ url = httpx.URL("http://www.EXAMPLE.org")
+ assert url.raw_host == b"www.example.org"
+
+ url = httpx.URL("http://中国.icom.museum")
+ assert url.raw_host == b"xn--fiqs8s.icom.museum"
+
+ url = httpx.URL("http://xn--fiqs8s.icom.museum")
+ assert url.raw_host == b"xn--fiqs8s.icom.museum"
+
+ url = httpx.URL("https://[::ffff:192.168.0.1]")
+ assert url.raw_host == b"::ffff:192.168.0.1"
+ """
+ host: str = self._uri_reference.host or ""
if host and ":" in host and host[0] == "[":
# it's an IPv6 address
host = host.lstrip("[").rstrip("]")
- return host or ""
+ # NOTE(review): encoding as ASCII assumes the stored host is already
+ # IDNA-encoded; a non-ASCII host would raise UnicodeEncodeError here.
+ # Confirm against the code that builds `_uri_reference`.
+ return host.encode("ascii")
@property
def port(self) -> typing.Optional[int]:
return int(port) if port else None
@property
- def netloc(self) -> str:
+ def netloc(self) -> bytes:
"""
- Either `<host>` or `<host>:<port>` as a string.
- Always normlized to lowercase, and IDNA encoded.
+ Either `<host>` or `<host>:<port>` as bytes.
+ Always normalized to lowercase, and IDNA encoded.
"""
host = self._uri_reference.host or ""
port = self._uri_reference.port
- return host if port is None else f"{host}:{port}"
+     encoded_host = host.encode("ascii")
+     # A missing (or empty) port contributes no ":<port>" suffix.
+     port_suffix = (b":" + str(port).encode("ascii")) if port else b""
+     return encoded_host + port_suffix
@property
def path(self) -> str:
Provides the (scheme, host, port, target) for the outgoing request.
"""
return (
- self.scheme.encode("ascii"),
- self.host.encode("ascii"),
+ self.raw_scheme,
+ self.raw_host,
self.port,
self.raw_path,
)
# URLs with a fragment portion as not absolute.
# What we actually care about is if the URL provides
# a scheme and hostname to which connections should be made.
- return bool(self.scheme and self.host)
+ return bool(self._uri_reference.scheme and self._uri_reference.host)
@property
def is_relative_url(self) -> bool:
"userinfo": bytes,
"host": str,
"port": int,
- "netloc": str,
+ "netloc": bytes,
"path": str,
"query": bytes,
"raw_path": bytes,
# it's an IPv6 address, so it should be hidden under bracket
host = f"[{host}]"
- kwargs["netloc"] = f"{host}:{port}" if port is not None else host
+ # `host` may be a unicode IDNA name (e.g. a value obtained from
+ # `url.host`, which decodes IDNA). Such names cannot be encoded as
+ # ASCII directly, so fall back to IDNA encoding for them.
+ try:
+     raw_host = host.encode("ascii")
+ except UnicodeEncodeError:
+     raw_host = idna.encode(host.lower())
+ kwargs["netloc"] = (
+     raw_host if port is None else raw_host + f":{port}".encode("ascii")
+ )
if "userinfo" in kwargs or "netloc" in kwargs:
# Consolidate userinfo and netloc into authority.
userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii")
- netloc = kwargs.pop("netloc", self.netloc) or ""
+ netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii")
authority = f"{userinfo}@{netloc}" if userinfo else netloc
kwargs["authority"] = authority
)
if not has_host and self.url.host:
- default_port = {"http": 80, "https": 443}.get(self.url.scheme)
- if self.url.port is None or self.url.port == default_port:
- host_header = self.url.host.encode("ascii")
- else:
- host_header = self.url.netloc.encode("ascii")
+     # Drop the scheme's default port from the Host header value.
+     # Schemes without a known default port must be left untouched: an
+     # empty suffix always satisfies `endswith()`, and slicing with
+     # `[: -0]` would blank out the whole header.
+     default_port = {"http": b":80", "https": b":443"}.get(self.url.scheme)
+     host_header = self.url.netloc
+     if default_port is not None and host_header.endswith(default_port):
+         host_header = host_header[: -len(default_port)]
auto_headers.append((b"Host", host_header))
if not has_content_length and self.method in ("POST", "PUT", "PATCH"):
auto_headers.append((b"Content-Length", b"0"))
@pytest.mark.parametrize(
- "given,idna,host,scheme,port",
+ "given,idna,host,raw_host,scheme,port",
[
(
"http://中国.icom.museum:80/",
"http://xn--fiqs8s.icom.museum:80/",
- "xn--fiqs8s.icom.museum",
+ "中国.icom.museum",
+ b"xn--fiqs8s.icom.museum",
"http",
80,
),
(
"http://Königsgäßchen.de",
"http://xn--knigsgchen-b4a3dun.de",
- "xn--knigsgchen-b4a3dun.de",
+ "königsgäßchen.de",
+ b"xn--knigsgchen-b4a3dun.de",
"http",
None,
),
- ("https://faß.de", "https://xn--fa-hia.de", "xn--fa-hia.de", "https", None),
+ (
+ "https://faß.de",
+ "https://xn--fa-hia.de",
+ "faß.de",
+ b"xn--fa-hia.de",
+ "https",
+ None,
+ ),
(
"https://βόλος.com:443",
"https://xn--nxasmm1c.com:443",
- "xn--nxasmm1c.com",
+ "βόλος.com",
+ b"xn--nxasmm1c.com",
"https",
443,
),
(
"http://ශ්රී.com:444",
"http://xn--10cl1a0b660p.com:444",
- "xn--10cl1a0b660p.com",
+ "ශ්රී.com",
+ b"xn--10cl1a0b660p.com",
"http",
444,
),
(
"https://نامهای.com:4433",
"https://xn--mgba3gch31f060k.com:4433",
- "xn--mgba3gch31f060k.com",
+ "نامهای.com",
+ b"xn--mgba3gch31f060k.com",
"https",
4433,
),
"https_with_custom_port",
],
)
-def test_idna_url(given, idna, host, scheme, port):
+def test_idna_url(given, idna, host, raw_host, scheme, port):
+ # `given` and `idna` are the unicode and IDNA-encoded spellings of one URL;
+ # `host` is the expected unicode host, `raw_host` the expected IDNA bytes.
url = httpx.URL(given)
assert url == httpx.URL(idna)
assert url.host == host
+ assert url.raw_host == raw_host
assert url.scheme == scheme
assert url.port == port
def test_url_copywith_netloc():
copy_with_kwargs = {
- "netloc": "example.net:444",
+ "netloc": b"example.net:444",
}
url = httpx.URL("https://example.org")
new = url.copy_with(**copy_with_kwargs)
url = httpx.URL("http://[::ffff:192.168.0.1]:5678/")
assert url.host == "::ffff:192.168.0.1"
- assert url.netloc == "[::ffff:192.168.0.1]:5678"
+ assert url.netloc == b"[::ffff:192.168.0.1]:5678"
@pytest.mark.parametrize(
url = httpx.URL(url_str).copy_with(host=new_host)
assert url.host == "::ffff:192.168.0.1"
- assert url.netloc == "[::ffff:192.168.0.1]:1234"
+ assert url.netloc == b"[::ffff:192.168.0.1]:1234"
assert str(url) == "http://[::ffff:192.168.0.1]:1234"
url = httpx.URL(raw_url)
assert url.host == "::ffff:192.168.0.1"
- assert url.netloc == "[::ffff:192.168.0.1]:443"
+ assert url.netloc == b"[::ffff:192.168.0.1]:443"
assert str(url) == "https://[::ffff:192.168.0.1]:443/"