class URL:
+ """
+ url = httpx.URL("HTTPS://jo%40email.com:a%20secret@example.com:1234/pa%20th?search=ab#anchorlink")
+
+ assert url.scheme == "https"
+ assert url.username == "jo@email.com"
+ assert url.password == "a secret"
+ assert url.userinfo == b"jo%40email.com:a%20secret"
+ assert url.host == "example.com"
+ assert url.port == 1234
+ assert url.netloc == "example.com:1234"
+ assert url.path == "/pa th"
+ assert url.query == b"search=ab"
+ assert url.raw_path == b"/pa%20th?search=ab"
+ assert url.fragment == "anchorlink"
+
+ The components of a URL are broken down like this:
+
+ https://jo%40email.com:a%20secret@example.com:1234/pa%20th?search=ab#anchorlink
+ [scheme][  username  ] [password] [  host  ][port][ path ] [ query ] [fragment]
+         [       userinfo        ] [    netloc    ][    raw_path    ]
+
+ Note that:
+
+ * `url.scheme` is normalized to always be lowercased.
+
+ * `url.host` is normalized to always be lowercased, and is IDNA encoded. For instance:
+ url = httpx.URL("http://中国.icom.museum")
+ assert url.host == "xn--fiqs8s.icom.museum"
+
+ * `url.userinfo` is raw bytes, with no URL decoding applied. Usually you'll want to work with
+ `url.username` and `url.password` instead, which handle the URL escaping.
+
+ * `url.raw_path` is raw bytes of both the path and query, with no URL decoding applied.
+ This portion is used as the target when constructing HTTP requests. Usually you'll
+ want to work with `url.path` instead.
+
+ * `url.query` is raw bytes, with no URL decoding applied. A URL query string can only
+ be properly URL decoded once it has been split into its individual parameter names and values.
+ """
+
def __init__(
self, url: typing.Union["URL", str, RawURL] = "", params: QueryParamTypes = None
) -> None:
@property
def scheme(self) -> str:
+ """
+ The URL scheme, such as "http", "https".
+ Always normalised to lowercase.
+ """
return self._uri_reference.scheme or ""
@property
- def authority(self) -> str:
- port_str = self._uri_reference.port
- default_port_str = {"https": "443", "http": "80"}.get(self.scheme, "")
- if port_str is None or port_str == default_port_str:
- return self._uri_reference.host or ""
- return self._uri_reference.authority or ""
-
- @property
- def userinfo(self) -> str:
- return self._uri_reference.userinfo or ""
+ def userinfo(self) -> bytes:
+ """
+ The URL userinfo as a raw bytestring.
+ For example: b"jo%40email.com:a%20secret".
+ """
+ userinfo = self._uri_reference.userinfo or ""
+ return userinfo.encode("ascii")
@property
def username(self) -> str:
- return unquote(self.userinfo.partition(":")[0])
+ """
+ The URL username as a string, with URL decoding applied.
+ For example: "jo@email.com"
+ """
+ userinfo = self._uri_reference.userinfo or ""
+ return unquote(userinfo.partition(":")[0])
@property
def password(self) -> str:
- return unquote(self.userinfo.partition(":")[2])
+ """
+ The URL password as a string, with URL decoding applied.
+ For example: "a secret"
+ """
+ userinfo = self._uri_reference.userinfo or ""
+ return unquote(userinfo.partition(":")[2])
@property
def host(self) -> str:
+ """
+ The URL host as a string.
+ Always normalized to lowercase, and IDNA encoded.
+
+ Examples:
+
+ url = httpx.URL("http://www.EXAMPLE.org")
+ assert url.host == "www.example.org"
+
+ url = httpx.URL("http://中国.icom.museum")
+ assert url.host == "xn--fiqs8s.icom.museum"
+ """
return self._uri_reference.host or ""
@property
def port(self) -> typing.Optional[int]:
+ """
+ The URL port as an integer, or `None` if the URL does not include a port.
+ """
port = self._uri_reference.port
return int(port) if port else None
+ @property
+ def netloc(self) -> str:
+ """
+ Either `<host>` or `<host>:<port>` as a string.
+ Always normalized to lowercase, and IDNA encoded.
+ """
+ host = self._uri_reference.host or ""
+ port = self._uri_reference.port
+ return host if port is None else f"{host}:{port}"
+
@property
def path(self) -> str:
- return self._uri_reference.path or "/"
+ """
+ The URL path as a string, excluding the query string, with URL decoding applied.
+
+ For example:
+
+ url = httpx.URL("https://example.com/pa%20th")
+ assert url.path == "/pa th"
+ """
+ path = self._uri_reference.path or "/"
+ return unquote(path)
@property
- def query(self) -> str:
- return self._uri_reference.query or ""
+ def query(self) -> bytes:
+ """
+ The URL query string, as raw bytes, excluding the leading b"?".
+ Note that URL decoding can only be applied on URL query strings
+ at the point of decoding the individual parameter names/values.
+ """
+ query = self._uri_reference.query or ""
+ return query.encode("ascii")
@property
- def full_path(self) -> str:
- path = self.path
- if self.query:
- path += "?" + self.query
- return path
+ def raw_path(self) -> bytes:
+ """
+ The complete URL path and query string as raw bytes.
+ Used as the target when constructing HTTP requests.
+
+ For example:
+
+ GET /users?search=some%20text HTTP/1.1
+ Host: www.example.org
+ Connection: close
+ """
+ path = self._uri_reference.path or "/"
+ if self._uri_reference.query is not None:
+ path += "?" + self._uri_reference.query
+ return path.encode("ascii")
@property
def fragment(self) -> str:
+ """
+ The URL fragment, as used in HTML anchors.
+ As a string, without the leading '#'.
+ """
return self._uri_reference.fragment or ""
@property
def raw(self) -> RawURL:
+ """
+ The URL in the raw representation used by the low level
+ transport API. For example, see `httpcore`.
+
+ Provides the (scheme, host, port, target) for the outgoing request.
+ """
return (
self.scheme.encode("ascii"),
self.host.encode("ascii"),
self.port,
- self.full_path.encode("ascii"),
+ self.raw_path,
)
@property
@property
def is_relative_url(self) -> bool:
+ """
+ Return `False` for absolute URLs such as 'http://example.com/path',
+ and `True` for relative URLs such as '/path'.
+ """
return not self.is_absolute_url
def copy_with(self, **kwargs: typing.Any) -> "URL":
- if (
- "username" in kwargs
- or "password" in kwargs
- or "host" in kwargs
- or "port" in kwargs
- ):
- host = kwargs.pop("host", self.host)
- port = kwargs.pop("port", self.port)
+ """
+ Copy this URL, returning a new URL with some components altered.
+ Accepts the same set of parameters as the components that are made
+ available via properties on the `URL` class.
+
+ For example:
+
+ url = httpx.URL("https://www.example.com").copy_with(username="jo@email.com", password="a secret")
+ assert url == "https://jo%40email.com:a%20secret@www.example.com"
+ """
+ allowed = {
+ "scheme": str,
+ "username": str,
+ "password": str,
+ "userinfo": bytes,
+ "host": str,
+ "port": int,
+ "netloc": str,
+ "path": str,
+ "query": bytes,
+ "raw_path": bytes,
+ "fragment": str,
+ }
+ for key, value in kwargs.items():
+ if key not in allowed:
+ message = f"{key!r} is an invalid keyword argument for copy_with()"
+ raise TypeError(message)
+ if value is not None and not isinstance(value, allowed[key]):
+ expected = allowed[key].__name__
+ seen = type(value).__name__
+ message = f"Argument {key!r} must be {expected} but got {seen}"
+ raise TypeError(message)
+
+ # Replace username, password, userinfo, host, port, netloc with "authority" for rfc3986
+ if "username" in kwargs or "password" in kwargs:
+ # Consolidate username and password into userinfo.
username = quote(kwargs.pop("username", self.username) or "")
password = quote(kwargs.pop("password", self.password) or "")
+ userinfo = f"{username}:{password}" if password else username
+ kwargs["userinfo"] = userinfo.encode("ascii")
- authority = host
- if port is not None:
- authority += f":{port}"
- if username:
- userpass = username
- if password:
- userpass += f":{password}"
- authority = f"{userpass}@{authority}"
+ if "host" in kwargs or "port" in kwargs:
+ # Consolidate host and port into netloc.
+ host = kwargs.pop("host", self.host) or ""
+ port = kwargs.pop("port", self.port)
+ kwargs["netloc"] = f"{host}:{port}" if port is not None else host
+ if "userinfo" in kwargs or "netloc" in kwargs:
+ # Consolidate userinfo and netloc into authority.
+ userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii")
+ netloc = kwargs.pop("netloc", self.netloc) or ""
+ authority = f"{userinfo}@{netloc}" if userinfo else netloc
kwargs["authority"] = authority
+ if "raw_path" in kwargs:
+ raw_path = kwargs.pop("raw_path") or b""
+ path, has_query, query = raw_path.decode("ascii").partition("?")
+ kwargs["path"] = path
+ kwargs["query"] = query if has_query else None
+
+ else:
+ # Ensure path=<url quoted str> for rfc3986
+ if kwargs.get("path") is not None:
+ kwargs["path"] = quote(kwargs["path"])
+
+ # Ensure query=<str> for rfc3986
+ if kwargs.get("query") is not None:
+ kwargs["query"] = kwargs["query"].decode("ascii")
+
return URL(self._uri_reference.copy_with(**kwargs).unsplit())
def join(self, url: URLTypes) -> "URL":
"""
Return an absolute URL, using this URL as the base.
+
+ Eg.
+
+ url = httpx.URL("https://www.example.com/test")
+ url = url.join("/new/path")
+ assert url == "https://www.example.com/new/path"
"""
if self.is_relative_url:
return URL(url)
class_name = self.__class__.__name__
url_str = str(self)
if self._uri_reference.userinfo:
+ username = quote(self.username)
url_str = (
rfc3986.urlparse(url_str)
- .copy_with(userinfo=f"{self.username}:[secure]")
+ .copy_with(userinfo=f"{username}:[secure]")
.unsplit()
)
return f"{class_name}({url_str!r})"
"content-length" in self.headers or "transfer-encoding" in self.headers
)
- if not has_host and self.url.authority:
- host = self.url.copy_with(username=None, password=None).authority
- auto_headers.append((b"host", host.encode("ascii")))
+ if not has_host and self.url.host:
+ default_port = {"http": 80, "https": 443}.get(self.url.scheme)
+ if self.url.port is None or self.url.port == default_port:
+ host_header = self.url.host.encode("ascii")
+ else:
+ host_header = self.url.netloc.encode("ascii")
+ auto_headers.append((b"host", host_header))
if not has_content_length and self.method in ("POST", "PUT", "PATCH"):
auto_headers.append((b"content-length", b"0"))
assert url.scheme == "https"
assert url.host == "example.org"
assert url.port == 123
- assert url.authority == "example.org:123"
assert url.path == "/path/to/somewhere"
- assert url.query == "abc=123"
+ assert url.query == b"abc=123"
+ assert url.raw_path == b"/path/to/somewhere?abc=123"
assert url.fragment == "anchor"
assert (
repr(url) == "URL('https://example.org:123/path/to/somewhere?abc=123#anchor')"
assert all(url in urls for url in url_set)
-def test_url_copywith_for_authority():
+def test_url_copywith_authority_subcomponents():
copy_with_kwargs = {
"username": "username",
"password": "password",
}
url = httpx.URL("https://example.org")
new = url.copy_with(**copy_with_kwargs)
- for k, v in copy_with_kwargs.items():
- assert getattr(new, k) == v
assert str(new) == "https://username:password@example.net:444"
-def test_url_copywith_for_userinfo():
+def test_url_copywith_netloc():
+ copy_with_kwargs = {
+ "netloc": "example.net:444",
+ }
+ url = httpx.URL("https://example.org")
+ new = url.copy_with(**copy_with_kwargs)
+ assert str(new) == "https://example.net:444"
+
+
+def test_url_copywith_userinfo_subcomponents():
copy_with_kwargs = {
"username": "tom@example.org",
"password": "abc123@ %",
assert str(new) == "https://tom%40example.org:abc123%40%20%25@example.org"
assert new.username == "tom@example.org"
assert new.password == "abc123@ %"
+ assert new.userinfo == b"tom%40example.org:abc123%40%20%25"
+
+
+def test_url_copywith_invalid_component():
+ url = httpx.URL("https://example.org")
+ with pytest.raises(TypeError):
+ url.copy_with(pathh="/incorrect-spelling")
+ with pytest.raises(TypeError):
+ url.copy_with(userinfo="should be bytes")
+
+
+def test_url_copywith_urlencoded_path():
+ url = httpx.URL("https://example.org")
+ url = url.copy_with(path="/path to somewhere")
+ assert url.path == "/path to somewhere"
+ assert url.query == b""
+ assert url.raw_path == b"/path%20to%20somewhere"
+
+
+def test_url_copywith_query():
+ url = httpx.URL("https://example.org")
+ url = url.copy_with(query=b"a=123")
+ assert url.path == "/"
+ assert url.query == b"a=123"
+ assert url.raw_path == b"/?a=123"
+
+
+def test_url_copywith_raw_path():
+ url = httpx.URL("https://example.org")
+ url = url.copy_with(raw_path=b"/some/path")
+ assert url.path == "/some/path"
+ assert url.query == b""
+ assert url.raw_path == b"/some/path"
+
+ url = httpx.URL("https://example.org")
+ url = url.copy_with(raw_path=b"/some/path?")
+ assert url.path == "/some/path"
+ assert url.query == b""
+ assert url.raw_path == b"/some/path?"
+
+ url = httpx.URL("https://example.org")
+ url = url.copy_with(raw_path=b"/some/path?a=123")
+ assert url.path == "/some/path"
+ assert url.query == b"a=123"
+ assert url.raw_path == b"/some/path?a=123"
def test_url_invalid():
with pytest.raises(TypeError):
httpx.URL(ExternalURLClass()) # type: ignore
+
+
+def test_url_with_empty_query():
+ """
+ URLs with and without a trailing `?` but an empty query component
+ should preserve the information on the raw path.
+ """
+ url = httpx.URL("https://www.example.com/path")
+ assert url.path == "/path"
+ assert url.query == b""
+ assert url.raw_path == b"/path"
+
+ url = httpx.URL("https://www.example.com/path?")
+ assert url.path == "/path"
+ assert url.query == b""
+ assert url.raw_path == b"/path?"
+
+
+def test_url_with_url_encoded_path():
+ url = httpx.URL("https://www.example.com/path%20to%20somewhere")
+ assert url.path == "/path to somewhere"
+ assert url.query == b""
+ assert url.raw_path == b"/path%20to%20somewhere"