"""
Either `<host>` or `<host>:<port>` as bytes.
Always normalized to lowercase, and IDNA encoded.
+
+ The port component is not included if it is the default for an
+ "http://" or "https://" URL.
+
+ This property may be used for generating the value of a request
+ "Host" header.
+
+ See: https://tools.ietf.org/html/rfc3986#section-3.2.3
"""
host = self._uri_reference.host or ""
port = self._uri_reference.port
netloc = host.encode("ascii")
if port:
- netloc = netloc + b":" + str(port).encode("ascii")
+ netloc = netloc + b":" + port.encode("ascii")
return netloc
@property
The URL fragments, as used in HTML anchors.
As a string, without the leading '#'.
"""
- return self._uri_reference.fragment or ""
+ return unquote(self._uri_reference.fragment or "")
@property
def raw(self) -> RawURL:
"""
The URL in the raw representation used by the low level
- transport API. For example, see `httpcore`.
+ transport API. See `BaseTransport.handle_request`.
Provides the (scheme, host, port, target) for the outgoing request.
"""
"fragment": str,
"params": object,
}
+
+ # Step 1
+ # ======
+ #
+ # Perform type checking for all supported keyword arguments.
for key, value in kwargs.items():
if key not in allowed:
message = f"{key!r} is an invalid keyword argument for copy_with()"
message = f"Argument {key!r} must be {expected} but got {seen}"
raise TypeError(message)
- # Replace username, password, userinfo, host, port, netloc with "authority" for rfc3986
+ # Step 2
+ # ======
+ #
+ # Consolidate "username", "password", "userinfo", "host", "port" and "netloc"
+ # into a single "authority" keyword, for `rfc3986`.
if "username" in kwargs or "password" in kwargs:
- # Consolidate username and password into userinfo.
+ # Consolidate "username" and "password" into "userinfo".
username = quote(kwargs.pop("username", self.username) or "")
password = quote(kwargs.pop("password", self.password) or "")
userinfo = f"{username}:{password}" if password else username
kwargs["userinfo"] = userinfo.encode("ascii")
if "host" in kwargs or "port" in kwargs:
- # Consolidate host and port into netloc.
+ # Consolidate "host" and "port" into "netloc".
host = kwargs.pop("host", self.host) or ""
port = kwargs.pop("port", self.port)
if host and ":" in host and host[0] != "[":
- # it's an IPv6 address, so it should be hidden under bracket
+ # IPv6 addresses need to be escaped within sqaure brackets.
host = f"[{host}]"
kwargs["netloc"] = (
)
if "userinfo" in kwargs or "netloc" in kwargs:
- # Consolidate userinfo and netloc into authority.
+ # Consolidate "userinfo" and "netloc" into authority.
userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii")
netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii")
authority = f"{userinfo}@{netloc}" if userinfo else netloc
kwargs["authority"] = authority
+ # Step 3
+ # ======
+ #
+ # Wrangle any "path", "query", "raw_path" and "params" keywords into
+ # "query" and "path" keywords for `rfc3986`.
if "raw_path" in kwargs:
+ # If "raw_path" is included, then split it into "path" and "query" components.
raw_path = kwargs.pop("raw_path") or b""
path, has_query, query = raw_path.decode("ascii").partition("?")
kwargs["path"] = path
kwargs["query"] = query if has_query else None
else:
- # Ensure path=<url quoted str> for rfc3986
if kwargs.get("path") is not None:
+ # Ensure `kwargs["path"] = <url quoted str>` for `rfc3986`.
kwargs["path"] = quote(kwargs["path"])
if kwargs.get("query") is not None:
- # Ensure query=<str> for rfc3986
+ # Ensure `kwargs["query"] = <str>` for `rfc3986`.
+ #
+ # Note that `.copy_with(query=None)` and `.copy_with(query=b"")`
+ # are subtly different. The `None` style will not include an empty
+ # trailing "?" character.
kwargs["query"] = kwargs["query"].decode("ascii")
if "params" in kwargs:
+ # Replace any "params" keyword with the raw "query" instead.
+ #
+ # Ensure that empty params use `kwargs["query"] = None` rather
+ # than `kwargs["query"] = ""`, so that generated URLs do not
+ # include an empty trailing "?".
params = kwargs.pop("params")
kwargs["query"] = None if not params else str(QueryParams(params))
+ # Step 4
+ # ======
+ #
+ # Ensure any fragment component is quoted.
+ if kwargs.get("fragment") is not None:
+ kwargs["fragment"] = quote(kwargs["fragment"])
+
+ # Step 5
+ # ======
+ #
+ # At this point kwargs may include keys for "scheme", "authority", "path",
+ # "query" and "fragment". Together these constitute the entire URL.
+ #
+ # See https://tools.ietf.org/html/rfc3986#section-3
+ #
+ # foo://example.com:8042/over/there?name=ferret#nose
+ # \_/ \______________/\_________/ \_________/ \__/
+ # | | | | |
+ # scheme authority path query fragment
return URL(self._uri_reference.copy_with(**kwargs).unsplit())
def copy_set_param(self, key: str, value: typing.Any = None) -> "URL":
class_name = self.__class__.__name__
url_str = str(self)
if self._uri_reference.userinfo:
+ # Mask any password component in the URL representation, to lower the
+ # risk of unintended leakage, such as in debug information and logging.
username = quote(self.username)
url_str = (
rfc3986.urlparse(url_str)