From: Tom Christie Date: Tue, 27 Apr 2021 10:23:52 +0000 (+0100) Subject: Code comments for URL model (#1602) X-Git-Tag: 0.18.0~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c927f3e9659a8f663afae69d8ac1d997f1dca4eb;p=thirdparty%2Fhttpx.git Code comments for URL model (#1602) --- diff --git a/httpx/_models.py b/httpx/_models.py index 9325f444..fa8c2660 100644 --- a/httpx/_models.py +++ b/httpx/_models.py @@ -262,12 +262,20 @@ class URL: """ Either `` or `:` as bytes. Always normalized to lowercase, and IDNA encoded. + + The port component is not included if it is the default for an + "http://" or "https://" URL. + + This property may be used for generating the value of a request + "Host" header. + + See: https://tools.ietf.org/html/rfc3986#section-3.2.3 """ host = self._uri_reference.host or "" port = self._uri_reference.port netloc = host.encode("ascii") if port: - netloc = netloc + b":" + str(port).encode("ascii") + netloc = netloc + b":" + port.encode("ascii") return netloc @property @@ -331,13 +339,13 @@ class URL: The URL fragments, as used in HTML anchors. As a string, without the leading '#'. """ - return self._uri_reference.fragment or "" + return unquote(self._uri_reference.fragment or "") @property def raw(self) -> RawURL: """ The URL in the raw representation used by the low level - transport API. For example, see `httpcore`. + transport API. See `BaseTransport.handle_request`. Provides the (scheme, host, port, target) for the outgoing request. """ @@ -393,6 +401,11 @@ class URL: "fragment": str, "params": object, } + + # Step 1 + # ====== + # + # Perform type checking for all supported keyword arguments. for key, value in kwargs.items(): if key not in allowed: message = f"{key!r} is an invalid keyword argument for copy_with()" @@ -403,21 +416,25 @@ class URL: message = f"Argument {key!r} must be {expected} but got {seen}" raise TypeError(message) - # Replace username, password, userinfo, host, port, netloc with "authority" for rfc3986 + # Step 2 + # ====== + # + # Consolidate "username", "password", "userinfo", "host", "port" and "netloc" + # into a single "authority" keyword, for `rfc3986`. if "username" in kwargs or "password" in kwargs: - # Consolidate username and password into userinfo. + # Consolidate "username" and "password" into "userinfo". username = quote(kwargs.pop("username", self.username) or "") password = quote(kwargs.pop("password", self.password) or "") userinfo = f"{username}:{password}" if password else username kwargs["userinfo"] = userinfo.encode("ascii") if "host" in kwargs or "port" in kwargs: - # Consolidate host and port into netloc. + # Consolidate "host" and "port" into "netloc". host = kwargs.pop("host", self.host) or "" port = kwargs.pop("port", self.port) if host and ":" in host and host[0] != "[": - # it's an IPv6 address, so it should be hidden under bracket + # IPv6 addresses need to be escaped within sqaure brackets. host = f"[{host}]" kwargs["netloc"] = ( @@ -427,31 +444,65 @@ class URL: ) if "userinfo" in kwargs or "netloc" in kwargs: - # Consolidate userinfo and netloc into authority. + # Consolidate "userinfo" and "netloc" into authority. userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii") netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii") authority = f"{userinfo}@{netloc}" if userinfo else netloc kwargs["authority"] = authority + # Step 3 + # ====== + # + # Wrangle any "path", "query", "raw_path" and "params" keywords into + # "query" and "path" keywords for `rfc3986`. if "raw_path" in kwargs: + # If "raw_path" is included, then split it into "path" and "query" components. raw_path = kwargs.pop("raw_path") or b"" path, has_query, query = raw_path.decode("ascii").partition("?") kwargs["path"] = path kwargs["query"] = query if has_query else None else: - # Ensure path= for rfc3986 if kwargs.get("path") is not None: + # Ensure `kwargs["path"] = ` for `rfc3986`. kwargs["path"] = quote(kwargs["path"]) if kwargs.get("query") is not None: - # Ensure query= for rfc3986 + # Ensure `kwargs["query"] = ` for `rfc3986`. + # + # Note that `.copy_with(query=None)` and `.copy_with(query=b"")` + # are subtly different. The `None` style will not include an empty + # trailing "?" character. kwargs["query"] = kwargs["query"].decode("ascii") if "params" in kwargs: + # Replace any "params" keyword with the raw "query" instead. + # + # Ensure that empty params use `kwargs["query"] = None` rather + # than `kwargs["query"] = ""`, so that generated URLs do not + # include an empty trailing "?". params = kwargs.pop("params") kwargs["query"] = None if not params else str(QueryParams(params)) + # Step 4 + # ====== + # + # Ensure any fragment component is quoted. + if kwargs.get("fragment") is not None: + kwargs["fragment"] = quote(kwargs["fragment"]) + + # Step 5 + # ====== + # + # At this point kwargs may include keys for "scheme", "authority", "path", + # "query" and "fragment". Together these constitute the entire URL. + # + # See https://tools.ietf.org/html/rfc3986#section-3 + # + # foo://example.com:8042/over/there?name=ferret#nose + # \_/ \______________/\_________/ \_________/ \__/ + # | | | | | + # scheme authority path query fragment return URL(self._uri_reference.copy_with(**kwargs).unsplit()) def copy_set_param(self, key: str, value: typing.Any = None) -> "URL": @@ -505,6 +556,8 @@ class URL: class_name = self.__class__.__name__ url_str = str(self) if self._uri_reference.userinfo: + # Mask any password component in the URL representation, to lower the + # risk of unintended leakage, such as in debug information and logging. username = quote(self.username) url_str = ( rfc3986.urlparse(url_str)