]> git.ipfire.org Git - thirdparty/httpx.git/commitdiff
Perform port normalization for http, https, ws, wss, and ftp schemes (#1603)
authorTom Christie <tom@tomchristie.com>
Tue, 27 Apr 2021 13:06:23 +0000 (14:06 +0100)
committerGitHub <noreply@github.com>
Tue, 27 Apr 2021 13:06:23 +0000 (14:06 +0100)
httpx/_models.py
tests/client/test_proxies.py
tests/models/test_url.py

index fa8c266027dba3205f02b0c4e551374325387801..a6157a8728beafc3b1593c2e11bfa123c5f8290e 100644 (file)
@@ -100,6 +100,13 @@ class URL:
       url = httpx.URL("http://xn--fiqs8s.icom.museum")
       assert url.raw_host == b"xn--fiqs8s.icom.museum"
 
+    * `url.port` is either None or an integer. URLs that include the default port for
+      "http", "https", "ws", "wss", and "ftp" schemes have their port normalized to `None`.
+
+      assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80")
+      assert httpx.URL("http://example.com").port is None
+      assert httpx.URL("http://example.com:80").port is None
+
     * `url.userinfo` is raw bytes, without URL escaping. Usually you'll want to work with
       `url.username` and `url.password` instead, which handle the URL escaping.
 
@@ -144,6 +151,24 @@ class URL:
                 f"Invalid type for url.  Expected str or httpx.URL, got {type(url)}: {url!r}"
             )
 
+        # Perform port normalization, following the WHATWG spec for default ports.
+        #
+        # See:
+        # * https://tools.ietf.org/html/rfc3986#section-3.2.3
+        # * https://url.spec.whatwg.org/#url-miscellaneous
+        # * https://url.spec.whatwg.org/#scheme-state
+        default_port = {
+            "ftp": ":21",
+            "http": ":80",
+            "https": ":443",
+            "ws": ":80",
+            "wss": ":443",
+        }.get(self._uri_reference.scheme, "")
+        authority = self._uri_reference.authority or ""
+        if default_port and authority.endswith(default_port):
+            authority = authority[: -len(default_port)]
+            self._uri_reference = self._uri_reference.copy_with(authority=authority)
+
         if kwargs:
             self._uri_reference = self.copy_with(**kwargs)._uri_reference
 
@@ -253,6 +278,15 @@ class URL:
     def port(self) -> typing.Optional[int]:
         """
         The URL port as an integer.
+
+        Note that the URL class performs port normalization as per the WHATWG spec.
+        Default ports for "http", "https", "ws", "wss", and "ftp" schemes are always
+        treated as `None`.
+
+        For example:
+
+        assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80")
+        assert httpx.URL("http://www.example.com:80").port is None
         """
         port = self._uri_reference.port
         return int(port) if port else None
@@ -263,13 +297,8 @@ class URL:
         Either `<host>` or `<host>:<port>` as bytes.
         Always normalized to lowercase, and IDNA encoded.
 
-        The port component is not included if it is the default for an
-        "http://" or "https://" URL.
-
         This property may be used for generating the value of a request
         "Host" header.
-
-        See: https://tools.ietf.org/html/rfc3986#section-3.2.3
         """
         host = self._uri_reference.host or ""
         port = self._uri_reference.port
@@ -547,7 +576,7 @@ class URL:
         return hash(str(self))
 
     def __eq__(self, other: typing.Any) -> bool:
-        return isinstance(other, (URL, str)) and str(self) == str(other)
+        return isinstance(other, (URL, str)) and str(self) == str(URL(other))
 
     def __str__(self) -> str:
         return self._uri_reference.unsplit()
@@ -1099,11 +1128,7 @@ class Request:
         )
 
         if not has_host and self.url.host:
-            default_port = {"http": b":80", "https": b":443"}.get(self.url.scheme, b"")
-            host_header = self.url.netloc
-            if host_header.endswith(default_port):
-                host_header = host_header[: -len(default_port)]
-            auto_headers.append((b"Host", host_header))
+            auto_headers.append((b"Host", self.url.netloc))
         if not has_content_length and self.method in ("POST", "PUT", "PATCH"):
             auto_headers.append((b"Content-Length", b"0"))
 
index d4919031a47f8e717c552ba732ae5f450a40e351..6ea4cbe407a419ad1c10d230c1ce0a18a74f8b07 100644 (file)
@@ -79,9 +79,8 @@ PROXY_URL = "http://[::1]"
         ("http://example.com", {"all://": PROXY_URL, "http://example.com": None}, None),
         ("http://example.com", {"http://": PROXY_URL}, PROXY_URL),
         ("http://example.com", {"all://example.com": PROXY_URL}, PROXY_URL),
-        ("http://example.com", {"all://example.com:80": PROXY_URL}, None),
         ("http://example.com", {"http://example.com": PROXY_URL}, PROXY_URL),
-        ("http://example.com", {"http://example.com:80": PROXY_URL}, None),
+        ("http://example.com", {"http://example.com:80": PROXY_URL}, PROXY_URL),
         ("http://example.com:8080", {"http://example.com:8080": PROXY_URL}, PROXY_URL),
         ("http://example.com:8080", {"http://example.com": PROXY_URL}, PROXY_URL),
         (
index c28d070f88b6e67edfd68fcc8083f74da7c19d32..cd099bd931ada0eb7f953fee6a583e0a3044ff89 100644 (file)
@@ -12,7 +12,7 @@ import httpx
             "中国.icom.museum",
             b"xn--fiqs8s.icom.museum",
             "http",
-            80,
+            None,
         ),
         (
             "http://Königsgäßchen.de",
@@ -36,7 +36,7 @@ import httpx
             "βόλος.com",
             b"xn--nxasmm1c.com",
             "https",
-            443,
+            None,
         ),
         (
             "http://ශ්‍රී.com:444",
@@ -374,5 +374,5 @@ def test_ipv6_url_from_raw_url(host):
     url = httpx.URL(raw_url)
 
     assert url.host == "::ffff:192.168.0.1"
-    assert url.netloc == b"[::ffff:192.168.0.1]:443"
-    assert str(url) == "https://[::ffff:192.168.0.1]:443/"
+    assert url.netloc == b"[::ffff:192.168.0.1]"
+    assert str(url) == "https://[::ffff:192.168.0.1]/"