git.ipfire.org Git - thirdparty/httpx.git/commitdiff
Resolve queryparam quoting (#3187)
author: Tom Christie <tom@tomchristie.com>
Fri, 3 May 2024 00:09:08 +0000 (01:09 +0100)
committer: GitHub <noreply@github.com>
Fri, 3 May 2024 00:09:08 +0000 (01:09 +0100)
httpx/_urlparse.py
tests/models/test_url.py

index 232269ee4dbffb94b54bdc59b4a0dee969f9f96a..883f0895d3e8356b69edbd1bf4f463fb107da88d 100644 (file)
@@ -406,44 +406,22 @@ def normalize_path(path: str) -> str:
     return "/".join(output)
 
 
-def percent_encode(char: str) -> str:
-    """
-    Replace a single character with the percent-encoded representation.
-
-    Characters outside the ASCII range are represented with their a percent-encoded
-    representation of their UTF-8 byte sequence.
-
-    For example:
-
-        percent_encode(" ") == "%20"
-    """
-    return "".join([f"%{byte:02x}" for byte in char.encode("utf-8")]).upper()
-
-
-def is_safe(string: str, safe: str = "/") -> bool:
-    """
-    Determine if a given string is already quote-safe.
-    """
-    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe + "%"
-
-    # All characters must already be non-escaping or '%'
-    for char in string:
-        if char not in NON_ESCAPED_CHARS:
-            return False
-
-    return True
+def PERCENT(string: str) -> str:
+    return "".join([f"%{byte:02X}" for byte in string.encode("utf-8")])
 
 
 def percent_encoded(string: str, safe: str = "/") -> str:
     """
     Use percent-encoding to quote a string.
     """
-    if is_safe(string, safe=safe):
+    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe
+
+    # Fast path for strings that don't need escaping.
+    if not string.rstrip(NON_ESCAPED_CHARS):
         return string
 
-    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe
     return "".join(
-        [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string]
+        [char if char in NON_ESCAPED_CHARS else PERCENT(char) for char in string]
     )
 
 
index 79e1605a5a4a8fd1326fad20ece389c479da0e70..32571238101d4c15e5783f73e6234abc0865492a 100644 (file)
@@ -229,6 +229,11 @@ def test_url_normalized_host():
     assert url.host == "example.com"
 
 
+def test_url_percent_escape_host():
+    url = httpx.URL("https://exam%le.com/")
+    assert url.host == "exam%25le.com"
+
+
 def test_url_ipv4_like_host():
     """rare host names used to quality as IPv4"""
     url = httpx.URL("https://023b76x43144/")
@@ -278,24 +283,64 @@ def test_url_leading_dot_prefix_on_relative_url():
     assert url.path == "../abc"
 
 
-# Tests for optional percent encoding
+# Tests for query parameter percent encoding.
+#
+# Percent-encoding in `params={}` should match browser form behavior.
 
 
-def test_param_requires_encoding():
+def test_param_with_space():
+    # Params passed as form key-value pairs should be escaped.
     url = httpx.URL("http://webservice", params={"u": "with spaces"})
     assert str(url) == "http://webservice?u=with%20spaces"
 
 
 def test_param_does_not_require_encoding():
+    # Params passed as form key-value pairs should be escaped.
+    url = httpx.URL("http://webservice", params={"u": "%"})
+    assert str(url) == "http://webservice?u=%25"
+
+
+def test_param_with_percent_encoded():
+    # Params passed as form key-value pairs should always be escaped,
+    # even if they include a valid escape sequence.
+    # We want to match browser form behaviour here.
     url = httpx.URL("http://webservice", params={"u": "with%20spaces"})
-    assert str(url) == "http://webservice?u=with%20spaces"
+    assert str(url) == "http://webservice?u=with%2520spaces"
 
 
 def test_param_with_existing_escape_requires_encoding():
+    # Params passed as form key-value pairs should always be escaped,
+    # even if they include a valid escape sequence.
+    # We want to match browser form behaviour here.
     url = httpx.URL("http://webservice", params={"u": "http://example.com?q=foo%2Fa"})
     assert str(url) == "http://webservice?u=http%3A%2F%2Fexample.com%3Fq%3Dfoo%252Fa"
 
 
+# Tests for percent encoding of a query string embedded in the URL.
+#
+# Percent-encoding in `url={}` should match browser URL bar behavior.
+
+
+def test_query_with_existing_percent_encoding():
+    # Valid percent encoded sequences should not be double encoded.
+    url = httpx.URL("http://webservice?u=phrase%20with%20spaces")
+    assert str(url) == "http://webservice?u=phrase%20with%20spaces"
+
+
+def test_query_requiring_percent_encoding():
+    # Characters that require percent encoding should be encoded.
+    url = httpx.URL("http://webservice?u=phrase with spaces")
+    assert str(url) == "http://webservice?u=phrase%20with%20spaces"
+
+
+def test_query_with_mixed_percent_encoding():
+    # When a mix of encoded and unencoded characters are present,
+    # characters that require percent encoding should be encoded,
+    # while existing sequences should not be double encoded.
+    url = httpx.URL("http://webservice?u=phrase%20with spaces")
+    assert str(url) == "http://webservice?u=phrase%20with%20spaces"
+
+
 # Tests for invalid URLs