# For 'path' we need to drop ? and # from the GEN_DELIMS set.
parsed_path: str = quote(path, safe=SUB_DELIMS + ":/[]@")
# For 'query' we need to drop '#' from the GEN_DELIMS set.
- # We also exclude '/' because it is more robust to replace it with a percent
- # encoding despite it not being a requirement of the spec.
parsed_query: typing.Optional[str] = (
- None if query is None else quote(query, safe=SUB_DELIMS + ":?[]@")
+ None if query is None else quote(query, safe=SUB_DELIMS + ":/?[]@")
)
# For 'fragment' we can include all of the GEN_DELIMS set.
parsed_fragment: typing.Optional[str] = (
if char not in NON_ESCAPED_CHARS:
return False
- # Any '%' characters must be valid '%xx' escape sequences.
- return string.count("%") == len(PERCENT_ENCODED_REGEX.findall(string))
+ return True
-def quote(string: str, safe: str = "/") -> str:
+def percent_encoded(string: str, safe: str = "/") -> str:
"""
- Use percent-encoding to quote a string if required.
+ Use percent-encoding to quote a string.
"""
if is_safe(string, safe=safe):
return string
)
+def quote(string: str, safe: str = "/") -> str:
+ """
+ Use percent-encoding to quote a string, omitting existing '%xx' escape sequences.
+
+ See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
+
+ * `string`: The string to be percent-escaped.
+ * `safe`: A string containing characters that may be treated as safe, and do not
+ need to be escaped. Unreserved characters are always treated as safe.
+ See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3
+ """
+ parts = []
+ current_position = 0
+ for match in re.finditer(PERCENT_ENCODED_REGEX, string):
+ start_position, end_position = match.start(), match.end()
+ matched_text = match.group(0)
+ # Add any text up to the '%xx' escape sequence.
+ if start_position != current_position:
+ leading_text = string[current_position:start_position]
+ parts.append(percent_encoded(leading_text, safe=safe))
+
+ # Add the '%xx' escape sequence.
+ parts.append(matched_text)
+ current_position = end_position
+
+ # Add any text after the final '%xx' escape sequence.
+ if current_position != len(string):
+ trailing_text = string[current_position:]
+ parts.append(percent_encoded(trailing_text, safe=safe))
+
+ return "".join(parts)
+
+
def urlencode(items: typing.List[typing.Tuple[str, str]]) -> str:
"""
We can use a much simpler version of the stdlib urlencode here because
- https://github.com/encode/httpx/issues/2721
- https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
"""
- return "&".join([quote(k, safe="") + "=" + quote(v, safe="") for k, v in items])
+ return "&".join(
+ [
+ percent_encoded(k, safe="") + "=" + percent_encoded(v, safe="")
+ for k, v in items
+ ]
+ )
# Tests for percent encoding across path, query, and fragment...
-def test_path_percent_encoding():
- # Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS
- url = httpx.URL("https://example.com/!$&'()*+,;= abc ABC 123 :/[]@")
- assert url.raw_path == b"/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@"
- assert url.path == "/!$&'()*+,;= abc ABC 123 :/[]@"
- assert url.query == b""
- assert url.fragment == ""
-
-
-def test_query_percent_encoding():
- # Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS
- url = httpx.URL("https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@" + "?")
- assert url.raw_path == b"/?!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?"
- assert url.path == "/"
- assert url.query == b"!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?"
- assert url.fragment == ""
-
-
-def test_fragment_percent_encoding():
- # Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS
- url = httpx.URL("https://example.com/#!$&'()*+,;= abc ABC 123 :/[]@" + "?#")
- assert url.raw_path == b"/"
- assert url.path == "/"
- assert url.query == b""
- assert url.fragment == "!$&'()*+,;= abc ABC 123 :/[]@?#"
+@pytest.mark.parametrize(
+ "url,raw_path,path,query,fragment",
+ [
+ # URL with unescaped chars in path.
+ (
+ "https://example.com/!$&'()*+,;= abc ABC 123 :/[]@",
+ b"/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@",
+ "/!$&'()*+,;= abc ABC 123 :/[]@",
+ b"",
+ "",
+ ),
+ # URL with escaped chars in path.
+ (
+ "https://example.com/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@",
+ b"/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@",
+ "/!$&'()*+,;= abc ABC 123 :/[]@",
+ b"",
+ "",
+ ),
+ # URL with mix of unescaped and escaped chars in path.
+ # WARNING: This has the incorrect behaviour, adding the test as an interim step.
+ (
+ "https://example.com/ %61%62%63",
+ b"/%20%61%62%63",
+ "/ abc",
+ b"",
+ "",
+ ),
+ # URL with unescaped chars in query.
+ (
+ "https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@?",
+ b"/?!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?",
+ "/",
+ b"!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?",
+ "",
+ ),
+ # URL with escaped chars in query.
+ (
+ "https://example.com/?!$&%27()*+,;=%20abc%20ABC%20123%20:%2F[]@?",
+ b"/?!$&%27()*+,;=%20abc%20ABC%20123%20:%2F[]@?",
+ "/",
+ b"!$&%27()*+,;=%20abc%20ABC%20123%20:%2F[]@?",
+ "",
+ ),
+ # URL with mix of unescaped and escaped chars in query.
+ (
+ "https://example.com/?%20%97%98%99",
+ b"/?%20%97%98%99",
+ "/",
+ b"%20%97%98%99",
+ "",
+ ),
+ # URL encoding characters in fragment.
+ (
+ "https://example.com/#!$&'()*+,;= abc ABC 123 :/[]@?#",
+ b"/",
+ "/",
+ b"",
+ "!$&'()*+,;= abc ABC 123 :/[]@?#",
+ ),
+ ],
+)
+def test_path_query_fragment(url, raw_path, path, query, fragment):
+ url = httpx.URL(url)
+ assert url.raw_path == raw_path
+ assert url.path == path
+ assert url.query == query
+ assert url.fragment == fragment
def test_url_query_encoding():
"""
- URL query parameters should use '%20' to encoding spaces,
+ URL query parameters should use '%20' for encoding spaces,
and should treat '/' as a safe character. This behaviour differs
across clients, but we're matching browser behaviour here.
and https://github.com/encode/httpx/discussions/2460
"""
url = httpx.URL("https://www.example.com/?a=b c&d=e/f")
- assert url.raw_path == b"/?a=b%20c&d=e%2Ff"
+ assert url.raw_path == b"/?a=b%20c&d=e/f"
url = httpx.URL("https://www.example.com/", params={"a": "b c", "d": "e/f"})
assert url.raw_path == b"/?a=b%20c&d=e%2Ff"
-def test_url_with_url_encoded_path():
- url = httpx.URL("https://www.example.com/path%20to%20somewhere")
- assert url.path == "/path to somewhere"
- assert url.query == b""
- assert url.raw_path == b"/path%20to%20somewhere"
-
-
def test_url_params():
url = httpx.URL("https://example.org:123/path/to/somewhere", params={"a": "123"})
assert str(url) == "https://example.org:123/path/to/somewhere?a=123"