From: Tom Christie Date: Tue, 9 May 2023 13:20:12 +0000 (+0100) Subject: Fix for gen-delims escaping behaviour in path/query/fragment (#2701) X-Git-Tag: 0.24.1~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ee432c0d3098375df258b6d5844e7569080b2369;p=thirdparty%2Fhttpx.git Fix for gen-delims escaping behaviour in path/query/fragment (#2701) --- diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 5ee6e582..69ff0b4b 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -253,12 +253,19 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: if has_authority: path = normalize_path(path) - parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") + # The GEN_DELIMS set is... : / ? # [ ] @ + # These do not need to be percent-quoted unless they serve as delimiters for the + # specific component. + + # For 'path' we need to drop ? and # from the GEN_DELIMS set. + parsed_path: str = quote(path, safe=SUB_DELIMS + ":/[]@") + # For 'query' we need to drop '#' from the GEN_DELIMS set. parsed_query: typing.Optional[str] = ( - None if query is None else quote(query, safe=SUB_DELIMS + "/?") + None if query is None else quote(query, safe=SUB_DELIMS + ":/?[]@") ) + # For 'fragment' we can include all of the GEN_DELIMS set. parsed_fragment: typing.Optional[str] = ( - None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") + None if fragment is None else quote(fragment, safe=SUB_DELIMS + ":/?#[]@") ) # The parsed ASCII bytestrings are our canonical form. diff --git a/tests/test_urlparse.py b/tests/test_urlparse.py index 575ec84a..0347d312 100644 --- a/tests/test_urlparse.py +++ b/tests/test_urlparse.py @@ -247,3 +247,33 @@ def test_copy_with(): url = url.copy_with(path="/abc") assert str(url) == "http://example.com/abc" + + +# Tests for percent encoding across path, query, and fragment... 
+
+
+def test_path_percent_encoding():
+    # Path: SUB_DELIMS, ALPHA, NUM and ':/[]@' stay literal; the space becomes %20.
+    url = httpx.URL("https://example.com/!$&'()*+,;= abc ABC 123 :/[]@")
+    assert url.raw_path == b"/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@"
+    assert url.path == "/!$&'()*+,;= abc ABC 123 :/[]@"
+    assert url.query == b""
+    assert url.fragment == ""
+
+
+def test_query_percent_encoding():
+    # Query: SUB_DELIMS, ALPHA, NUM and ':/?[]@' stay literal; the space becomes %20.
+    url = httpx.URL("https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@" + "?")
+    assert url.raw_path == b"/?!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?"
+    assert url.path == "/"
+    assert url.query == b"!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?"
+    assert url.fragment == ""
+
+
+def test_fragment_percent_encoding():
+    # Fragment: ':/?#[]@' all survive in url.fragment; path and query are untouched.
+    url = httpx.URL("https://example.com/#!$&'()*+,;= abc ABC 123 :/[]@" + "?#")
+    assert url.raw_path == b"/"
+    assert url.path == "/"
+    assert url.query == b""
+    assert url.fragment == "!$&'()*+,;= abc ABC 123 :/[]@?#"