From: Yann Ylavic Date: Thu, 7 Oct 2021 12:00:20 +0000 (+0000) Subject: core: Add ap_unescape_url_ex() for better decoding control, and deprecate X-Git-Tag: 2.5.0-alpha2-ci-test-only~765 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=48b5dfd6968cb076537b605d368d5fd889ebae86;p=thirdparty%2Fapache%2Fhttpd.git core: Add ap_unescape_url_ex() for better decoding control, and deprecate unused AP_NORMALIZE_DROP_PARAMETERS flag. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1893971 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/changes-entries/ap_unescape_url_ex.txt b/changes-entries/ap_unescape_url_ex.txt new file mode 100644 index 00000000000..3982a2941d3 --- /dev/null +++ b/changes-entries/ap_unescape_url_ex.txt @@ -0,0 +1,3 @@ + *) core: Add ap_unescape_url_ex() for better decoding control, and deprecate + unused AP_NORMALIZE_DROP_PARAMETERS flag. + [Yann Ylavic, Ruediger Pluem, Stefan Eissing] \ No newline at end of file diff --git a/include/ap_mmn.h b/include/ap_mmn.h index 496bc4b24fd..8dbd8f7f313 100644 --- a/include/ap_mmn.h +++ b/include/ap_mmn.h @@ -693,6 +693,9 @@ * adding ap_proxy_tunnel_conn_bytes_{in,out}(). * 20210924.1 (2.5.1-dev) Add ap_proxy_fill_error_brigade() * 20210926.0 (2.5.1-dev) Add dav_get_liveprop_element(), remove DAV_PROP_ELEMENT. + * 20210926.1 (2.5.1-dev) Add ap_unescape_url_ex() and deprecate + * AP_NORMALIZE_DROP_PARAMETERS + * */ #define MODULE_MAGIC_COOKIE 0x41503235UL /* "AP25" */ diff --git a/include/httpd.h b/include/httpd.h index 5a4a61979db..e5375d77c22 100644 --- a/include/httpd.h +++ b/include/httpd.h @@ -1802,6 +1802,18 @@ AP_DECLARE(int) ap_unescape_url(char *url); */ AP_DECLARE(int) ap_unescape_url_keep2f(char *url, int decode_slashes); +#define AP_UNESCAPE_URL_KEEP_UNRESERVED (1u << 0) +#define AP_UNESCAPE_URL_FORBID_SLASHES (1u << 1) +#define AP_UNESCAPE_URL_KEEP_SLASHES (1u << 2) + +/** + * Unescape a URL, with options + * @param url The url to unescape + * @param flags Bitmask of AP_UNESCAPE_URL_* flags + * @return 0 on success, non-zero otherwise + */ +AP_DECLARE(int) ap_unescape_url_ex(char *url, unsigned int flags); + /** * Unescape an application/x-www-form-urlencoded string * @param query The query to unescape @@ -1831,7 +1843,7 @@ AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path) #define AP_NORMALIZE_NOT_ABOVE_ROOT (1u << 1) #define AP_NORMALIZE_DECODE_UNRESERVED (1u << 2) #define AP_NORMALIZE_MERGE_SLASHES (1u << 3) -#define AP_NORMALIZE_DROP_PARAMETERS (1u << 4) +#define AP_NORMALIZE_DROP_PARAMETERS (0) /* deprecated */ /** * Remove all ////, /./ and /xx/../ substrings from a path, and more diff --git a/server/gen_test_char.c b/server/gen_test_char.c index 6c6566ec3c0..248216b03b0 100644 --- a/server/gen_test_char.c +++ b/server/gen_test_char.c @@ -54,6 +54,7 @@ #define T_ESCAPE_URLENCODED (0x40) #define T_HTTP_CTRLS (0x80) #define T_VCHAR_OBSTEXT (0x100) +#define T_URI_UNRESERVED (0x200) int main(int argc, char *argv[]) { @@ -71,6 +72,7 @@ int main(int argc, char *argv[]) "#define T_ESCAPE_URLENCODED (%u)\n" "#define T_HTTP_CTRLS (%u)\n" "#define T_VCHAR_OBSTEXT (%u)\n" + "#define T_URI_UNRESERVED (%u)\n" "\n" "static const unsigned short test_char_table[256] = {", T_ESCAPE_SHELL_CMD, @@ -81,7 +83,9 @@ int main(int argc, char *argv[]) T_ESCAPE_FORENSIC, T_ESCAPE_URLENCODED, T_HTTP_CTRLS, - T_VCHAR_OBSTEXT); + T_VCHAR_OBSTEXT, + T_URI_UNRESERVED + ); for (c = 0; c < 256; ++c) { flags = 0; @@ -164,6 +168,12 @@ int main(int argc, char *argv[]) flags |= T_ESCAPE_FORENSIC; } + /* Characters in the RFC 3986 "unreserved" set. + * https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 */ + if (c && (apr_isalnum(c) || strchr("-._~", c))) { + flags |= T_URI_UNRESERVED; + } + printf("0x%03x%c", flags, (c < 255) ? ',' : ' '); } diff --git a/server/request.c b/server/request.c index df0ea6758f6..cd2908da5d1 100644 --- a/server/request.c +++ b/server/request.c @@ -243,14 +243,15 @@ AP_DECLARE(int) ap_process_request_internal(request_rec *r) /* Ignore URL unescaping for translated URIs already */ if (access_status != DONE && r->parsed_uri.path) { core_dir_config *d = ap_get_core_module_config(r->per_dir_config); - - if (d->allow_encoded_slashes) { - access_status = ap_unescape_url_keep2f(r->parsed_uri.path, - d->decode_encoded_slashes); + /* Unreserved chars were already decoded by ap_normalize_path() */ + unsigned int unescape_flags = AP_UNESCAPE_URL_KEEP_UNRESERVED; + if (!d->allow_encoded_slashes) { + unescape_flags |= AP_UNESCAPE_URL_FORBID_SLASHES; } - else { - access_status = ap_unescape_url(r->parsed_uri.path); + else if (!d->decode_encoded_slashes) { + unescape_flags |= AP_UNESCAPE_URL_KEEP_SLASHES; } + access_status = ap_unescape_url_ex(r->parsed_uri.path, unescape_flags); if (access_status) { if (access_status == HTTP_NOT_FOUND) { if (! d->allow_encoded_slashes) { diff --git a/server/util.c b/server/util.c index 87412eb97c1..6d0b67a3ed8 100644 --- a/server/util.c +++ b/server/util.c @@ -531,23 +531,20 @@ AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags) * be decoded to their corresponding unreserved characters by * URI normalizers. */ - if (decode_unreserved - && path[l] == '%' && apr_isxdigit(path[l + 1]) - && apr_isxdigit(path[l + 2])) { - const char c = x2c(&path[l + 1]); - if (apr_isalnum(c) || (c && strchr("-._~", c))) { - /* Replace last char and fall through as the current - * read position */ - l += 2; - path[l] = c; + if (decode_unreserved && path[l] == '%') { + if (apr_isxdigit(path[l + 1]) && apr_isxdigit(path[l + 2])) { + const char c = x2c(&path[l + 1]); + if (TEST_CHAR(c, T_URI_UNRESERVED)) { + /* Replace last char and fall through as the current + * read position */ + l += 2; + path[l] = c; + } + } + else { + /* Invalid encoding */ + ret = 0; } - } - - if ((flags & AP_NORMALIZE_DROP_PARAMETERS) && path[l] == ';') { - do { - l++; - } while (!IS_SLASH_OR_NUL(path[l])); - continue; } if (w == 0 || IS_SLASH(path[w - 1])) { @@ -1890,8 +1887,12 @@ static char x2c(const char *what) * decoding %00 or a forbidden character returns HTTP_NOT_FOUND */ -static int unescape_url(char *url, const char *forbid, const char *reserved) +static int unescape_url(char *url, const char *forbid, const char *reserved, + unsigned int flags) { + const int keep_slashes = (flags & AP_UNESCAPE_URL_KEEP_SLASHES) != 0, + forbid_slashes = (flags & AP_UNESCAPE_URL_FORBID_SLASHES) != 0, + keep_unreserved = (flags & AP_UNESCAPE_URL_KEEP_UNRESERVED) != 0; int badesc, badpath; char *x, *y; @@ -1920,12 +1921,16 @@ static int unescape_url(char *url, const char *forbid, const char *reserved) char decoded; decoded = x2c(y + 1); if ((decoded == '\0') + || (forbid_slashes && IS_SLASH(decoded)) || (forbid && ap_strchr_c(forbid, decoded))) { badpath = 1; *x = decoded; y += 2; } - else if (reserved && ap_strchr_c(reserved, decoded)) { + else if ((keep_unreserved && TEST_CHAR(decoded, + T_URI_UNRESERVED)) + || (keep_slashes && IS_SLASH(decoded)) + || (reserved && ap_strchr_c(reserved, decoded))) { *x++ = *y++; *x++ = *y++; *x = *y; @@ -1951,19 +1956,24 @@ static int unescape_url(char *url, const char *forbid, const char *reserved) AP_DECLARE(int) ap_unescape_url(char *url) { /* Traditional */ - return unescape_url(url, SLASHES, NULL); + return unescape_url(url, SLASHES, NULL, 0); } AP_DECLARE(int) ap_unescape_url_keep2f(char *url, int decode_slashes) { /* AllowEncodedSlashes (corrected) */ if (decode_slashes) { /* no chars reserved */ - return unescape_url(url, NULL, NULL); + return unescape_url(url, NULL, NULL, 0); } else { /* reserve (do not decode) encoded slashes */ - return unescape_url(url, NULL, SLASHES); + return unescape_url(url, NULL, SLASHES, 0); } } +AP_DECLARE(int) ap_unescape_url_ex(char *url, unsigned int flags) +{ + return unescape_url(url, NULL, NULL, flags); +} + #ifdef NEW_APIS /* IFDEF these out until they've been thought through. * Just a germ of an API extension for now @@ -1973,7 +1983,7 @@ AP_DECLARE(int) ap_unescape_url_proxy(char *url) /* leave RFC1738 reserved characters intact, * so proxied URLs * don't get mangled. Where does that leave encoded '&' ? */ - return unescape_url(url, NULL, "/;?"); + return unescape_url(url, NULL, "/;?", 0); } AP_DECLARE(int) ap_unescape_url_reserved(char *url, const char *reserved) { @@ -1995,7 +2005,7 @@ AP_DECLARE(int) ap_unescape_urlencoded(char *query) } /* unescape everything else */ - return unescape_url(query, NULL, NULL); + return unescape_url(query, NULL, NULL, 0); } AP_DECLARE(char *) ap_construct_server(apr_pool_t *p, const char *hostname, @@ -2011,7 +2021,7 @@ AP_DECLARE(char *) ap_construct_server(apr_pool_t *p, const char *hostname, AP_DECLARE(int) ap_unescape_all(char *url) { - return unescape_url(url, NULL, NULL); + return unescape_url(url, NULL, NULL, 0); } /* c2x takes an unsigned, and expects the caller has guaranteed that