*/
AP_DECLARE(int) ap_unescape_url_keep2f(char *url, int decode_slashes);
+#define AP_UNESCAPE_URL_KEEP_UNRESERVED (1u << 0)
+#define AP_UNESCAPE_URL_FORBID_SLASHES (1u << 1)
+#define AP_UNESCAPE_URL_KEEP_SLASHES (1u << 2)
+
+/**
+ * Unescape a URL, with options
+ * @param url The url to unescape
+ * @param flags Bitmask of AP_UNESCAPE_URL_* flags
+ * @return 0 on success, non-zero otherwise
+ */
+AP_DECLARE(int) ap_unescape_url_ex(char *url, unsigned int flags);
+
/**
* Unescape an application/x-www-form-urlencoded string
* @param query The query to unescape
#define AP_NORMALIZE_NOT_ABOVE_ROOT (1u << 1)
#define AP_NORMALIZE_DECODE_UNRESERVED (1u << 2)
#define AP_NORMALIZE_MERGE_SLASHES (1u << 3)
-#define AP_NORMALIZE_DROP_PARAMETERS (1u << 4)
+#define AP_NORMALIZE_DROP_PARAMETERS (0) /* deprecated */
/**
* Remove all ////, /./ and /xx/../ substrings from a path, and more
#define T_ESCAPE_URLENCODED (0x40)
#define T_HTTP_CTRLS (0x80)
#define T_VCHAR_OBSTEXT (0x100)
+#define T_URI_UNRESERVED (0x200)
int main(int argc, char *argv[])
{
"#define T_ESCAPE_URLENCODED (%u)\n"
"#define T_HTTP_CTRLS (%u)\n"
"#define T_VCHAR_OBSTEXT (%u)\n"
+ "#define T_URI_UNRESERVED (%u)\n"
"\n"
"static const unsigned short test_char_table[256] = {",
T_ESCAPE_SHELL_CMD,
T_ESCAPE_FORENSIC,
T_ESCAPE_URLENCODED,
T_HTTP_CTRLS,
- T_VCHAR_OBSTEXT);
+ T_VCHAR_OBSTEXT,
+ T_URI_UNRESERVED
+ );
for (c = 0; c < 256; ++c) {
flags = 0;
flags |= T_ESCAPE_FORENSIC;
}
+ /* Characters in the RFC 3986 "unreserved" set.
+ * https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 */
+ if (c && (apr_isalnum(c) || strchr("-._~", c))) {
+ flags |= T_URI_UNRESERVED;
+ }
+
printf("0x%03x%c", flags, (c < 255) ? ',' : ' ');
}
/* Ignore URL unescaping for translated URIs already */
if (access_status != DONE && r->parsed_uri.path) {
core_dir_config *d = ap_get_core_module_config(r->per_dir_config);
-
- if (d->allow_encoded_slashes) {
- access_status = ap_unescape_url_keep2f(r->parsed_uri.path,
- d->decode_encoded_slashes);
+ /* Unreserved chars were already decoded by ap_normalize_path() */
+ unsigned int unescape_flags = AP_UNESCAPE_URL_KEEP_UNRESERVED;
+ if (!d->allow_encoded_slashes) {
+ unescape_flags |= AP_UNESCAPE_URL_FORBID_SLASHES;
}
- else {
- access_status = ap_unescape_url(r->parsed_uri.path);
+ else if (!d->decode_encoded_slashes) {
+ unescape_flags |= AP_UNESCAPE_URL_KEEP_SLASHES;
}
+ access_status = ap_unescape_url_ex(r->parsed_uri.path, unescape_flags);
if (access_status) {
if (access_status == HTTP_NOT_FOUND) {
if (! d->allow_encoded_slashes) {
* be decoded to their corresponding unreserved characters by
* URI normalizers.
*/
- if (decode_unreserved
- && path[l] == '%' && apr_isxdigit(path[l + 1])
- && apr_isxdigit(path[l + 2])) {
- const char c = x2c(&path[l + 1]);
- if (apr_isalnum(c) || (c && strchr("-._~", c))) {
- /* Replace last char and fall through as the current
- * read position */
- l += 2;
- path[l] = c;
+ if (decode_unreserved && path[l] == '%') {
+ if (apr_isxdigit(path[l + 1]) && apr_isxdigit(path[l + 2])) {
+ const char c = x2c(&path[l + 1]);
+ if (TEST_CHAR(c, T_URI_UNRESERVED)) {
+ /* Replace last char and fall through as the current
+ * read position */
+ l += 2;
+ path[l] = c;
+ }
+ }
+ else {
+ /* Invalid encoding */
+ ret = 0;
}
- }
-
- if ((flags & AP_NORMALIZE_DROP_PARAMETERS) && path[l] == ';') {
- do {
- l++;
- } while (!IS_SLASH_OR_NUL(path[l]));
- continue;
}
if (w == 0 || IS_SLASH(path[w - 1])) {
* decoding %00 or a forbidden character returns HTTP_NOT_FOUND
*/
-static int unescape_url(char *url, const char *forbid, const char *reserved)
+static int unescape_url(char *url, const char *forbid, const char *reserved,
+ unsigned int flags)
{
+ const int keep_slashes = (flags & AP_UNESCAPE_URL_KEEP_SLASHES) != 0,
+ forbid_slashes = (flags & AP_UNESCAPE_URL_FORBID_SLASHES) != 0,
+ keep_unreserved = (flags & AP_UNESCAPE_URL_KEEP_UNRESERVED) != 0;
int badesc, badpath;
char *x, *y;
char decoded;
decoded = x2c(y + 1);
if ((decoded == '\0')
+ || (forbid_slashes && IS_SLASH(decoded))
|| (forbid && ap_strchr_c(forbid, decoded))) {
badpath = 1;
*x = decoded;
y += 2;
}
- else if (reserved && ap_strchr_c(reserved, decoded)) {
+ else if ((keep_unreserved && TEST_CHAR(decoded,
+ T_URI_UNRESERVED))
+ || (keep_slashes && IS_SLASH(decoded))
+ || (reserved && ap_strchr_c(reserved, decoded))) {
*x++ = *y++;
*x++ = *y++;
*x = *y;
AP_DECLARE(int) ap_unescape_url(char *url)
{
/* Traditional */
- return unescape_url(url, SLASHES, NULL);
+ return unescape_url(url, SLASHES, NULL, 0);
}
AP_DECLARE(int) ap_unescape_url_keep2f(char *url, int decode_slashes)
{
/* AllowEncodedSlashes (corrected) */
if (decode_slashes) {
/* no chars reserved */
- return unescape_url(url, NULL, NULL);
+ return unescape_url(url, NULL, NULL, 0);
} else {
/* reserve (do not decode) encoded slashes */
- return unescape_url(url, NULL, SLASHES);
+ return unescape_url(url, NULL, SLASHES, 0);
}
}
+AP_DECLARE(int) ap_unescape_url_ex(char *url, unsigned int flags)
+{
+ return unescape_url(url, NULL, NULL, flags);
+}
+
#ifdef NEW_APIS
/* IFDEF these out until they've been thought through.
* Just a germ of an API extension for now
/* leave RFC1738 reserved characters intact, * so proxied URLs
* don't get mangled. Where does that leave encoded '&' ?
*/
- return unescape_url(url, NULL, "/;?");
+ return unescape_url(url, NULL, "/;?", 0);
}
AP_DECLARE(int) ap_unescape_url_reserved(char *url, const char *reserved)
{
}
/* unescape everything else */
- return unescape_url(query, NULL, NULL);
+ return unescape_url(query, NULL, NULL, 0);
}
AP_DECLARE(char *) ap_construct_server(apr_pool_t *p, const char *hostname,
AP_DECLARE(int) ap_unescape_all(char *url)
{
- return unescape_url(url, NULL, NULL);
+ return unescape_url(url, NULL, NULL, 0);
}
/* c2x takes an unsigned, and expects the caller has guaranteed that