/** Should we treat backslash characters as path segment separators? */
int path_backslash_separators;
+ int query_backslash_separators;
/** Should we treat paths as case insensitive? */
int path_case_insensitive;
+ int query_case_insensitive;
/** Should we compress multiple path segment separators into one? */
int path_compress_separators;
+ int query_compress_separators;
/** This parameter is used to predict how a server will react when control
* characters are present in a request path, but does not affect path
* normalization.
*/
int path_control_char_handling;
+ int query_control_char_handling;
/** Should the parser convert UTF-8 into a single-byte stream, using
* best-fit?
/** Should we URL-decode encoded path segment separators? */
int path_decode_separators;
+ int query_decode_separators;
/** Should we decode %u-encoded characters? */
int path_decode_u_encoding;
+ int query_decode_u_encoding;
/** How to handle invalid encodings: URL_DECODER_LEAVE_PERCENT,
* URL_DECODER_REMOVE_PERCENT or URL_DECODER_DECODE_INVALID.
*/
int path_invalid_encoding_handling;
+ int query_invalid_encoding_handling;
/** Controls how invalid UTF-8 characters are handled. */
int path_invalid_utf8_handling;
/** Controls how encoded NUL bytes are handled. */
int path_nul_encoded_handling;
+ int query_nul_encoded_handling;
/** Controls how raw NUL bytes are handled. */
int path_nul_raw_handling;
+ int query_nul_raw_handling;
/** The replacement character used when there is no best-fit mapping. */
unsigned char path_replacement_char;
void htp_config_set_bestfit_map(htp_cfg_t *cfg, unsigned char *map);
void htp_config_set_path_backslash_separators(htp_cfg_t *cfg, int backslash_separators);
+void htp_config_set_query_backslash_separators(htp_cfg_t *cfg, int backslash_separators);
void htp_config_set_path_case_insensitive(htp_cfg_t *cfg, int path_case_insensitive);
+void htp_config_set_query_case_insensitive(htp_cfg_t *cfg, int case_insensitive);
void htp_config_set_path_compress_separators(htp_cfg_t *cfg, int compress_separators);
+void htp_config_set_query_compress_separators(htp_cfg_t *cfg, int compress_separators);
void htp_config_set_path_control_char_handling(htp_cfg_t *cfg, int control_char_handling);
+void htp_config_set_query_control_char_handling(htp_cfg_t *cfg, int control_char_handling);
void htp_config_set_path_convert_utf8(htp_cfg_t *cfg, int convert_utf8);
void htp_config_set_path_decode_separators(htp_cfg_t *cfg, int decode_separators);
+void htp_config_set_query_decode_separators(htp_cfg_t *cfg, int decode_separators);
void htp_config_set_path_decode_u_encoding(htp_cfg_t *cfg, int decode_u_encoding);
+void htp_config_set_query_decode_u_encoding(htp_cfg_t *cfg, int decode_u_encoding);
void htp_config_set_path_invalid_encoding_handling(htp_cfg_t *cfg, int invalid_encoding_handling);
+void htp_config_set_query_invalid_encoding_handling(htp_cfg_t *cfg, int invalid_encoding_handling);
void htp_config_set_path_invalid_utf8_handling(htp_cfg_t *cfg, int invalid_utf8_handling);
void htp_config_set_path_nul_encoded_handling(htp_cfg_t *cfg, int nul_encoded_handling);
+void htp_config_set_query_nul_encoded_handling(htp_cfg_t *cfg, int nul_encoded_handling);
void htp_config_set_path_nul_raw_handling(htp_cfg_t *cfg, int nul_raw_handling);
+void htp_config_set_query_nul_raw_handling(htp_cfg_t *cfg, int nul_raw_handling);
void htp_config_set_path_replacement_char(htp_cfg_t *cfg, int replacement_char);
void htp_config_set_path_unicode_mapping(htp_cfg_t *cfg, int unicode_mapping);
void htp_replace_hostname(htp_connp_t *connp, htp_uri_t *parsed_uri, bstr *hostname);
int htp_decode_path_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *path);
+int htp_decode_query_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *query);
void htp_uriencoding_normalize_inplace(bstr *s);
cfg->path_backslash_separators = backslash_separators;
}
+/**
+ * Stores the flag that tells the query decoder whether a backslash counts as
+ * a segment separator. When the flag is non-zero, htp_decode_query_inplace()
+ * rewrites every backslash as a forward slash, so "/one\two/three" becomes
+ * "/one/two/three", and a percent-encoded backslash is handled as an encoded
+ * separator.
+ *
+ * @param cfg Configuration structure to update.
+ * @param backslash_separators Non-zero to enable the conversion, zero to disable it.
+ */
+void htp_config_set_query_backslash_separators(htp_cfg_t *cfg, int backslash_separators) {
+    cfg->query_backslash_separators = backslash_separators;
+}
+
/**
* Configures filesystem sensitivity. This setting affects
* how URL paths are normalized. There are no path modifications by default, but
cfg->path_case_insensitive = case_insensitive;
}
+/**
+ * Stores the case-sensitivity flag used when a query is normalized. The query
+ * is left as-is when the flag is zero; when it is non-zero,
+ * htp_decode_query_inplace() passes every output character through tolower().
+ *
+ * @param cfg Configuration structure to update.
+ * @param case_insensitive Non-zero to lowercase the decoded query, zero to keep its case.
+ */
+void htp_config_set_query_case_insensitive(htp_cfg_t *cfg, int case_insensitive) {
+    cfg->query_case_insensitive = case_insensitive;
+}
+
/**
* Configures whether consecutive path segment separators will be compressed. When
* enabled, a path such as "/one//two" will be normalized to "/one/two". The backslash_separators
cfg->path_compress_separators = compress_separators;
}
+/**
+ * Stores the flag that controls whether runs of consecutive separators in a
+ * query are collapsed. When it is non-zero, htp_decode_query_inplace() writes
+ * only the first '/' of each run, so "/one//two" becomes "/one/two".
+ *
+ * Separator decoding and backslash conversion are applied to a character
+ * before it is tested for compression. With backslash_separators and
+ * decode_separators both enabled, "/one\\/two\/%5cthree/%2f//four" is
+ * therefore reduced to "/one/two/three/four".
+ *
+ * @param cfg Configuration structure to update.
+ * @param compress_separators Non-zero to collapse separator runs, zero to keep them.
+ */
+void htp_config_set_query_compress_separators(htp_cfg_t *cfg, int compress_separators) {
+    cfg->query_compress_separators = compress_separators;
+}
+
/**
* This parameter is used to predict how a server will react when control
* characters are present in a request path, but does not affect path
cfg->path_control_char_handling = control_char_handling;
}
+/**
+ * Stores the policy used to predict the server's reaction to control
+ * characters in a request query. The policy never changes the decoded query:
+ * htp_decode_query_inplace() only sets the expected response status to 400
+ * when the policy is STATUS_400 and a character below 0x20 is about to be
+ * written to the output.
+ *
+ * @param cfg Configuration structure to update.
+ * @param control_char_handling NONE for servers that ignore control characters
+ *                              in the query, STATUS_400 for servers that
+ *                              answer such requests with 400.
+ */
+void htp_config_set_query_control_char_handling(htp_cfg_t *cfg, int control_char_handling) {
+    cfg->query_control_char_handling = control_char_handling;
+}
+
/**
* Controls the UTF-8 treatment of request paths. One option is to only validate
* path as UTF-8. In this case, the UTF-8 flags will be raised as appropriate, and
cfg->path_decode_separators = decode_separators;
}
+/**
+ * Stores the policy for percent-encoded separators found in a query. The
+ * decoder, htp_decode_query_inplace(), recognises an encoded '/' and, when
+ * backslash_separators is enabled, an encoded '\' as well:
+ *
+ *  - YES: the sequence is decoded, so "/one%2ftwo" becomes "/one/two"
+ *    (a decoded backslash is subsequently turned into a forward slash);
+ *  - NO: the sequence is left encoded;
+ *  - STATUS_404: the sequence is left encoded and the expected response
+ *    status is set to 404.
+ *
+ * @param cfg Configuration structure to update.
+ * @param decode_separators One of YES, NO or STATUS_404.
+ */
+void htp_config_set_query_decode_separators(htp_cfg_t *cfg, int decode_separators) {
+    cfg->query_decode_separators = decode_separators;
+}
+
/**
* Configures whether %u-encoded sequences in path will be decoded. Such sequences
* will be treated as invalid URL encoding if decoding is not desireable.
cfg->path_decode_u_encoding = decode_u_encoding;
}
+/**
+ * Stores the policy for %u-encoded sequences ("%uHHHH") in a query. With a
+ * zero value htp_decode_query_inplace() has no special case for them and they
+ * go through the ordinary percent-decoding path, where "%u" does not qualify
+ * as a valid encoding. With a non-zero value they are decoded; if that value
+ * is STATUS_400 the expected response status is additionally set to 400
+ * whenever such a sequence is seen.
+ *
+ * @param cfg Configuration structure to update.
+ * @param decode_u_encoding Zero to disable %u decoding, non-zero to enable it.
+ */
+void htp_config_set_query_decode_u_encoding(htp_cfg_t *cfg, int decode_u_encoding) {
+    cfg->query_decode_u_encoding = decode_u_encoding;
+}
+
/**
* Configures how server reacts to invalid encoding in path.
*
cfg->path_invalid_encoding_handling = invalid_encoding_handling;
}
+/**
+ * Stores the policy applied when the query contains an invalid percent
+ * encoding. htp_decode_query_inplace() interprets the value as follows:
+ *
+ *  - URL_DECODER_PRESERVE_PERCENT: the '%' is copied to the output;
+ *  - URL_DECODER_REMOVE_PERCENT: the '%' is dropped from the output;
+ *  - URL_DECODER_DECODE_INVALID: the sequence is decoded regardless;
+ *  - URL_DECODER_STATUS_400: the expected response status is set to 400.
+ *
+ * When the sequence is long enough to be examined, any other value makes the
+ * decoder fail with -1.
+ *
+ * @param cfg Configuration structure to update.
+ * @param invalid_encoding_handling One of the URL_DECODER_* values listed above.
+ */
+void htp_config_set_query_invalid_encoding_handling(htp_cfg_t *cfg, int invalid_encoding_handling) {
+    cfg->query_invalid_encoding_handling = invalid_encoding_handling;
+}
/**
cfg->path_nul_encoded_handling = nul_encoded_handling;
}
+/**
+ * Stores the policy for encoded NUL bytes in a query. For a "%00" sequence
+ * htp_decode_query_inplace() cuts the query off at that point when the policy
+ * is TERMINATE, and sets the expected response status to 400 or 404 for
+ * STATUS_400 and STATUS_404. Unless the query is terminated, the decoded NUL
+ * byte stays in the output.
+ *
+ * @param cfg Configuration structure to update.
+ * @param nul_encoded_handling One of TERMINATE, STATUS_400 or STATUS_404.
+ */
+void htp_config_set_query_nul_encoded_handling(htp_cfg_t *cfg, int nul_encoded_handling) {
+    cfg->query_nul_encoded_handling = nul_encoded_handling;
+}
+
/**
* Configures how server reacts to raw NUL bytes. Some servers will terminate
* path at NUL, while some will respond with 400 or 404. When the termination option
cfg->path_nul_raw_handling = nul_raw_handling;
}
+/**
+ * Stores the policy for raw (unencoded) NUL bytes in a query. On such a byte
+ * htp_decode_query_inplace() cuts the query off at that point when the policy
+ * is TERMINATE, and sets the expected response status to 400 or 404 for
+ * STATUS_400 and STATUS_404. Unless the query is terminated, the NUL byte
+ * stays in the output.
+ *
+ * @param cfg Configuration structure to update.
+ * @param nul_raw_handling One of TERMINATE, STATUS_400 or STATUS_404.
+ */
+void htp_config_set_query_nul_raw_handling(htp_cfg_t *cfg, int nul_raw_handling) {
+    cfg->query_nul_raw_handling = nul_raw_handling;
+}
+
/**
* Sets the replacement character that will be used in the lossy best-fit
* mapping from Unicode characters into single-byte streams. The question mark
cfg->path_backslash_separators = YES;
cfg->path_decode_separators = YES;
cfg->path_compress_separators = YES;
+
+// cfg->query_backslash_separators = YES;
+ cfg->query_decode_separators = YES;
+// cfg->query_compress_separators = YES;
break;
case HTP_SERVER_IDS:
cfg->path_decode_u_encoding = YES;
cfg->path_unicode_mapping = BESTFIT;
cfg->path_convert_utf8 = YES;
+
+// cfg->query_backslash_separators = YES;
+ cfg->query_case_insensitive = YES;
+ cfg->query_decode_separators = YES;
+// cfg->query_compress_separators = YES;
+ cfg->query_decode_u_encoding = YES;
break;
case HTP_SERVER_APACHE :
cfg->path_compress_separators = YES;
cfg->path_invalid_encoding_handling = URL_DECODER_STATUS_400;
cfg->path_control_char_handling = NONE;
+
+// cfg->query_backslash_separators = NO;
+ cfg->query_decode_separators = NO;
+// cfg->query_compress_separators = YES;
+ cfg->query_invalid_encoding_handling = URL_DECODER_STATUS_400;
+ cfg->query_control_char_handling = NONE;
break;
case HTP_SERVER_IIS_5_1:
cfg->path_decode_u_encoding = YES;
cfg->path_unicode_mapping = BESTFIT;
cfg->path_control_char_handling = NONE;
+
+// cfg->query_backslash_separators = YES;
+ cfg->query_decode_separators = NO;
+// cfg->query_compress_separators = YES;
+ cfg->query_invalid_encoding_handling = URL_DECODER_PRESERVE_PERCENT;
+ cfg->query_decode_u_encoding = YES;
+// cfg->query_unicode_mapping = BESTFIT;
+ cfg->query_control_char_handling = NONE;
break;
case HTP_SERVER_IIS_6_0:
cfg->path_decode_u_encoding = YES;
cfg->path_unicode_mapping = STATUS_400;
cfg->path_control_char_handling = STATUS_400;
+
+// cfg->query_backslash_separators = YES;
+ cfg->query_decode_separators = YES;
+// cfg->query_compress_separators = YES;
+ cfg->query_invalid_encoding_handling = URL_DECODER_STATUS_400;
+ cfg->query_decode_u_encoding = YES;
+// cfg->query_unicode_mapping = STATUS_400;
+ cfg->query_control_char_handling = STATUS_400;
break;
case HTP_SERVER_IIS_7_0:
cfg->path_compress_separators = YES;
cfg->path_invalid_encoding_handling = URL_DECODER_STATUS_400;
cfg->path_control_char_handling = STATUS_400;
+
+// cfg->query_backslash_separators = YES;
+ cfg->query_decode_separators = YES;
+// cfg->query_compress_separators = YES;
+ cfg->query_invalid_encoding_handling = URL_DECODER_STATUS_400;
+ cfg->query_control_char_handling = STATUS_400;
break;
default:
return r;
}
+/**
+ * Decode a request query according to the settings in the
+ * provided configuration structure.
+ *
+ * The query is rewritten in place: a read position (rpos) walks the input
+ * while a write position (wpos), which never overtakes it, receives the
+ * output. Each iteration produces at most one output character, which is
+ * then run through the control-character check, backslash conversion,
+ * lowercasing and separator compression, as enabled by the cfg->query_*
+ * settings. On the way the function raises flags in tx->flags and may set
+ * tx->response_status_expected_number to 400 or 404.
+ *
+ * @param cfg Configuration whose query_* settings drive the decoding.
+ * @param tx Transaction whose flags and expected response status are updated.
+ * @param query String to decode; its length is adjusted to the decoded size.
+ * @return -1 if query is NULL, if its data pointer is NULL, or if an invalid
+ *         encoding is examined while query_invalid_encoding_handling holds an
+ *         unrecognised value; 1 otherwise, including when the query is cut
+ *         short at a NUL byte.
+ */
+int htp_decode_query_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *query) {
+ if (query == NULL)
+ return -1;
+
+ unsigned char *data = (unsigned char *) bstr_ptr(query);
+ if (data == NULL) {
+ return -1;
+ }
+ size_t len = bstr_len(query);
+
+ // TODO I don't like this function. It's too complex.
+
+ size_t rpos = 0; // next input byte to read
+ size_t wpos = 0; // next output byte to write
+ int previous_was_separator = 0; // only maintained when separators are compressed
+
+ while (rpos < len) {
+ int c = data[rpos];
+
+ // Decode encoded characters
+ if (c == '%') {
+ if (rpos + 2 < len) { // at least two more bytes follow the '%'
+ int handled = 0; // set once the %u branch has consumed this '%'
+
+ if (cfg->query_decode_u_encoding) {
+ // Check for the %u encoding
+ if ((data[rpos + 1] == 'u') || (data[rpos + 1] == 'U')) {
+ handled = 1;
+
+ if (cfg->query_decode_u_encoding == STATUS_400) {
+ tx->response_status_expected_number = 400;
+ }
+
+ if (rpos + 5 < len) { // 'u' plus four more bytes are available
+ if (isxdigit(data[rpos + 2]) && (isxdigit(data[rpos + 3]))
+ && isxdigit(data[rpos + 4]) && (isxdigit(data[rpos + 5]))) {
+ // Decode a valid %u encoding
+ c = decode_u_encoding(cfg, tx, &data[rpos + 2]);
+ rpos += 6; // skip the whole "%uHHHH" sequence
+
+ if (c == 0) {
+ tx->flags |= HTP_PATH_ENCODED_NUL; // NOTE(review): PATH_* flag raised for query data - confirm this is intended
+
+ // NOTE(review): TERMINATE is not handled for a %u-encoded NUL, unlike the %00 case further down - confirm
+ if (cfg->query_nul_encoded_handling == STATUS_400) {
+ tx->response_status_expected_number = 400;
+ } else if (cfg->query_nul_encoded_handling == STATUS_404) {
+ tx->response_status_expected_number = 404;
+ }
+ }
+ } else {
+ // Invalid %u encoding
+ tx->flags |= HTP_PATH_INVALID_ENCODING;
+
+ switch (cfg->query_invalid_encoding_handling) {
+ case URL_DECODER_REMOVE_PERCENT:
+ // Do not place anything in output; eat
+ // the percent character
+ rpos++;
+ continue;
+ break;
+ case URL_DECODER_PRESERVE_PERCENT:
+ // Leave the percent character in output
+ // (c still holds '%'; the bytes after it are processed by later iterations)
+ rpos++;
+ break;
+ case URL_DECODER_DECODE_INVALID:
+ // Decode invalid %u encoding
+ c = decode_u_encoding(cfg, tx, &data[rpos + 2]);
+ rpos += 6;
+ break;
+ case URL_DECODER_STATUS_400:
+ // Set expected status to 400
+ tx->response_status_expected_number = 400;
+
+ // Decode invalid %u encoding
+ c = decode_u_encoding(cfg, tx, &data[rpos + 2]);
+ rpos += 6;
+ break;
+ break; // unreachable duplicate
+ default:
+ // Unknown setting
+ return -1;
+ break;
+ }
+ }
+ } else {
+ // Invalid %u encoding (not enough data)
+ tx->flags |= HTP_PATH_INVALID_ENCODING;
+
+ if (cfg->query_invalid_encoding_handling == URL_DECODER_REMOVE_PERCENT) {
+ // Remove the percent character from output
+ rpos++;
+ continue;
+ } else {
+ rpos++; // every other setting keeps the '%' in output
+ }
+ }
+ }
+ }
+
+ // Handle standard URL encoding
+ if (!handled) {
+ if ((isxdigit(data[rpos + 1])) && (isxdigit(data[rpos + 2]))) {
+ c = x2c(&data[rpos + 1]); // presumably converts the two hex digits into one byte value
+
+ if (c == 0) {
+ tx->flags |= HTP_PATH_ENCODED_NUL;
+
+ switch (cfg->query_nul_encoded_handling) {
+ case TERMINATE:
+ bstr_len_adjust(query, wpos); // presumably sets the query length to what has been written so far
+ return 1;
+ break;
+ case STATUS_400:
+ tx->response_status_expected_number = 400;
+ break;
+ case STATUS_404:
+ tx->response_status_expected_number = 404;
+ break;
+ }
+ }
+
+ if ((c == '/') || ((cfg->query_backslash_separators) && (c == '\\'))) {
+ tx->flags |= HTP_PATH_ENCODED_SEPARATOR;
+
+ switch (cfg->query_decode_separators) {
+ case STATUS_404:
+ tx->response_status_expected_number = 404;
+ // Fall-through
+ case NO:
+ // Leave encoded: output the '%' itself and let the
+ // two hex digits be copied by the following iterations
+ c = '%';
+ rpos++;
+ break;
+ case YES:
+ // Decode
+ rpos += 3;
+ break;
+ }
+ } else {
+ // Decode
+ rpos += 3;
+ }
+ } else {
+ // Invalid encoding
+ tx->flags |= HTP_PATH_INVALID_ENCODING;
+
+ switch (cfg->query_invalid_encoding_handling) {
+ case URL_DECODER_REMOVE_PERCENT:
+ // Do not place anything in output; eat
+ // the percent character
+ rpos++;
+ continue;
+ break;
+ case URL_DECODER_PRESERVE_PERCENT:
+ // Leave the percent character in output
+ rpos++;
+ break;
+ case URL_DECODER_DECODE_INVALID:
+ // Decode
+ c = x2c(&data[rpos + 1]);
+ rpos += 3;
+ // Note: What if an invalid encoding decodes into a path
+ // separator? This is theoretical at the moment, because
+ // the only platform we know doesn't convert separators is
+ // Apache, who will also respond with 400 if invalid encoding
+ // is encountered. Thus no check for a separator here.
+ break;
+ case URL_DECODER_STATUS_400:
+ // Backend will reject request with 400, which means
+ // that it does not matter what we do.
+ tx->response_status_expected_number = 400;
+
+ // Preserve the percent character
+ rpos++;
+ break;
+ default:
+ // Unknown setting
+ return -1;
+ break;
+ }
+ }
+ }
+ } else {
+ // Invalid encoding (not enough data)
+ tx->flags |= HTP_PATH_INVALID_ENCODING;
+
+ if (cfg->query_invalid_encoding_handling == URL_DECODER_REMOVE_PERCENT) {
+ // Do not place the percent character in output
+ rpos++;
+ continue;
+ } else {
+ rpos++; // every other setting keeps the '%' in output
+ }
+ }
+ } else {
+ // One non-encoded character
+
+ // Is it a NUL byte?
+ if (c == 0) {
+ switch (cfg->query_nul_raw_handling) {
+ case TERMINATE:
+ // Terminate the query at the raw NUL byte
+ bstr_len_adjust(query, wpos);
+ return 1;
+ break;
+ case STATUS_400:
+ // Leave the NUL byte, but set the expected status
+ tx->response_status_expected_number = 400;
+ break;
+ case STATUS_404:
+ // Leave the NUL byte, but set the expected status
+ tx->response_status_expected_number = 404;
+ break;
+ }
+ }
+
+ rpos++;
+ }
+
+ // Place the character into output
+
+ // Check for control characters
+ if (c < 0x20) {
+ if (cfg->query_control_char_handling == STATUS_400) {
+ tx->response_status_expected_number = 400;
+ }
+ }
+
+ // Convert backslashes to forward slashes, if necessary
+ if ((c == '\\') && (cfg->query_backslash_separators)) {
+ c = '/';
+ }
+
+ // Lowercase characters, if necessary
+ if (cfg->query_case_insensitive) {
+ c = tolower(c);
+ }
+
+ // If we're compressing separators then we need
+ // to track if the previous character was a separator
+ if (cfg->query_compress_separators) {
+ if (c == '/') {
+ if (!previous_was_separator) {
+ data[wpos++] = c;
+ previous_was_separator = 1;
+ } else {
+ // Do nothing; we don't want
+ // another separator in output
+ }
+ } else {
+ data[wpos++] = c;
+ previous_was_separator = 0;
+ }
+ } else {
+ data[wpos++] = c;
+ }
+ }
+
+ bstr_len_adjust(query, wpos); // decoded output occupies the first wpos bytes
+
+ return 1;
+}
+
/**
* Decode a request path according to the settings in the
* provided configuration structure.