From: Victor Julien Date: Wed, 20 Feb 2013 15:13:14 +0000 (+0100) Subject: Add separate libhtp query string normalization function and configuration toggles... X-Git-Tag: suricata-1.4.1~53 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d41c762689a08e6814dc93e8bfebeceab97175c3;p=thirdparty%2Fsuricata.git Add separate libhtp query string normalization function and configuration toggles for it. --- diff --git a/libhtp/htp/htp.h b/libhtp/htp/htp.h index f44b1a6da4..c0a988f7d5 100644 --- a/libhtp/htp/htp.h +++ b/libhtp/htp/htp.h @@ -323,18 +323,22 @@ struct htp_cfg_t { /** Should we treat backslash characters as path segment separators? */ int path_backslash_separators; + int query_backslash_separators; /** Should we treat paths as case insensitive? */ int path_case_insensitive; + int query_case_insensitive; /** Should we compress multiple path segment separators into one? */ int path_compress_separators; + int query_compress_separators; /** This parameter is used to predict how a server will react when control * characters are present in a request path, but does not affect path * normalization. */ int path_control_char_handling; + int query_control_char_handling; /** Should the parser convert UTF-8 into a single-byte stream, using * best-fit? @@ -343,23 +347,28 @@ struct htp_cfg_t { /** Should we URL-decode encoded path segment separators? */ int path_decode_separators; + int query_decode_separators; /** Should we decode %u-encoded characters? */ int path_decode_u_encoding; + int query_decode_u_encoding; /** How do handle invalid encodings: URL_DECODER_LEAVE_PERCENT, * URL_DECODER_REMOVE_PERCENT or URL_DECODER_DECODE_INVALID. */ int path_invalid_encoding_handling; + int query_invalid_encoding_handling; /** Controls how invalid UTF-8 characters are handled. */ int path_invalid_utf8_handling; /** Controls how encoded NUL bytes are handled. */ int path_nul_encoded_handling; + int query_nul_encoded_handling; /** Controls how raw NUL bytes are handled. 
*/ int path_nul_raw_handling; + int query_nul_raw_handling; /** The replacement character used when there is no best-fit mapping. */ unsigned char path_replacement_char; @@ -1001,16 +1010,25 @@ void htp_config_register_log(htp_cfg_t *cfg, int (*callback_fn)(htp_log_t *)); void htp_config_set_bestfit_map(htp_cfg_t *cfg, unsigned char *map); void htp_config_set_path_backslash_separators(htp_cfg_t *cfg, int backslash_separators); +void htp_config_set_query_backslash_separators(htp_cfg_t *cfg, int backslash_separators); void htp_config_set_path_case_insensitive(htp_cfg_t *cfg, int path_case_insensitive); +void htp_config_set_query_case_insensitive(htp_cfg_t *cfg, int case_insensitive); void htp_config_set_path_compress_separators(htp_cfg_t *cfg, int compress_separators); +void htp_config_set_query_compress_separators(htp_cfg_t *cfg, int compress_separators); void htp_config_set_path_control_char_handling(htp_cfg_t *cfg, int control_char_handling); +void htp_config_set_query_control_char_handling(htp_cfg_t *cfg, int control_char_handling); void htp_config_set_path_convert_utf8(htp_cfg_t *cfg, int convert_utf8); void htp_config_set_path_decode_separators(htp_cfg_t *cfg, int backslash_separators); +void htp_config_set_query_decode_separators(htp_cfg_t *cfg, int decode_separators); void htp_config_set_path_decode_u_encoding(htp_cfg_t *cfg, int decode_u_encoding); +void htp_config_set_query_decode_u_encoding(htp_cfg_t *cfg, int decode_u_encoding); void htp_config_set_path_invalid_encoding_handling(htp_cfg_t *cfg, int invalid_encoding_handling); +void htp_config_set_query_invalid_encoding_handling(htp_cfg_t *cfg, int invalid_encoding_handling); void htp_config_set_path_invalid_utf8_handling(htp_cfg_t *cfg, int invalid_utf8_handling); void htp_config_set_path_nul_encoded_handling(htp_cfg_t *cfg, int nul_encoded_handling); +void htp_config_set_query_nul_encoded_handling(htp_cfg_t *cfg, int nul_encoded_handling); void htp_config_set_path_nul_raw_handling(htp_cfg_t *cfg, int 
nul_raw_handling); +void htp_config_set_query_nul_raw_handling(htp_cfg_t *cfg, int nul_raw_handling); void htp_config_set_path_replacement_char(htp_cfg_t *cfg, int replacement_char); void htp_config_set_path_unicode_mapping(htp_cfg_t *cfg, int unicode_mapping); @@ -1116,6 +1134,7 @@ bstr *htp_normalize_hostname_inplace(bstr *input); void htp_replace_hostname(htp_connp_t *connp, htp_uri_t *parsed_uri, bstr *hostname); int htp_decode_path_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *path); +int htp_decode_query_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *path); void htp_uriencoding_normalize_inplace(bstr *s); diff --git a/libhtp/htp/htp_config.c b/libhtp/htp/htp_config.c index bd2c3fe802..bceba1f075 100644 --- a/libhtp/htp/htp_config.c +++ b/libhtp/htp/htp_config.c @@ -462,6 +462,18 @@ void htp_config_set_path_backslash_separators(htp_cfg_t *cfg, int backslash_sepa cfg->path_backslash_separators = backslash_separators; } +/** + * Configures whether backslash characters are treated as query segment separators. They + * are not on Unix systems, but are on Windows systems. If this setting is enabled, a query + * such as "/one\two/three" will be converted to "/one/two/three". + * + * @param cfg Configuration structure to update + * @param backslash_separators Stored in cfg->query_backslash_separators; non-zero enables the conversion + */ +void htp_config_set_query_backslash_separators(htp_cfg_t *cfg, int backslash_separators) { + cfg->query_backslash_separators = backslash_separators; +} + /** * Configures filesystem sensitivity. This setting affects * how URL paths are normalized. There are no path modifications by default, but @@ -474,6 +486,18 @@ void htp_config_set_path_case_insensitive(htp_cfg_t *cfg, int case_insensitive) cfg->path_case_insensitive = case_insensitive; } +/** + * Configures filesystem sensitivity. This setting affects + * how URL queries are normalized. There are no query modifications by default, but + * on case-insensitive systems the query will be converted to lowercase.
+ * + * @param cfg Configuration structure to update + * @param case_insensitive Stored in cfg->query_case_insensitive; non-zero enables lowercasing + */ +void htp_config_set_query_case_insensitive(htp_cfg_t *cfg, int case_insensitive) { + cfg->query_case_insensitive = case_insensitive; +} + /** * Configures whether consecutive path segment separators will be compressed. When * enabled, a path such as "/one//two" will be normalized to "/one/two". The backslash_separators @@ -488,6 +512,20 @@ void htp_config_set_path_compress_separators(htp_cfg_t *cfg, int compress_separa cfg->path_compress_separators = compress_separators; } +/** + * Configures whether consecutive query segment separators will be compressed. When + * enabled, a query such as "/one//two" will be normalized to "/one/two". The backslash_separators + * and decode_separators parameters are used before compression takes place. For example, if + * backslash_separators and decode_separators are both enabled, the query "/one\\/two\/%5cthree/%2f//four" + * will be converted to "/one/two/three/four". + * + * @param cfg Configuration structure to update + * @param compress_separators Stored in cfg->query_compress_separators; non-zero enables compression + */ +void htp_config_set_query_compress_separators(htp_cfg_t *cfg, int compress_separators) { + cfg->query_compress_separators = compress_separators; +} + /** * This parameter is used to predict how a server will react when control * characters are present in a request path, but does not affect path @@ -502,6 +540,20 @@ void htp_config_set_path_control_char_handling(htp_cfg_t *cfg, int control_char_ cfg->path_control_char_handling = control_char_handling; } +/** + * This parameter is used to predict how a server will react when control + * characters are present in a request query, but does not affect query + * normalization. + * + * @param cfg Configuration structure to update + * @param control_char_handling Use NONE with servers that ignore control characters in + * request query, and STATUS_400 with servers that respond + * with 400.
+ */ +void htp_config_set_query_control_char_handling(htp_cfg_t *cfg, int control_char_handling) { + cfg->query_control_char_handling = control_char_handling; +} + /** * Controls the UTF-8 treatment of request paths. One option is to only validate * path as UTF-8. In this case, the UTF-8 flags will be raised as appropriate, and @@ -528,6 +580,19 @@ void htp_config_set_path_decode_separators(htp_cfg_t *cfg, int decode_separators cfg->path_decode_separators = decode_separators; } +/** + * Configures whether encoded query segment separators will be decoded. Apache does + * not do this, but IIS does. If enabled, a query such as "/one%2ftwo" will be normalized + * to "/one/two". If the backslash_separators option is also enabled, encoded backslash + * characters will be converted too (and subsequently normalized to forward slashes). + * + * @param cfg Configuration structure to update + * @param decode_separators YES to decode, NO to leave encoded, STATUS_404 to leave encoded and set the expected status to 404 + */ +void htp_config_set_query_decode_separators(htp_cfg_t *cfg, int decode_separators) { + cfg->query_decode_separators = decode_separators; +} + /** * Configures whether %u-encoded sequences in path will be decoded. Such sequences * will be treated as invalid URL encoding if decoding is not desireable. @@ -539,6 +604,17 @@ void htp_config_set_path_decode_u_encoding(htp_cfg_t *cfg, int decode_u_encoding cfg->path_decode_u_encoding = decode_u_encoding; } +/** + * Configures whether %u-encoded sequences in query will be decoded. Such sequences + * will be treated as invalid URL encoding if decoding is not desirable. + * + * @param cfg Configuration structure to update + * @param decode_u_encoding Non-zero enables %u decoding; the value STATUS_400 also sets the expected status to 400 when a %u sequence is seen + */ +void htp_config_set_query_decode_u_encoding(htp_cfg_t *cfg, int decode_u_encoding) { + cfg->query_decode_u_encoding = decode_u_encoding; +} + /** * Configures how server reacts to invalid encoding in path.
* @@ -551,6 +627,17 @@ void htp_config_set_path_invalid_encoding_handling(htp_cfg_t *cfg, int invalid_e cfg->path_invalid_encoding_handling = invalid_encoding_handling; } +/** + * Configures how server reacts to invalid encoding in query. + * + * @param cfg Configuration structure to update + * @param invalid_encoding_handling The available options are: URL_DECODER_PRESERVE_PERCENT, + * URL_DECODER_REMOVE_PERCENT, URL_DECODER_DECODE_INVALID + * and URL_DECODER_STATUS_400. + */ +void htp_config_set_query_invalid_encoding_handling(htp_cfg_t *cfg, int invalid_encoding_handling) { + cfg->query_invalid_encoding_handling = invalid_encoding_handling; +} /** @@ -577,6 +664,18 @@ void htp_config_set_path_nul_encoded_handling(htp_cfg_t *cfg, int nul_encoded_ha cfg->path_nul_encoded_handling = nul_encoded_handling; } +/** + * Configures how server reacts to encoded NUL bytes. Some servers will terminate + * query at NUL, while some will respond with 400 or 404. When the termination option + * is not used, the NUL byte will remain in the query. + * + * @param cfg Configuration structure to update + * @param nul_encoded_handling Possible values: TERMINATE, STATUS_400, STATUS_404 (NOTE(review): htp_decode_query_inplace applies TERMINATE only to %00, not to a %u-encoded NUL - confirm this is intended) + */ +void htp_config_set_query_nul_encoded_handling(htp_cfg_t *cfg, int nul_encoded_handling) { + cfg->query_nul_encoded_handling = nul_encoded_handling; +} + /** * Configures how server reacts to raw NUL bytes. Some servers will terminate * path at NUL, while some will respond with 400 or 404. When the termination option @@ -589,6 +688,18 @@ void htp_config_set_path_nul_raw_handling(htp_cfg_t *cfg, int nul_raw_handling) cfg->path_nul_raw_handling = nul_raw_handling; } +/** + * Configures how server reacts to raw NUL bytes. Some servers will terminate + * query at NUL, while some will respond with 400 or 404. When the termination option + * is not used, the NUL byte will remain in the query.
+ * + * @param cfg Configuration structure to update + * @param nul_raw_handling Possible values: TERMINATE, STATUS_400, STATUS_404 + */ +void htp_config_set_query_nul_raw_handling(htp_cfg_t *cfg, int nul_raw_handling) { + cfg->query_nul_raw_handling = nul_raw_handling; +} + /** * Sets the replacement characater that will be used to in the lossy best-fit * mapping from Unicode characters into single-byte streams. The question mark @@ -651,6 +762,10 @@ int htp_config_set_server_personality(htp_cfg_t *cfg, int personality) { cfg->path_backslash_separators = YES; cfg->path_decode_separators = YES; cfg->path_compress_separators = YES; + +// cfg->query_backslash_separators = YES; + cfg->query_decode_separators = YES; +// cfg->query_compress_separators = YES; break; case HTP_SERVER_IDS: @@ -666,6 +781,12 @@ int htp_config_set_server_personality(htp_cfg_t *cfg, int personality) { cfg->path_decode_u_encoding = YES; cfg->path_unicode_mapping = BESTFIT; cfg->path_convert_utf8 = YES; + +// cfg->query_backslash_separators = YES; + cfg->query_case_insensitive = YES; + cfg->query_decode_separators = YES; +// cfg->query_compress_separators = YES; + cfg->query_decode_u_encoding = YES; break; case HTP_SERVER_APACHE : @@ -680,6 +801,12 @@ int htp_config_set_server_personality(htp_cfg_t *cfg, int personality) { cfg->path_compress_separators = YES; cfg->path_invalid_encoding_handling = URL_DECODER_STATUS_400; cfg->path_control_char_handling = NONE; + +// cfg->query_backslash_separators = NO; + cfg->query_decode_separators = NO; +// cfg->query_compress_separators = YES; + cfg->query_invalid_encoding_handling = URL_DECODER_STATUS_400; + cfg->query_control_char_handling = NONE; break; case HTP_SERVER_IIS_5_1: @@ -695,6 +822,14 @@ int htp_config_set_server_personality(htp_cfg_t *cfg, int personality) { cfg->path_decode_u_encoding = YES; cfg->path_unicode_mapping = BESTFIT; cfg->path_control_char_handling = NONE; + +// cfg->query_backslash_separators = YES; + cfg->query_decode_separators = NO; +//
cfg->query_compress_separators = YES; + cfg->query_invalid_encoding_handling = URL_DECODER_PRESERVE_PERCENT; + cfg->query_decode_u_encoding = YES; +// cfg->query_unicode_mapping = BESTFIT; + cfg->query_control_char_handling = NONE; break; case HTP_SERVER_IIS_6_0: @@ -710,6 +845,14 @@ int htp_config_set_server_personality(htp_cfg_t *cfg, int personality) { cfg->path_decode_u_encoding = YES; cfg->path_unicode_mapping = STATUS_400; cfg->path_control_char_handling = STATUS_400; + +// cfg->query_backslash_separators = YES; + cfg->query_decode_separators = YES; +// cfg->query_compress_separators = YES; + cfg->query_invalid_encoding_handling = URL_DECODER_STATUS_400; + cfg->query_decode_u_encoding = YES; +// cfg->query_unicode_mapping = STATUS_400; + cfg->query_control_char_handling = STATUS_400; break; case HTP_SERVER_IIS_7_0: @@ -724,6 +867,12 @@ int htp_config_set_server_personality(htp_cfg_t *cfg, int personality) { cfg->path_compress_separators = YES; cfg->path_invalid_encoding_handling = URL_DECODER_STATUS_400; cfg->path_control_char_handling = STATUS_400; + +// cfg->query_backslash_separators = YES; + cfg->query_decode_separators = YES; +// cfg->query_compress_separators = YES; + cfg->query_invalid_encoding_handling = URL_DECODER_STATUS_400; + cfg->query_control_char_handling = STATUS_400; break; default: diff --git a/libhtp/htp/htp_util.c b/libhtp/htp/htp_util.c index 8fac26b818..9363001a45 100644 --- a/libhtp/htp/htp_util.c +++ b/libhtp/htp/htp_util.c @@ -918,6 +918,277 @@ int decode_u_encoding(htp_cfg_t *cfg, htp_tx_t *tx, unsigned char *data) { return r; } +/** + * Decode a request query according to the settings in the + * provided configuration structure. 
+ * Returns 1 on success (including early termination at a NUL byte), or -1 if the input is NULL or the invalid-encoding setting is unknown. + * @param cfg Configuration whose query_* settings drive decoding. NOTE(review): the query_decode_separators switch has no default case, so values other than YES, NO and STATUS_404 do not advance the read position - confirm callers never set them + * @param tx Transaction that receives the HTP_PATH_* flags and the expected response status + * @param query String to decode; rewritten in place, then its length is adjusted to the bytes written via bstr_len_adjust + */ +int htp_decode_query_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *query) { + if (query == NULL) + return -1; + + unsigned char *data = (unsigned char *) bstr_ptr(query); + if (data == NULL) { + return -1; + } + size_t len = bstr_len(query); + + // TODO I don't like this function. It's too complex. + + size_t rpos = 0; + size_t wpos = 0; + int previous_was_separator = 0; + + while (rpos < len) { + int c = data[rpos]; + + // Decode encoded characters + if (c == '%') { + if (rpos + 2 < len) { + int handled = 0; + + if (cfg->query_decode_u_encoding) { + // Check for the %u encoding + if ((data[rpos + 1] == 'u') || (data[rpos + 1] == 'U')) { + handled = 1; + + if (cfg->query_decode_u_encoding == STATUS_400) { + tx->response_status_expected_number = 400; + } + + if (rpos + 5 < len) { + if (isxdigit(data[rpos + 2]) && (isxdigit(data[rpos + 3])) + && isxdigit(data[rpos + 4]) && (isxdigit(data[rpos + 5]))) { + // Decode a valid %u encoding + c = decode_u_encoding(cfg, tx, &data[rpos + 2]); + rpos += 6; + + if (c == 0) { + tx->flags |= HTP_PATH_ENCODED_NUL; + + if (cfg->query_nul_encoded_handling == STATUS_400) { + tx->response_status_expected_number = 400; + } else if (cfg->query_nul_encoded_handling == STATUS_404) { + tx->response_status_expected_number = 404; + } + } + } else { + // Invalid %u encoding + tx->flags |= HTP_PATH_INVALID_ENCODING; + + switch (cfg->query_invalid_encoding_handling) { + case URL_DECODER_REMOVE_PERCENT: + // Do not place anything in output; eat + // the percent character + rpos++; + continue; + break; + case URL_DECODER_PRESERVE_PERCENT: + // Leave the percent character in output + rpos++; + break; + case URL_DECODER_DECODE_INVALID: + // Decode invalid %u encoding + c = decode_u_encoding(cfg, tx, &data[rpos + 2]); + rpos += 6; + break; + case URL_DECODER_STATUS_400: + // Set expected status to 400 + tx->response_status_expected_number = 400; + + // Decode invalid %u encoding + c =
decode_u_encoding(cfg, tx, &data[rpos + 2]); + rpos += 6; + break; + break; + default: + // Unknown setting + return -1; + break; + } + } + } else { + // Invalid %u encoding (not enough data) + tx->flags |= HTP_PATH_INVALID_ENCODING; + + if (cfg->query_invalid_encoding_handling == URL_DECODER_REMOVE_PERCENT) { + // Remove the percent character from output + rpos++; + continue; + } else { + rpos++; + } + } + } + } + + // Handle standard URL encoding + if (!handled) { + if ((isxdigit(data[rpos + 1])) && (isxdigit(data[rpos + 2]))) { + c = x2c(&data[rpos + 1]); + + if (c == 0) { + tx->flags |= HTP_PATH_ENCODED_NUL; + + switch (cfg->query_nul_encoded_handling) { + case TERMINATE: + bstr_len_adjust(query, wpos); + return 1; + break; + case STATUS_400: + tx->response_status_expected_number = 400; + break; + case STATUS_404: + tx->response_status_expected_number = 404; + break; + } + } + + if ((c == '/') || ((cfg->query_backslash_separators) && (c == '\\'))) { + tx->flags |= HTP_PATH_ENCODED_SEPARATOR; + + switch (cfg->query_decode_separators) { + case STATUS_404: + tx->response_status_expected_number = 404; + // Fall-through + case NO: + // Leave encoded + c = '%'; + rpos++; + break; + case YES: + // Decode + rpos += 3; + break; + } + } else { + // Decode + rpos += 3; + } + } else { + // Invalid encoding + tx->flags |= HTP_PATH_INVALID_ENCODING; + + switch (cfg->query_invalid_encoding_handling) { + case URL_DECODER_REMOVE_PERCENT: + // Do not place anything in output; eat + // the percent character + rpos++; + continue; + break; + case URL_DECODER_PRESERVE_PERCENT: + // Leave the percent character in output + rpos++; + break; + case URL_DECODER_DECODE_INVALID: + // Decode + c = x2c(&data[rpos + 1]); + rpos += 3; + // Note: What if an invalid encoding decodes into a path + // separator? This is theoretical at the moment, because + // the only platform we know doesn't convert separators is + // Apache, who will also respond with 400 if invalid encoding + // is encountered.
Thus no check for a separator here. + break; + case URL_DECODER_STATUS_400: + // Backend will reject request with 400, which means + // that it does not matter what we do. + tx->response_status_expected_number = 400; + + // Preserve the percent character + rpos++; + break; + default: + // Unknown setting + return -1; + break; + } + } + } + } else { + // Invalid encoding (not enough data) + tx->flags |= HTP_PATH_INVALID_ENCODING; + + if (cfg->query_invalid_encoding_handling == URL_DECODER_REMOVE_PERCENT) { + // Do not place the percent character in output + rpos++; + continue; + } else { + rpos++; + } + } + } else { + // One non-encoded character + + // Is it a NUL byte? + if (c == 0) { + switch (cfg->query_nul_raw_handling) { + case TERMINATE: + // Terminate query at the raw NUL byte + bstr_len_adjust(query, wpos); + return 1; + break; + case STATUS_400: + // Leave the NUL byte, but set the expected status + tx->response_status_expected_number = 400; + break; + case STATUS_404: + // Leave the NUL byte, but set the expected status + tx->response_status_expected_number = 404; + break; + } + } + + rpos++; + } + + // Place the character into output + + // Check for control characters + if (c < 0x20) { + if (cfg->query_control_char_handling == STATUS_400) { + tx->response_status_expected_number = 400; + } + } + + // Convert backslashes to forward slashes, if necessary + if ((c == '\\') && (cfg->query_backslash_separators)) { + c = '/'; + } + + // Lowercase characters, if necessary + if (cfg->query_case_insensitive) { + c = tolower(c); + } + + // If we're compressing separators then we need + // to track if the previous character was a separator + if (cfg->query_compress_separators) { + if (c == '/') { + if (!previous_was_separator) { + data[wpos++] = c; + previous_was_separator = 1; + } else { + // Do nothing; we don't want + // another separator in output + } + } else { + data[wpos++] = c; + previous_was_separator = 0; + } + } else { + data[wpos++] = c; + } + } + +
bstr_len_adjust(query, wpos); + + return 1; +} + /** * Decode a request path according to the settings in the * provided configuration structure.