From: Mike Stepanek (mstepane) Date: Fri, 6 Mar 2020 13:18:32 +0000 (+0000) Subject: Merge pull request #2018 in SNORT/snort3 from ~DERAMADA/snort3:hi_http_uri to master X-Git-Tag: 3.0.0-269~13 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e8abd8e4601a3d3e5b620adde9bb5f8f54eecc21;p=thirdparty%2Fsnort3.git Merge pull request #2018 in SNORT/snort3 from ~DERAMADA/snort3:hi_http_uri to master Squashed commit of the following: commit d06d71e6983cde3acc12c1955425235e771258c8 Author: deramada Date: Wed Feb 19 10:01:15 2020 -0500 http_inspect: change http_uri to only include path and query for absolute and absolute path uris --- diff --git a/doc/http_inspect.txt b/doc/http_inspect.txt index 755966d62..4fb7e587a 100644 --- a/doc/http_inspect.txt +++ b/doc/http_inspect.txt @@ -355,6 +355,14 @@ is the scheme, "www.samplehost.com" is the host, "287" is the port, "/basic/example/of/path" is the path, "with-query" is the query, and "and-fragment" is the fragment. +http_uri represents the normalized uri; normalization of components depends +on the uri type. If the uri is of type absolute (contains all six components) or +absolute path (contains path, query and fragment) then the path and query +components are normalized. In these cases, http_uri represents the normalized +path and query (/path?query). If the uri is of type authority (host and port), +the host is normalized and http_uri represents the normalized host with the port +number. In all other cases http_uri is the same as http_raw_uri. + Note: this section uses informal language to explain some things. Nothing here is intended to conflict with the technical language of the HTTP RFCs and the implementation follows the RFCs. 
diff --git a/src/service_inspectors/http_inspect/http_uri.cc b/src/service_inspectors/http_inspect/http_uri.cc index 2b13b6f7b..64c0c2e78 100644 --- a/src/service_inspectors/http_inspect/http_uri.cc +++ b/src/service_inspectors/http_inspect/http_uri.cc @@ -191,126 +191,131 @@ void HttpUri::normalize() // Almost all HTTP requests are honest and rarely need expensive normalization processing. We // do a quick scan for red flags and only perform normalization if something comes up. // Otherwise we set the normalized fields to point at the raw values. - if ((host.length() > 0) && - UriNormalizer::need_norm(host, false, uri_param, infractions, events)) - *infractions += INF_URI_NEED_NORM_HOST; - if ((path.length() > 0) && - UriNormalizer::need_norm(path, true, uri_param, infractions, events)) - *infractions += INF_URI_NEED_NORM_PATH; - if ((query.length() > 0) && - UriNormalizer::need_norm(query, false, uri_param, infractions, events)) - *infractions += INF_URI_NEED_NORM_QUERY; - if ((fragment.length() > 0) && - UriNormalizer::need_norm(fragment, false, uri_param, infractions, events)) - *infractions += INF_URI_NEED_NORM_FRAGMENT; - - if (!((*infractions & INF_URI_NEED_NORM_PATH) || (*infractions & INF_URI_NEED_NORM_HOST) || - (*infractions & INF_URI_NEED_NORM_QUERY) || (*infractions & INF_URI_NEED_NORM_FRAGMENT))) + switch (uri_type) { - // This URI is OK, normalization not required - host_norm.set(host); - path_norm.set(path); - query_norm.set(query); - fragment_norm.set(fragment); - classic_norm.set(uri); - check_oversize_dir(path_norm); - return; - } + case URI_ASTERISK: + case URI__PROBLEMATIC: + classic_norm.set(uri); + return; + case URI_AUTHORITY: + { + if ((host.length() > 0) && + UriNormalizer::need_norm(host, false, uri_param, infractions, events)) + { + const int total_length = uri.length(); - HttpModule::increment_peg_counts(PEG_URI_NORM); + uint8_t* const new_buf = new uint8_t[total_length]; + uint8_t* current = new_buf; - // Create a new buffer containing 
the normalized URI by normalizing each individual piece. - const uint32_t total_length = uri.length() + UriNormalizer::URI_NORM_EXPANSION; - uint8_t* const new_buf = new uint8_t[total_length]; - uint8_t* current = new_buf; - if (scheme.length() >= 0) - { - memcpy(current, scheme.start(), scheme.length()); - current += scheme.length(); - memcpy(current, "://", 3); - current += 3; - } - if (host.length() > 0) - { - if (*infractions & INF_URI_NEED_NORM_HOST) - UriNormalizer::normalize(host, host_norm, false, current, uri_param, infractions, - events); - else - { - // The host component is not changing but other parts of the URI are being normalized. - // We need a copy of the raw host to provide that part of the normalized URI buffer we - // are assembling. But the normalized component will refer to the original raw buffer - // on the chance that the data retention policy in use might keep it longer. - memcpy(current, host.start(), host.length()); - host_norm.set(host); - } - current += host_norm.length(); - } - if (port.length() >= 0) - { - memcpy(current, ":", 1); - current += 1; - memcpy(current, port.start(), port.length()); - current += port.length(); - } - if (path.length() > 0) - { - if (*infractions & INF_URI_NEED_NORM_PATH) - UriNormalizer::normalize(path, path_norm, true, current, uri_param, infractions, - events); - else - { - memcpy(current, path.start(), path.length()); - path_norm.set(path); - } - current += path_norm.length(); - } - if (query.length() >= 0) - { - memcpy(current, "?", 1); - current += 1; - if (*infractions & INF_URI_NEED_NORM_QUERY) - UriNormalizer::normalize(query, query_norm, false, current, uri_param, infractions, - events); - else - { - memcpy(current, query.start(), query.length()); - query_norm.set(query); + *infractions += INF_URI_NEED_NORM_HOST; + + HttpModule::increment_peg_counts(PEG_URI_NORM); + + UriNormalizer::normalize(host, host_norm, false, current, uri_param, infractions, + events); + + current += host_norm.length(); + + 
if (port.length() >= 0) + { + memcpy(current, ":", 1); + current += 1; + memcpy(current, port.start(), port.length()); + current += port.length(); + } + + assert(current - new_buf <= total_length); + + classic_norm.set(current - new_buf, new_buf, true); + return; + } + + classic_norm.set(uri); + return; } - current += query_norm.length(); - } - if (fragment.length() >= 0) - { - memcpy(current, "#", 1); - current += 1; - if (*infractions & INF_URI_NEED_NORM_FRAGMENT) - UriNormalizer::normalize(fragment, fragment_norm, false, current, uri_param, - infractions, events); - else + case URI_ABSPATH: + case URI_ABSOLUTE: { - memcpy(current, fragment.start(), fragment.length()); - fragment_norm.set(fragment); - } - current += fragment_norm.length(); - } - assert(current - new_buf <= total_length); + if ((path.length() > 0) && + UriNormalizer::need_norm(path, true, uri_param, infractions, events)) + *infractions += INF_URI_NEED_NORM_PATH; + if ((query.length() > 0) && + UriNormalizer::need_norm(query, false, uri_param, infractions, events)) + *infractions += INF_URI_NEED_NORM_QUERY; - if ((*infractions & INF_URI_MULTISLASH) || (*infractions & INF_URI_SLASH_DOT) || - (*infractions & INF_URI_SLASH_DOT_DOT)) - { - HttpModule::increment_peg_counts(PEG_URI_PATH); - } + if (!((*infractions & INF_URI_NEED_NORM_PATH) || (*infractions & INF_URI_NEED_NORM_QUERY))) + { + // This URI is OK, normalization not required + path_norm.set(path); + query_norm.set(query); + + const int path_len = (path.length() > 0) ? path.length() : 0; + // query_len = length of query + 1 (? char) + const int query_len = (query.length() >= 0) ? 
query.length() + 1 : 0; - if ((*infractions & INF_URI_U_ENCODE) || (*infractions & INF_URI_UNKNOWN_PERCENT) || - (*infractions & INF_URI_PERCENT_UNRESERVED) || (*infractions & INF_URI_PERCENT_UTF8_2B) || - (*infractions & INF_URI_PERCENT_UTF8_3B) || (*infractions & INF_URI_DOUBLE_DECODE)) - { - HttpModule::increment_peg_counts(PEG_URI_CODING); - } + classic_norm.set(path_len + query_len, abs_path.start()); + + check_oversize_dir(path_norm); + return; + } + + HttpModule::increment_peg_counts(PEG_URI_NORM); + + // Create a new buffer containing the normalized URI by normalizing each individual piece. + int total_length = path.length() ? path.length() + UriNormalizer::URI_NORM_EXPANSION : 0; + total_length += (query.length() >= 0) ? query.length() + 1 : 0; + uint8_t* const new_buf = new uint8_t[total_length]; + uint8_t* current = new_buf; + + if (path.length() > 0) + { + if (*infractions & INF_URI_NEED_NORM_PATH) + UriNormalizer::normalize(path, path_norm, true, current, uri_param, infractions, + events); + else + { + memcpy(current, path.start(), path.length()); + path_norm.set(path); + } + current += path_norm.length(); + } + if (query.length() >= 0) + { + memcpy(current, "?", 1); + current += 1; + if (*infractions & INF_URI_NEED_NORM_QUERY) + UriNormalizer::normalize(query, query_norm, false, current, uri_param, infractions, + events); + else + { + memcpy(current, query.start(), query.length()); + query_norm.set(query); + } + current += query_norm.length(); + } + + assert(current - new_buf <= total_length); + + if ((*infractions & INF_URI_MULTISLASH) || (*infractions & INF_URI_SLASH_DOT) || + (*infractions & INF_URI_SLASH_DOT_DOT)) + { + HttpModule::increment_peg_counts(PEG_URI_PATH); + } - check_oversize_dir(path_norm); + if ((*infractions & INF_URI_U_ENCODE) || (*infractions & INF_URI_UNKNOWN_PERCENT) || + (*infractions & INF_URI_PERCENT_UNRESERVED) || (*infractions & INF_URI_PERCENT_UTF8_2B) || + (*infractions & INF_URI_PERCENT_UTF8_3B) || (*infractions & 
INF_URI_DOUBLE_DECODE)) + { + HttpModule::increment_peg_counts(PEG_URI_CODING); + } + + check_oversize_dir(path_norm); - classic_norm.set(current - new_buf, new_buf, true); + classic_norm.set(current - new_buf, new_buf, true); + } + default: + return; + } } size_t HttpUri::get_file_proc_hash() @@ -325,3 +330,45 @@ size_t HttpUri::get_file_proc_hash() return abs_path_hash; } + +const Field& HttpUri::get_norm_host() +{ + if (host_norm.length() != STAT_NOT_COMPUTE) + return host_norm; + + if (host.length() > 0 and + UriNormalizer::need_norm(host, false, uri_param, infractions, events)) + { + uint8_t *buf = new uint8_t[host.length()]; + + *infractions += INF_URI_NEED_NORM_HOST; + + UriNormalizer::normalize(host, host_norm, false, buf, uri_param, + infractions, events, true); + } + else + host_norm.set(host); + + return host_norm; +} + +const Field& HttpUri::get_norm_fragment() +{ + if (fragment_norm.length() != STAT_NOT_COMPUTE) + return fragment_norm; + + if ((fragment.length() > 0) and + UriNormalizer::need_norm(fragment, false, uri_param, infractions, events)) + { + uint8_t *buf = new uint8_t[fragment.length()]; + + *infractions += INF_URI_NEED_NORM_FRAGMENT; + + UriNormalizer::normalize(fragment, fragment_norm, false, buf, uri_param, + infractions, events, true); + } + else + fragment_norm.set(fragment); + + return fragment_norm; +} diff --git a/src/service_inspectors/http_inspect/http_uri.h b/src/service_inspectors/http_inspect/http_uri.h index 30876918c..7152f4b5b 100644 --- a/src/service_inspectors/http_inspect/http_uri.h +++ b/src/service_inspectors/http_inspect/http_uri.h @@ -50,10 +50,10 @@ public: const Field& get_query() { return query; } const Field& get_fragment() { return fragment; } - const Field& get_norm_host() { return host_norm; } + const Field& get_norm_host(); const Field& get_norm_path() { return path_norm; } const Field& get_norm_query() { return query_norm; } - const Field& get_norm_fragment() { return fragment_norm; } + const Field& 
get_norm_fragment(); const Field& get_norm_classic() { return classic_norm; } size_t get_file_proc_hash(); diff --git a/src/service_inspectors/http_inspect/http_uri_norm.cc b/src/service_inspectors/http_inspect/http_uri_norm.cc index af1c61178..8611e6dfc 100644 --- a/src/service_inspectors/http_inspect/http_uri_norm.cc +++ b/src/service_inspectors/http_inspect/http_uri_norm.cc @@ -32,7 +32,8 @@ using namespace HttpEnums; using namespace snort; void UriNormalizer::normalize(const Field& input, Field& result, bool do_path, uint8_t* buffer, - const HttpParaList::UriParam& uri_param, HttpInfractions* infractions, HttpEventGen* events) + const HttpParaList::UriParam& uri_param, HttpInfractions* infractions, HttpEventGen* events, + bool own_the_buffer) { // Normalize percent encodings and similar escape sequences int32_t data_length = norm_char_clean(input, buffer, uri_param, infractions, events); @@ -47,7 +48,7 @@ void UriNormalizer::normalize(const Field& input, Field& result, bool do_path, u data_length = norm_path_clean(buffer, data_length, infractions, events); } - result.set(data_length, buffer); + result.set(data_length, buffer, own_the_buffer); } bool UriNormalizer::need_norm(const Field& uri_component, bool do_path, diff --git a/src/service_inspectors/http_inspect/http_uri_norm.h b/src/service_inspectors/http_inspect/http_uri_norm.h index 721141492..ec82da66e 100644 --- a/src/service_inspectors/http_inspect/http_uri_norm.h +++ b/src/service_inspectors/http_inspect/http_uri_norm.h @@ -38,7 +38,7 @@ public: HttpEventGen* events); static void normalize(const Field& input, Field& result, bool do_path, uint8_t* buffer, const HttpParaList::UriParam& uri_param, HttpInfractions* infractions, - HttpEventGen* events); + HttpEventGen* events, bool own_the_buffer = false); static bool classic_need_norm(const Field& uri_component, bool do_path, const HttpParaList::UriParam& uri_param); static void classic_normalize(const Field& input, Field& result, bool do_path,