From: Mike Stepanek (mstepane) Date: Tue, 19 Jan 2021 12:36:05 +0000 (+0000) Subject: Merge pull request #2701 in SNORT/snort3 from ~THOPETER/snort3:nhttp151 to master X-Git-Tag: 3.1.1.0~19 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3b6f877f425c43777316d06c22b7b53407b89b86;p=thirdparty%2Fsnort3.git Merge pull request #2701 in SNORT/snort3 from ~THOPETER/snort3:nhttp151 to master Squashed commit of the following: commit 590e02e4b68adfb5105de46c844b31c8cf3aaac5 Author: Tom Peters Date: Mon Jan 11 18:49:18 2021 -0500 http_inspect: validate and normalize scheme --- diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt index 07a62cb57..fbfd24e86 100755 --- a/src/service_inspectors/http_inspect/dev_notes.txt +++ b/src/service_inspectors/http_inspect/dev_notes.txt @@ -141,12 +141,15 @@ particular. Often used with the OPTIONS method. This is not normalized. 2. Authority: any URI used with the CONNECT method. The entire URI is treated as an authority. -3. Absolute Path: a URI that begins with a slash. Consists of only an absolute path with no scheme -or authority present. +3. Origin: a URI that begins with a slash. Consists of only an absolute path with no scheme or +authority present. -4. Absolute URI: a URI which includes a scheme and a host as well as an absolute path. E.g. +4. Absolute: a URI which includes a scheme and a host as well as an absolute path. E.g. http://example.com/path/to/resource. +In addition there are malformed URIs that don't meet any of the four types. These are protocol +errors and will trigger an alert. Because their format is unrecognized they are not normalized. + Step 2: Decompose the URI into its up to six constituent pieces: scheme, host, port, path, query, and fragment. @@ -159,9 +162,10 @@ will only match the query portion of the URI. Step 3: Normalize the individual pieces. -The scheme and port are not normalized. 
The other four pieces are normalized in a fashion similar -to 2.X with an important exception. Path-related normalizations such as eliminating directory -traversals and squeezing out extra slashes are only done for the path. +The port is not normalized. The scheme is normalized to lower case. The other four pieces are +normalized in a fashion similar to 2.X with an important exception. Path-related normalizations +such as eliminating directory traversals and squeezing out extra slashes are only done for the +path. The normalized URI pieces can be accessed via rules. For example: http_uri: path; content: “foo/bar”. diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index cae4b20c7..b28b03ffe 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -99,7 +99,7 @@ enum MethodId { // URI formats enum UriType { URI__NOT_COMPUTE=-14, URI__PROBLEMATIC=-12, URI_ASTERISK = 2, URI_AUTHORITY, - URI_ABSPATH, URI_ABSOLUTE }; + URI_ORIGIN, URI_ABSOLUTE }; // Body compression types enum CompressId { CMP_NONE=2, CMP_GZIP, CMP_DEFLATE }; @@ -150,7 +150,7 @@ enum Infraction INF_URI_PERCENT_UTF8_3B, INF_URI_PERCENT_UNRESERVED, INF_URI_PERCENT_UTF8_2B, - INF_NOT_USED_1, + INF_URI_NEED_NORM_SCHEME, INF_URI_PERCENT_OTHER, INF_URI_BAD_CHAR, INF_URI_8BIT_CHAR, @@ -381,6 +381,7 @@ enum EventSid extern const int8_t as_hex[256]; extern const bool token_char[256]; +extern const bool scheme_char[256]; extern const bool is_sp_tab[256]; extern const bool is_cr_lf[256]; extern const bool is_sp_tab_lf[256]; diff --git a/src/service_inspectors/http_inspect/http_msg_request.cc b/src/service_inspectors/http_inspect/http_msg_request.cc index 9e5f27893..475050961 100644 --- a/src/service_inspectors/http_inspect/http_msg_request.cc +++ b/src/service_inspectors/http_inspect/http_msg_request.cc @@ -325,6 +325,7 @@ void HttpMsgRequest::print_section(FILE* output) 
uri->get_uri().print(output, "URI"); fprintf(output, "URI Type: %d\n", uri->get_uri_type()); uri->get_scheme().print(output, "Scheme"); + uri->get_norm_scheme().print(output, "Normalized Scheme"); uri->get_authority().print(output, "Authority"); uri->get_host().print(output, "Host Name"); uri->get_norm_host().print(output, "Normalized Host Name"); diff --git a/src/service_inspectors/http_inspect/http_msg_section.cc b/src/service_inspectors/http_inspect/http_msg_section.cc index cf744ab20..04d2be869 100644 --- a/src/service_inspectors/http_inspect/http_msg_section.cc +++ b/src/service_inspectors/http_inspect/http_msg_section.cc @@ -356,7 +356,7 @@ const Field& HttpMsgSection::get_classic_buffer(Cursor& c, const HttpBufferInfo& switch ((UriComponent)buf.sub_id) { case UC_SCHEME: - return uri->get_scheme(); + return raw ? uri->get_scheme() : uri->get_norm_scheme(); case UC_HOST: return raw ? uri->get_host() : uri->get_norm_host(); case UC_PORT: diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index 16207e0d6..7b95fb0a5 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ b/src/service_inspectors/http_inspect/http_tables.cc @@ -500,6 +500,34 @@ const bool HttpEnums::token_char[256] = false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false }; +// Characters allowed in the scheme portion of a URI: 0-9, a-z, A-Z, plus, minus, and period. 
+const bool HttpEnums::scheme_char[256] = +{ + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, false, false, false, true, false, true, true, false, + true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, + + false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, + + false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, + + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false +}; + const bool HttpEnums::is_sp_tab[256] = { false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, diff --git 
a/src/service_inspectors/http_inspect/http_uri.cc b/src/service_inspectors/http_inspect/http_uri.cc index 77ee45b1e..22a6ca454 100644 --- a/src/service_inspectors/http_inspect/http_uri.cc +++ b/src/service_inspectors/http_inspect/http_uri.cc @@ -50,10 +50,10 @@ void HttpUri::parse_uri() authority.set(uri); abs_path.set(STAT_NOT_PRESENT); } - // Absolute path is a path but no scheme or authority + // Origin form is a path but no scheme or authority else if (uri.start()[0] == '/') { - uri_type = URI_ABSPATH; + uri_type = URI_ORIGIN; scheme.set(STAT_NOT_PRESENT); authority.set(STAT_NOT_PRESENT); abs_path.set(uri); @@ -61,12 +61,18 @@ void HttpUri::parse_uri() // Absolute URI includes scheme, authority, and path else { + // <scheme>://<authority>/<path> // Find the "://" and then the "/" int j; int k; - for (j = 0; (j < uri.length()) && (uri.start()[j] != ':'); j++); + for (j = 0; (j < uri.length()) && (uri.start()[j] != ':') && scheme_char[uri.start()[j]]; + j++); for (k = j+3; (k < uri.length()) && (uri.start()[k] != '/'); k++); - if ((k < uri.length()) && (uri.start()[j+1] == '/') && (uri.start()[j+2] == '/')) + + // Verify that 1) we found ://, 2) we found /, 3) scheme begins with a letter, and + // 4) scheme consists of legal characters (RFC 3986 3.1) + if ((k < uri.length()) && (uri.start()[j] == ':') && (uri.start()[j+1] == '/') && + (uri.start()[j+2] == '/') && (uri.start()[0] >= 'A')) { uri_type = URI_ABSOLUTE; scheme.set(j, uri.start()); @@ -233,7 +239,7 @@ void HttpUri::normalize() classic_norm.set(uri); return; } - case URI_ABSPATH: + case URI_ORIGIN: case URI_ABSOLUTE: { if ((path.length() > 0) && @@ -342,6 +348,32 @@ void HttpUri::normalize() } } +const Field& HttpUri::get_norm_scheme() +{ + if (scheme_norm.length() != STAT_NOT_COMPUTE) + return scheme_norm; + + // Normalize upper case to lower case + int k = 0; + for (; (k < scheme.length()) && ((scheme.start()[k] < 'A') || (scheme.start()[k] > 'Z')); k++); + + if (k < scheme.length()) + { + uint8_t* const buf = new 
uint8_t[scheme.length()]; + *infractions += INF_URI_NEED_NORM_SCHEME; + for (int i=0; i < scheme.length(); i++) + { + buf[i] = scheme.start()[i] + + (((scheme.start()[i] < 'A') || (scheme.start()[i] > 'Z')) ? 0 : 'a' - 'A'); + } + scheme_norm.set(scheme.length(), buf, true); + } + else + scheme_norm.set(scheme); + + return scheme_norm; +} + const Field& HttpUri::get_norm_host() { if (host_norm.length() != STAT_NOT_COMPUTE) diff --git a/src/service_inspectors/http_inspect/http_uri.h b/src/service_inspectors/http_inspect/http_uri.h index 866538865..4e782edd6 100644 --- a/src/service_inspectors/http_inspect/http_uri.h +++ b/src/service_inspectors/http_inspect/http_uri.h @@ -50,6 +50,7 @@ public: const Field& get_query() { return query; } const Field& get_fragment() { return fragment; } + const Field& get_norm_scheme(); const Field& get_norm_host(); const Field& get_norm_path() { return path_norm; } const Field& get_norm_query() { return query_norm; } @@ -68,6 +69,7 @@ private: Field query; Field fragment; + Field scheme_norm; Field host_norm; Field path_norm; Field query_norm;