From: Mike Stepanek (mstepane) <mstepane@cisco.com>
Date: Fri, 6 Mar 2020 13:18:32 +0000 (+0000)
Subject: Merge pull request #2018 in SNORT/snort3 from ~DERAMADA/snort3:hi_http_uri to master
X-Git-Tag: 3.0.0-269~13
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e8abd8e4601a3d3e5b620adde9bb5f8f54eecc21;p=thirdparty%2Fsnort3.git

Merge pull request #2018 in SNORT/snort3 from ~DERAMADA/snort3:hi_http_uri to master

Squashed commit of the following:

commit d06d71e6983cde3acc12c1955425235e771258c8
Author: deramada <deramada@cisco.com>
Date:   Wed Feb 19 10:01:15 2020 -0500

    http_inspect: change http_uri to only include path and query for absolute and absolute path uris
---

diff --git a/doc/http_inspect.txt b/doc/http_inspect.txt
index 755966d62..4fb7e587a 100644
--- a/doc/http_inspect.txt
+++ b/doc/http_inspect.txt
@@ -355,6 +355,14 @@ is the scheme, "www.samplehost.com" is the host, "287" is the port,
 "/basic/example/of/path" is the path, "with-query" is the query, and
 "and-fragment" is the fragment.
 
+http_uri represents the normalized URI. Which components are normalized
+depends on the URI type. If the URI is of type absolute (contains all six
+components) or absolute path (contains path, query and fragment), then the
+path and query components are normalized. In these cases, http_uri represents
+the normalized path and query (/path?query). If the URI is of type authority
+(host and port), the host is normalized and http_uri represents the normalized
+host with the port number. In all other cases http_uri is the same as http_raw_uri.
+
 Note: this section uses informal language to explain some things. Nothing
 here is intended to conflict with the technical language of the HTTP RFCs
 and the implementation follows the RFCs.
diff --git a/src/service_inspectors/http_inspect/http_uri.cc b/src/service_inspectors/http_inspect/http_uri.cc
index 2b13b6f7b..64c0c2e78 100644
--- a/src/service_inspectors/http_inspect/http_uri.cc
+++ b/src/service_inspectors/http_inspect/http_uri.cc
@@ -191,126 +191,131 @@ void HttpUri::normalize()
     // Almost all HTTP requests are honest and rarely need expensive normalization processing. We
     // do a quick scan for red flags and only perform normalization if something comes up.
     // Otherwise we set the normalized fields to point at the raw values.
-    if ((host.length() > 0) &&
-            UriNormalizer::need_norm(host, false, uri_param, infractions, events))
-        *infractions += INF_URI_NEED_NORM_HOST;
-    if ((path.length() > 0) &&
-            UriNormalizer::need_norm(path, true, uri_param, infractions, events))
-        *infractions += INF_URI_NEED_NORM_PATH;
-    if ((query.length() > 0) &&
-            UriNormalizer::need_norm(query, false, uri_param, infractions, events))
-        *infractions += INF_URI_NEED_NORM_QUERY;
-    if ((fragment.length() > 0) &&
-            UriNormalizer::need_norm(fragment, false, uri_param, infractions, events))
-        *infractions += INF_URI_NEED_NORM_FRAGMENT;
-
-    if (!((*infractions & INF_URI_NEED_NORM_PATH)  || (*infractions & INF_URI_NEED_NORM_HOST) ||
-          (*infractions & INF_URI_NEED_NORM_QUERY) || (*infractions & INF_URI_NEED_NORM_FRAGMENT)))
+    switch (uri_type)
     {
-        // This URI is OK, normalization not required
-        host_norm.set(host);
-        path_norm.set(path);
-        query_norm.set(query);
-        fragment_norm.set(fragment);
-        classic_norm.set(uri);
-        check_oversize_dir(path_norm);
-        return;
-    }
+        case URI_ASTERISK:
+        case URI__PROBLEMATIC:
+            classic_norm.set(uri);
+            return;
+        case URI_AUTHORITY:
+        {
+            if ((host.length() > 0) && 
+                    UriNormalizer::need_norm(host, false, uri_param, infractions, events))
+            {
+                const int total_length = uri.length();
 
-    HttpModule::increment_peg_counts(PEG_URI_NORM);
+                uint8_t* const new_buf = new uint8_t[total_length];
+                uint8_t* current = new_buf;
 
-    // Create a new buffer containing the normalized URI by normalizing each individual piece.
-    const uint32_t total_length = uri.length() + UriNormalizer::URI_NORM_EXPANSION;
-    uint8_t* const new_buf = new uint8_t[total_length];
-    uint8_t* current = new_buf;
-    if (scheme.length() >= 0)
-    {
-        memcpy(current, scheme.start(), scheme.length());
-        current += scheme.length();
-        memcpy(current, "://", 3);
-        current += 3;
-    }
-    if (host.length() > 0)
-    {
-        if (*infractions & INF_URI_NEED_NORM_HOST)
-            UriNormalizer::normalize(host, host_norm, false, current, uri_param, infractions,
-                events);
-        else
-        {
-            // The host component is not changing but other parts of the URI are being normalized.
-            // We need a copy of the raw host to provide that part of the normalized URI buffer we
-            // are assembling. But the normalized component will refer to the original raw buffer
-            // on the chance that the data retention policy in use might keep it longer.
-            memcpy(current, host.start(), host.length());
-            host_norm.set(host);
-        }
-        current += host_norm.length();
-    }
-    if (port.length() >= 0)
-    {
-        memcpy(current, ":", 1);
-        current += 1;
-        memcpy(current, port.start(), port.length());
-        current += port.length();
-    }
-    if (path.length() > 0)
-    {
-        if (*infractions & INF_URI_NEED_NORM_PATH)
-            UriNormalizer::normalize(path, path_norm, true, current, uri_param, infractions,
-                events);
-        else
-        {
-            memcpy(current, path.start(), path.length());
-            path_norm.set(path);
-        }
-        current += path_norm.length();
-    }
-    if (query.length() >= 0)
-    {
-        memcpy(current, "?", 1);
-        current += 1;
-        if (*infractions & INF_URI_NEED_NORM_QUERY)
-            UriNormalizer::normalize(query, query_norm, false, current, uri_param, infractions,
-                events);
-        else
-        {
-            memcpy(current, query.start(), query.length());
-            query_norm.set(query);
+                *infractions += INF_URI_NEED_NORM_HOST;
+
+                HttpModule::increment_peg_counts(PEG_URI_NORM);
+
+                UriNormalizer::normalize(host, host_norm, false, current, uri_param, infractions,
+                    events);
+
+                current += host_norm.length();
+
+                if (port.length() >= 0)
+                {
+                    memcpy(current, ":", 1);
+                    current += 1;
+                    memcpy(current, port.start(), port.length());
+                    current += port.length();
+                }
+
+                assert(current - new_buf <= total_length);
+
+                classic_norm.set(current - new_buf, new_buf, true);
+                return;
+            }
+
+            classic_norm.set(uri);
+            return;
         }
-        current += query_norm.length();
-    }
-    if (fragment.length() >= 0)
-    {
-        memcpy(current, "#", 1);
-        current += 1;
-        if (*infractions & INF_URI_NEED_NORM_FRAGMENT)
-            UriNormalizer::normalize(fragment, fragment_norm, false, current, uri_param,
-                infractions, events);
-        else
+        case URI_ABSPATH:
+        case URI_ABSOLUTE:
         {
-            memcpy(current, fragment.start(), fragment.length());
-            fragment_norm.set(fragment);
-        }
-        current += fragment_norm.length();
-    }
-    assert(current - new_buf <= total_length);
+            if ((path.length() > 0) &&
+                    UriNormalizer::need_norm(path, true, uri_param, infractions, events))
+                *infractions += INF_URI_NEED_NORM_PATH;
+            if ((query.length() > 0) &&
+                    UriNormalizer::need_norm(query, false, uri_param, infractions, events))
+                *infractions += INF_URI_NEED_NORM_QUERY;
 
-    if ((*infractions & INF_URI_MULTISLASH) || (*infractions & INF_URI_SLASH_DOT) ||
-        (*infractions & INF_URI_SLASH_DOT_DOT))
-    {
-        HttpModule::increment_peg_counts(PEG_URI_PATH);
-    }
+            if (!((*infractions & INF_URI_NEED_NORM_PATH)  || (*infractions & INF_URI_NEED_NORM_QUERY)))
+            {
+                // This URI is OK, normalization not required
+                path_norm.set(path);
+                query_norm.set(query);
+ 
+                const int path_len = (path.length() > 0) ? path.length() : 0;
+                // query_len = length of query + 1 (? char)
+                const int query_len = (query.length() >= 0) ? query.length() + 1 : 0;
 
-    if ((*infractions & INF_URI_U_ENCODE) || (*infractions & INF_URI_UNKNOWN_PERCENT) ||
-        (*infractions & INF_URI_PERCENT_UNRESERVED) || (*infractions & INF_URI_PERCENT_UTF8_2B) ||
-        (*infractions & INF_URI_PERCENT_UTF8_3B) || (*infractions & INF_URI_DOUBLE_DECODE))
-    {
-        HttpModule::increment_peg_counts(PEG_URI_CODING);
-    }
+                classic_norm.set(path_len + query_len, abs_path.start());
+
+                check_oversize_dir(path_norm);
+                return;
+            }
+
+            HttpModule::increment_peg_counts(PEG_URI_NORM);
+
+            // Create a new buffer containing the normalized URI by normalizing each individual piece.
+            int total_length = path.length() ? path.length() + UriNormalizer::URI_NORM_EXPANSION : 0;
+            total_length += (query.length() >= 0) ? query.length() + 1 : 0;
+            uint8_t* const new_buf = new uint8_t[total_length];
+            uint8_t* current = new_buf;
+
+            if (path.length() > 0)
+            {
+                if (*infractions & INF_URI_NEED_NORM_PATH)
+                    UriNormalizer::normalize(path, path_norm, true, current, uri_param, infractions,
+                        events);
+                else
+                {
+                    memcpy(current, path.start(), path.length());
+                    path_norm.set(path);
+                }
+                current += path_norm.length();
+            }
+            if (query.length() >= 0)
+            {
+                memcpy(current, "?", 1);
+                current += 1;
+                if (*infractions & INF_URI_NEED_NORM_QUERY)
+                    UriNormalizer::normalize(query, query_norm, false, current, uri_param, infractions,
+                        events);
+                else
+                {
+                    memcpy(current, query.start(), query.length());
+                    query_norm.set(query);
+                }
+                current += query_norm.length();
+            }
+
+            assert(current - new_buf <= total_length);
+
+            if ((*infractions & INF_URI_MULTISLASH) || (*infractions & INF_URI_SLASH_DOT) ||
+                (*infractions & INF_URI_SLASH_DOT_DOT))
+            {
+                HttpModule::increment_peg_counts(PEG_URI_PATH);
+            }
 
-    check_oversize_dir(path_norm);
+            if ((*infractions & INF_URI_U_ENCODE) || (*infractions & INF_URI_UNKNOWN_PERCENT) ||
+                (*infractions & INF_URI_PERCENT_UNRESERVED) || (*infractions & INF_URI_PERCENT_UTF8_2B) ||
+                (*infractions & INF_URI_PERCENT_UTF8_3B) || (*infractions & INF_URI_DOUBLE_DECODE))
+            {
+                HttpModule::increment_peg_counts(PEG_URI_CODING);
+            }
+
+            check_oversize_dir(path_norm);
 
-    classic_norm.set(current - new_buf, new_buf, true);
+            classic_norm.set(current - new_buf, new_buf, true);
+        }
+        default:
+            return;
+    }
 }
 
 size_t HttpUri::get_file_proc_hash()
@@ -325,3 +330,45 @@ size_t HttpUri::get_file_proc_hash()
 
     return abs_path_hash;
 }
+// Returns the normalized host; computed on first use and kept in host_norm.
+const Field& HttpUri::get_norm_host()
+{
+    if (host_norm.length() != STAT_NOT_COMPUTE)  // host_norm was already set: reuse it
+        return host_norm;
+
+    if (host.length() > 0 and
+        UriNormalizer::need_norm(host, false, uri_param, infractions, events))
+    {
+        uint8_t *buf = new uint8_t[host.length()];
+        // NOTE(review): buf is sized to the raw host; assumes normalization never grows it - confirm.
+        *infractions += INF_URI_NEED_NORM_HOST;
+        // The trailing 'true' is own_the_buffer, which normalize() forwards to host_norm.set().
+        UriNormalizer::normalize(host, host_norm, false, buf, uri_param, 
+            infractions, events, true);
+    }
+    else
+        host_norm.set(host);  // empty host or no normalization needed: use the raw host
+
+    return host_norm;
+}
+// Returns the normalized fragment; computed on first use and kept in fragment_norm.
+const Field& HttpUri::get_norm_fragment()
+{
+    if (fragment_norm.length() != STAT_NOT_COMPUTE)  // fragment_norm was already set: reuse it
+        return fragment_norm;
+
+    if ((fragment.length() > 0) and 
+        UriNormalizer::need_norm(fragment, false, uri_param, infractions, events))
+    {
+        uint8_t *buf = new uint8_t[fragment.length()];
+        // NOTE(review): buf is sized to the raw fragment; assumes normalization never grows it - confirm.
+        *infractions += INF_URI_NEED_NORM_FRAGMENT;
+        // The trailing 'true' is own_the_buffer, which normalize() forwards to fragment_norm.set().
+        UriNormalizer::normalize(fragment, fragment_norm, false, buf, uri_param,
+            infractions, events, true);
+    }
+    else
+        fragment_norm.set(fragment);  // empty fragment or no normalization needed: use the raw one
+
+    return fragment_norm;
+}
diff --git a/src/service_inspectors/http_inspect/http_uri.h b/src/service_inspectors/http_inspect/http_uri.h
index 30876918c..7152f4b5b 100644
--- a/src/service_inspectors/http_inspect/http_uri.h
+++ b/src/service_inspectors/http_inspect/http_uri.h
@@ -50,10 +50,10 @@ public:
     const Field& get_query() { return query; }
     const Field& get_fragment() { return fragment; }
 
-    const Field& get_norm_host() { return host_norm; }
+    const Field& get_norm_host();
     const Field& get_norm_path() { return path_norm; }
     const Field& get_norm_query() { return query_norm; }
-    const Field& get_norm_fragment() { return fragment_norm; }
+    const Field& get_norm_fragment();
     const Field& get_norm_classic() { return classic_norm; }
     size_t get_file_proc_hash();
 
diff --git a/src/service_inspectors/http_inspect/http_uri_norm.cc b/src/service_inspectors/http_inspect/http_uri_norm.cc
index af1c61178..8611e6dfc 100644
--- a/src/service_inspectors/http_inspect/http_uri_norm.cc
+++ b/src/service_inspectors/http_inspect/http_uri_norm.cc
@@ -32,7 +32,8 @@ using namespace HttpEnums;
 using namespace snort;
 
 void UriNormalizer::normalize(const Field& input, Field& result, bool do_path, uint8_t* buffer,
-    const HttpParaList::UriParam& uri_param, HttpInfractions* infractions, HttpEventGen* events)
+    const HttpParaList::UriParam& uri_param, HttpInfractions* infractions, HttpEventGen* events,
+    bool own_the_buffer)
 {
     // Normalize percent encodings and similar escape sequences
     int32_t data_length = norm_char_clean(input, buffer, uri_param, infractions, events);
@@ -47,7 +48,7 @@ void UriNormalizer::normalize(const Field& input, Field& result, bool do_path, u
         data_length = norm_path_clean(buffer, data_length, infractions, events);
     }
 
-    result.set(data_length, buffer);
+    result.set(data_length, buffer, own_the_buffer);
 }
 
 bool UriNormalizer::need_norm(const Field& uri_component, bool do_path,
diff --git a/src/service_inspectors/http_inspect/http_uri_norm.h b/src/service_inspectors/http_inspect/http_uri_norm.h
index 721141492..ec82da66e 100644
--- a/src/service_inspectors/http_inspect/http_uri_norm.h
+++ b/src/service_inspectors/http_inspect/http_uri_norm.h
@@ -38,7 +38,7 @@ public:
         HttpEventGen* events);
     static void normalize(const Field& input, Field& result, bool do_path, uint8_t* buffer,
         const HttpParaList::UriParam& uri_param, HttpInfractions* infractions,
-        HttpEventGen* events);
+        HttpEventGen* events, bool own_the_buffer = false);
     static bool classic_need_norm(const Field& uri_component, bool do_path,
         const HttpParaList::UriParam& uri_param);
     static void classic_normalize(const Field& input, Field& result, bool do_path,