Merge pull request #2018 in SNORT/snort3 from ~DERAMADA/snort3:hi_http_uri to master

author Mike Stepanek (mstepane) <mstepane@cisco.com>

Fri, 6 Mar 2020 13:18:32 +0000 (13:18 +0000)

committer Mike Stepanek (mstepane) <mstepane@cisco.com>

Fri, 6 Mar 2020 13:18:32 +0000 (13:18 +0000)
author Mike Stepanek (mstepane) <mstepane@cisco.com>
Fri, 6 Mar 2020 13:18:32 +0000 (13:18 +0000)
committer Mike Stepanek (mstepane) <mstepane@cisco.com>
Fri, 6 Mar 2020 13:18:32 +0000 (13:18 +0000)
diff --git a/doc/http_inspect.txt b/doc/http_inspect.txt

index 755966d6267c066c63e9e80ca079fb8fe672eb42..4fb7e587a785ccd6a32cfcf153bb3d5a9ccf9a70 100644 (file)
--- a/doc/http_inspect.txt
+++ b/doc/http_inspect.txt
@@ -355,6 +355,14 @@ is the scheme, "www.samplehost.com" is the host, "287" is the port,
  "/basic/example/of/path" is the path, "with-query" is the query, and
  "and-fragment" is the fragment.
  
+http_uri represents the normalized URI. Which components are normalized depends
+on the URI type. If the URI is of type absolute (contains all six components) or
+absolute path (contains path, query, and fragment), then the path and query
+components are normalized. In these cases, http_uri represents the normalized
+path and query (/path?query). If the URI is of type authority (host and port),
+the host is normalized and http_uri represents the normalized host with the port
+number. In all other cases http_uri is the same as http_raw_uri.
+
  Note: this section uses informal language to explain some things. Nothing
  here is intended to conflict with the technical language of the HTTP RFCs
  and the implementation follows the RFCs.
diff --git a/src/service_inspectors/http_inspect/http_uri.cc b/src/service_inspectors/http_inspect/http_uri.cc

index 2b13b6f7be2c188d9325947a3b80e7b5367dfeb6..64c0c2e7869aa129d2d54032dd11f87cdedab0dd 100644 (file)
--- a/src/service_inspectors/http_inspect/http_uri.cc
+++ b/src/service_inspectors/http_inspect/http_uri.cc
@@ -191,126 +191,131 @@ void HttpUri::normalize()
      // Almost all HTTP requests are honest and rarely need expensive normalization processing. We
      // do a quick scan for red flags and only perform normalization if something comes up.
      // Otherwise we set the normalized fields to point at the raw values.
-    if ((host.length() > 0) &&
-            UriNormalizer::need_norm(host, false, uri_param, infractions, events))
-        *infractions += INF_URI_NEED_NORM_HOST;
-    if ((path.length() > 0) &&
-            UriNormalizer::need_norm(path, true, uri_param, infractions, events))
-        *infractions += INF_URI_NEED_NORM_PATH;
-    if ((query.length() > 0) &&
-            UriNormalizer::need_norm(query, false, uri_param, infractions, events))
-        *infractions += INF_URI_NEED_NORM_QUERY;
-    if ((fragment.length() > 0) &&
-            UriNormalizer::need_norm(fragment, false, uri_param, infractions, events))
-        *infractions += INF_URI_NEED_NORM_FRAGMENT;
-
-    if (!((*infractions & INF_URI_NEED_NORM_PATH)  || (*infractions & INF_URI_NEED_NORM_HOST) ||
-          (*infractions & INF_URI_NEED_NORM_QUERY) || (*infractions & INF_URI_NEED_NORM_FRAGMENT)))
+    switch (uri_type)
      {
-        // This URI is OK, normalization not required
-        host_norm.set(host);
-        path_norm.set(path);
-        query_norm.set(query);
-        fragment_norm.set(fragment);
-        classic_norm.set(uri);
-        check_oversize_dir(path_norm);
-        return;
-    }
+        case URI_ASTERISK:
+        case URI__PROBLEMATIC:
+            classic_norm.set(uri);
+            return;
+        case URI_AUTHORITY:
+        {
+            if ((host.length() > 0) && 
+                    UriNormalizer::need_norm(host, false, uri_param, infractions, events))
+            {
+                const int total_length = uri.length();
  
-    HttpModule::increment_peg_counts(PEG_URI_NORM);
+                uint8_t* const new_buf = new uint8_t[total_length];
+                uint8_t* current = new_buf;
  
-    // Create a new buffer containing the normalized URI by normalizing each individual piece.
-    const uint32_t total_length = uri.length() + UriNormalizer::URI_NORM_EXPANSION;
-    uint8_t* const new_buf = new uint8_t[total_length];
-    uint8_t* current = new_buf;
-    if (scheme.length() >= 0)
-    {
-        memcpy(current, scheme.start(), scheme.length());
-        current += scheme.length();
-        memcpy(current, "://", 3);
-        current += 3;
-    }
-    if (host.length() > 0)
-    {
-        if (*infractions & INF_URI_NEED_NORM_HOST)
-            UriNormalizer::normalize(host, host_norm, false, current, uri_param, infractions,
-                events);
-        else
-        {
-            // The host component is not changing but other parts of the URI are being normalized.
-            // We need a copy of the raw host to provide that part of the normalized URI buffer we
-            // are assembling. But the normalized component will refer to the original raw buffer
-            // on the chance that the data retention policy in use might keep it longer.
-            memcpy(current, host.start(), host.length());
-            host_norm.set(host);
-        }
-        current += host_norm.length();
-    }
-    if (port.length() >= 0)
-    {
-        memcpy(current, ":", 1);
-        current += 1;
-        memcpy(current, port.start(), port.length());
-        current += port.length();
-    }
-    if (path.length() > 0)
-    {
-        if (*infractions & INF_URI_NEED_NORM_PATH)
-            UriNormalizer::normalize(path, path_norm, true, current, uri_param, infractions,
-                events);
-        else
-        {
-            memcpy(current, path.start(), path.length());
-            path_norm.set(path);
-        }
-        current += path_norm.length();
-    }
-    if (query.length() >= 0)
-    {
-        memcpy(current, "?", 1);
-        current += 1;
-        if (*infractions & INF_URI_NEED_NORM_QUERY)
-            UriNormalizer::normalize(query, query_norm, false, current, uri_param, infractions,
-                events);
-        else
-        {
-            memcpy(current, query.start(), query.length());
-            query_norm.set(query);
+                *infractions += INF_URI_NEED_NORM_HOST;
+
+                HttpModule::increment_peg_counts(PEG_URI_NORM);
+
+                UriNormalizer::normalize(host, host_norm, false, current, uri_param, infractions,
+                    events);
+
+                current += host_norm.length();
+
+                if (port.length() >= 0)
+                {
+                    memcpy(current, ":", 1);
+                    current += 1;
+                    memcpy(current, port.start(), port.length());
+                    current += port.length();
+                }
+
+                assert(current - new_buf <= total_length);
+
+                classic_norm.set(current - new_buf, new_buf, true);
+                return;
+            }
+
+            classic_norm.set(uri);
+            return;
          }
-        current += query_norm.length();
-    }
-    if (fragment.length() >= 0)
-    {
-        memcpy(current, "#", 1);
-        current += 1;
-        if (*infractions & INF_URI_NEED_NORM_FRAGMENT)
-            UriNormalizer::normalize(fragment, fragment_norm, false, current, uri_param,
-                infractions, events);
-        else
+        case URI_ABSPATH:
+        case URI_ABSOLUTE:
          {
-            memcpy(current, fragment.start(), fragment.length());
-            fragment_norm.set(fragment);
-        }
-        current += fragment_norm.length();
-    }
-    assert(current - new_buf <= total_length);
+            if ((path.length() > 0) &&
+                    UriNormalizer::need_norm(path, true, uri_param, infractions, events))
+                *infractions += INF_URI_NEED_NORM_PATH;
+            if ((query.length() > 0) &&
+                    UriNormalizer::need_norm(query, false, uri_param, infractions, events))
+                *infractions += INF_URI_NEED_NORM_QUERY;
  
-    if ((*infractions & INF_URI_MULTISLASH) || (*infractions & INF_URI_SLASH_DOT) ||
-        (*infractions & INF_URI_SLASH_DOT_DOT))
-    {
-        HttpModule::increment_peg_counts(PEG_URI_PATH);
-    }
+            if (!((*infractions & INF_URI_NEED_NORM_PATH)  || (*infractions & INF_URI_NEED_NORM_QUERY)))
+            {
+                // This URI is OK, normalization not required
+                path_norm.set(path);
+                query_norm.set(query);
+ 
+                const int path_len = (path.length() > 0) ? path.length() : 0;
+                // query_len = length of query + 1 (? char)
+                const int query_len = (query.length() >= 0) ? query.length() + 1 : 0;
  
-    if ((*infractions & INF_URI_U_ENCODE) || (*infractions & INF_URI_UNKNOWN_PERCENT) ||
-        (*infractions & INF_URI_PERCENT_UNRESERVED) || (*infractions & INF_URI_PERCENT_UTF8_2B) ||
-        (*infractions & INF_URI_PERCENT_UTF8_3B) || (*infractions & INF_URI_DOUBLE_DECODE))
-    {
-        HttpModule::increment_peg_counts(PEG_URI_CODING);
-    }
+                classic_norm.set(path_len + query_len, abs_path.start());
+
+                check_oversize_dir(path_norm);
+                return;
+            }
+
+            HttpModule::increment_peg_counts(PEG_URI_NORM);
+
+            // Create a new buffer containing the normalized URI by normalizing each individual piece.
+            int total_length = path.length() ? path.length() + UriNormalizer::URI_NORM_EXPANSION : 0;
+            total_length += (query.length() >= 0) ? query.length() + 1 : 0;
+            uint8_t* const new_buf = new uint8_t[total_length];
+            uint8_t* current = new_buf;
+
+            if (path.length() > 0)
+            {
+                if (*infractions & INF_URI_NEED_NORM_PATH)
+                    UriNormalizer::normalize(path, path_norm, true, current, uri_param, infractions,
+                        events);
+                else
+                {
+                    memcpy(current, path.start(), path.length());
+                    path_norm.set(path);
+                }
+                current += path_norm.length();
+            }
+            if (query.length() >= 0)
+            {
+                memcpy(current, "?", 1);
+                current += 1;
+                if (*infractions & INF_URI_NEED_NORM_QUERY)
+                    UriNormalizer::normalize(query, query_norm, false, current, uri_param, infractions,
+                        events);
+                else
+                {
+                    memcpy(current, query.start(), query.length());
+                    query_norm.set(query);
+                }
+                current += query_norm.length();
+            }
+
+            assert(current - new_buf <= total_length);
+
+            if ((*infractions & INF_URI_MULTISLASH) || (*infractions & INF_URI_SLASH_DOT) ||
+                (*infractions & INF_URI_SLASH_DOT_DOT))
+            {
+                HttpModule::increment_peg_counts(PEG_URI_PATH);
+            }
  
-    check_oversize_dir(path_norm);
+            if ((*infractions & INF_URI_U_ENCODE) || (*infractions & INF_URI_UNKNOWN_PERCENT) ||
+                (*infractions & INF_URI_PERCENT_UNRESERVED) || (*infractions & INF_URI_PERCENT_UTF8_2B) ||
+                (*infractions & INF_URI_PERCENT_UTF8_3B) || (*infractions & INF_URI_DOUBLE_DECODE))
+            {
+                HttpModule::increment_peg_counts(PEG_URI_CODING);
+            }
+
+            check_oversize_dir(path_norm);
  
-    classic_norm.set(current - new_buf, new_buf, true);
+            classic_norm.set(current - new_buf, new_buf, true);
+        }
+        default:
+            return;
+    }
  }
  
  size_t HttpUri::get_file_proc_hash()
@@ -325,3 +330,45 @@ size_t HttpUri::get_file_proc_hash()
  
      return abs_path_hash;
  }
+
+const Field& HttpUri::get_norm_host()   // Returns host_norm, filling it in on first use.
+{
+    if (host_norm.length() != STAT_NOT_COMPUTE)   // presumably: set by an earlier call or normalize()
+        return host_norm;
+    // Not set yet: normalize into a fresh buffer only when need_norm() flags the raw host.
+    if (host.length() > 0 and
+        UriNormalizer::need_norm(host, false, uri_param, infractions, events))
+    {
+        uint8_t *buf = new uint8_t[host.length()];
+        // NOTE(review): buf is sized to the raw host; assumes normalizing never expands it - confirm.
+        *infractions += INF_URI_NEED_NORM_HOST;
+        // Trailing true is own_the_buffer, which normalize() forwards to host_norm.set() with buf.
+        UriNormalizer::normalize(host, host_norm, false, buf, uri_param, 
+            infractions, events, true);
+    }
+    else
+        host_norm.set(host);   // nothing to normalize: host_norm is set from the raw host field
+
+    return host_norm;
+}
+
+const Field& HttpUri::get_norm_fragment()   // Returns fragment_norm, filling it in on first use.
+{
+    if (fragment_norm.length() != STAT_NOT_COMPUTE)   // presumably: already set by an earlier call
+        return fragment_norm;
+    // Not set yet: normalize into a fresh buffer only when need_norm() flags the raw fragment.
+    if ((fragment.length() > 0) and 
+        UriNormalizer::need_norm(fragment, false, uri_param, infractions, events))
+    {
+        uint8_t *buf = new uint8_t[fragment.length()];
+        // NOTE(review): buf is sized to the raw fragment; assumes normalizing never expands it - confirm.
+        *infractions += INF_URI_NEED_NORM_FRAGMENT;
+        // Trailing true is own_the_buffer, which normalize() forwards to fragment_norm.set() with buf.
+        UriNormalizer::normalize(fragment, fragment_norm, false, buf, uri_param,
+            infractions, events, true);
+    }
+    else
+        fragment_norm.set(fragment);   // nothing to normalize: set from the raw fragment field
+
+    return fragment_norm;
+}
diff --git a/src/service_inspectors/http_inspect/http_uri.h b/src/service_inspectors/http_inspect/http_uri.h

index 30876918cbcbac6d5353e57b6d41eb68bd5e037f..7152f4b5b2df2f037a86dc02b0f584526ec7ca26 100644 (file)
--- a/src/service_inspectors/http_inspect/http_uri.h
+++ b/src/service_inspectors/http_inspect/http_uri.h
@@ -50,10 +50,10 @@ public:
      const Field& get_query() { return query; }
      const Field& get_fragment() { return fragment; }
  
-    const Field& get_norm_host() { return host_norm; }
+    const Field& get_norm_host();
      const Field& get_norm_path() { return path_norm; }
      const Field& get_norm_query() { return query_norm; }
-    const Field& get_norm_fragment() { return fragment_norm; }
+    const Field& get_norm_fragment();
      const Field& get_norm_classic() { return classic_norm; }
      size_t get_file_proc_hash();
  
diff --git a/src/service_inspectors/http_inspect/http_uri_norm.cc b/src/service_inspectors/http_inspect/http_uri_norm.cc

index af1c611781c7ff1941a628e730a5dd2eebea1c5a..8611e6dfcc8555ff63b83f87decac27353eb4067 100644 (file)
--- a/src/service_inspectors/http_inspect/http_uri_norm.cc
+++ b/src/service_inspectors/http_inspect/http_uri_norm.cc
@@ -32,7 +32,8 @@ using namespace HttpEnums;
  using namespace snort;
  
  void UriNormalizer::normalize(const Field& input, Field& result, bool do_path, uint8_t* buffer,
-    const HttpParaList::UriParam& uri_param, HttpInfractions* infractions, HttpEventGen* events)
+    const HttpParaList::UriParam& uri_param, HttpInfractions* infractions, HttpEventGen* events,
+    bool own_the_buffer)
  {
      // Normalize percent encodings and similar escape sequences
      int32_t data_length = norm_char_clean(input, buffer, uri_param, infractions, events);
@@ -47,7 +48,7 @@ void UriNormalizer::normalize(const Field& input, Field& result, bool do_path, u
          data_length = norm_path_clean(buffer, data_length, infractions, events);
      }
  
-    result.set(data_length, buffer);
+    result.set(data_length, buffer, own_the_buffer);
  }
  
  bool UriNormalizer::need_norm(const Field& uri_component, bool do_path,
diff --git a/src/service_inspectors/http_inspect/http_uri_norm.h b/src/service_inspectors/http_inspect/http_uri_norm.h

index 72114149252618aeed7a898a2f851b5158a6e196..ec82da66ee9f2b9f268764e8f035018da761af82 100644 (file)
--- a/src/service_inspectors/http_inspect/http_uri_norm.h
+++ b/src/service_inspectors/http_inspect/http_uri_norm.h
@@ -38,7 +38,7 @@ public:
          HttpEventGen* events);
      static void normalize(const Field& input, Field& result, bool do_path, uint8_t* buffer,
          const HttpParaList::UriParam& uri_param, HttpInfractions* infractions,
-        HttpEventGen* events);
+        HttpEventGen* events, bool own_the_buffer = false);
      static bool classic_need_norm(const Field& uri_component, bool do_path,
          const HttpParaList::UriParam& uri_param);
      static void classic_normalize(const Field& input, Field& result, bool do_path,
author	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Fri, 6 Mar 2020 13:18:32 +0000 (13:18 +0000)
committer	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Fri, 6 Mar 2020 13:18:32 +0000 (13:18 +0000)
doc/http_inspect.txt		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_uri.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_uri.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_uri_norm.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_uri_norm.h		patch \| blob \| blame \| history