Merge pull request #575 in SNORT/snort3 from utf_decoding to master

author Russ Combs (rucombs) <rucombs@cisco.com>

Fri, 12 Aug 2016 20:12:11 +0000 (16:12 -0400)

committer Russ Combs (rucombs) <rucombs@cisco.com>

Fri, 12 Aug 2016 20:12:11 +0000 (16:12 -0400)
author Russ Combs (rucombs) <rucombs@cisco.com>
Fri, 12 Aug 2016 20:12:11 +0000 (16:12 -0400)
committer Russ Combs (rucombs) <rucombs@cisco.com>
Fri, 12 Aug 2016 20:12:11 +0000 (16:12 -0400)
diff --git a/extra/src/inspectors/http_server/hi_main.cc b/extra/src/inspectors/http_server/hi_main.cc

index 0283734bb963f94646ce82e66d003e6d163fd031..742ffe2867a59b92d58229842e45aec3ff40799a 100644 (file)
--- a/extra/src/inspectors/http_server/hi_main.cc
+++ b/extra/src/inspectors/http_server/hi_main.cc
@@ -135,7 +135,7 @@ void HttpFlowData::init()
  HttpFlowData::HttpFlowData() : FlowData(flow_id)
  {
      memset(&session, 0, sizeof(session));
-    init_decode_utf_state(&session.utf_state);
+    session.utf_state = new UtfDecodeSession();
  }
  
  HttpFlowData::~HttpFlowData()
@@ -1182,6 +1182,9 @@ void FreeHttpSessionData(void* data)
      if (hsd->mime_ssn)
          delete hsd->mime_ssn;
  
+    if (hsd->utf_state)
+        delete hsd->utf_state;
+
      if ( hsd->fd_state != 0 )
      {
          File_Decomp_StopFree(hsd->fd_state);
diff --git a/extra/src/inspectors/http_server/hi_main.h b/extra/src/inspectors/http_server/hi_main.h

index 65fe609d506fd369f9ca1c75e34081ba5222b714..276833eda74289bcadf248511bcae3be6cfece44 100644 (file)
--- a/extra/src/inspectors/http_server/hi_main.h
+++ b/extra/src/inspectors/http_server/hi_main.h
@@ -125,7 +125,7 @@ typedef struct _HttpSessionData
      DECOMPRESS_STATE* decomp_state;
      HTTP_LOG_STATE* log_state;
      sfip_t* true_ip;
-    decode_utf_state_t utf_state;
+    UtfDecodeSession* utf_state;
      uint8_t log_flags;
      uint8_t cli_small_chunk_count;
      uint8_t srv_small_chunk_count;
diff --git a/extra/src/inspectors/http_server/hi_server.cc b/extra/src/inspectors/http_server/hi_server.cc

index e806f4cffe33e92a62cf2642ade70252144a4d12..b8f66772d3466346b451b4527b32a1b4ab1f7fc8 100644 (file)
--- a/extra/src/inspectors/http_server/hi_server.cc
+++ b/extra/src/inspectors/http_server/hi_server.cc
@@ -345,7 +345,7 @@ static inline const u_char* extract_http_content_type_charset(
      sf_unfold_header(p, end-p, unfold_buf, sizeof(unfold_buf), &unfold_size, 0, 0);
      if (!unfold_size)
      {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
          return p;
      }
      p += unfold_size;
@@ -356,14 +356,14 @@ static inline const u_char* extract_http_content_type_charset(
      ptr = SnortStrcasestr(ptr, (int)(ptr_end - ptr), "text");
      if (!ptr)
      {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
          return p;
      }
  
      ptr = SnortStrcasestr(ptr, (int)(ptr_end - ptr), "utf-");
      if (!ptr)
      {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
          return p;
      }
      ptr += 4; /* length of "utf-" */
@@ -371,28 +371,28 @@ static inline const u_char* extract_http_content_type_charset(
  
      if ((cmplen > 0) && (*ptr == '8'))
      {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
      }
      else if ((cmplen > 0) && (*ptr == '7'))
      {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF7);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF7);
          hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF7);
      }
      else if (cmplen >= 4)
      {
          if ( !strncasecmp(ptr, "16le", 4) )
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF16LE);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF16LE);
          else if ( !strncasecmp(ptr, "16be", 4) )
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF16BE);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF16BE);
          else if ( !strncasecmp(ptr, "32le", 4) )
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF32LE);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF32LE);
          else if ( !strncasecmp(ptr, "32be", 4) )
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF32BE);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF32BE);
          else
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
      }
      else
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
  
      return p;
  }
@@ -1673,7 +1673,7 @@ static int HttpResponseInspection(HI_SESSION* session, Packet* p, const unsigned
                  }
              }
  
-            if ((get_decode_utf_state_charset(&(sd->utf_state)) != CHARSET_DEFAULT)
+            if ((sd->utf_state->get_decode_utf_state_charset() != CHARSET_DEFAULT)
                  || (ServerConf->normalize_javascript && Server->response.body_size))
              {
                  if ( Server->response.body_size < sizeof(HttpDecodeBuf.data) )
diff --git a/extra/src/inspectors/http_server/hi_server_norm.cc b/extra/src/inspectors/http_server/hi_server_norm.cc

index 3111519df365a602d762d34a728d3b25a4f144f0..ba4696f35f93f04f2f712e96395995d6cdae49a1 100644 (file)
--- a/extra/src/inspectors/http_server/hi_server_norm.cc
+++ b/extra/src/inspectors/http_server/hi_server_norm.cc
@@ -183,76 +183,24 @@ int hi_server_norm(HI_SESSION* session, HttpSessionData* hsd)
  
      if (session->server_conf->normalize_utf && (ServerResp->body_size > 0))
      {
-        int bytes_copied, result, charset;
+        int bytes_copied;
+        bool decoded;
  
          if (hsd)
          {
-            charset = get_decode_utf_state_charset(&(hsd->utf_state));
+            decoded = hsd->utf_state->decode_utf((const char*)ServerResp->body, ServerResp->body_size,
+                (char*)HttpDecodeBuf.data, sizeof(HttpDecodeBuf.data), &bytes_copied);
  
-            if (charset == CHARSET_UNKNOWN)
-            {
-                /* Got a text content type but no charset.
-                 * Look for potential BOM (Byte Order Mark) */
-                if (ServerResp->body_size >= 4)
+                if (!decoded)
                  {
-                    uint8_t size = 0;
-
-                    if (!memcmp(ServerResp->body, "\x00\x00\xFE\xFF", 4))
-                    {
-                        charset = CHARSET_UTF32BE;
-                        size = 4;
-                    }
-                    else if (!memcmp(ServerResp->body, "\xFF\xFE\x00\x00", 4))
-                    {
-                        charset = CHARSET_UTF32LE;
-                        size = 4;
-                    }
-                    else if (!memcmp(ServerResp->body, "\xFE\xFF", 2))
-                    {
-                        charset = CHARSET_UTF16BE;
-                        size = 2;
-                    }
-                    else if (!memcmp(ServerResp->body, "\xFF\xFE", 2))
-                    {
-                        charset = CHARSET_UTF16LE;
-                        size = 2;
-                    }
-                    else
-                        charset = CHARSET_DEFAULT; // ensure we don't try again
-
-                    ServerResp->body += size;
-                    ServerResp->body_size -= size;
+                    hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF_NORM_FAIL);
                  }
-                else
-                    charset = CHARSET_DEFAULT; // ensure we don't try again
-
-                set_decode_utf_state_charset(&(hsd->utf_state), charset);
-            }
-
-            /* Normalize server responses with utf-16le, utf-16be, utf-32le,
-               or utf-32be charsets.*/
-            switch (charset)
-            {
-            case CHARSET_UTF16LE:
-            case CHARSET_UTF16BE:
-            case CHARSET_UTF32LE:
-            case CHARSET_UTF32BE:
-                result = DecodeUTF((char*)ServerResp->body, ServerResp->body_size,
-                    (char*)HttpDecodeBuf.data, sizeof(HttpDecodeBuf.data),
-                    &bytes_copied,
-                    &(hsd->utf_state));
-
-                if (result == DECODE_UTF_FAILURE)
+                else if ( bytes_copied )
                  {
-                    hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF_NORM_FAIL);
+                    SetHttpDecode((uint16_t)bytes_copied);
+                    ServerResp->body = HttpDecodeBuf.data;
+                    ServerResp->body_size = HttpDecodeBuf.len;
                  }
-                SetHttpDecode((uint16_t)bytes_copied);
-                ServerResp->body = HttpDecodeBuf.data;
-                ServerResp->body_size = HttpDecodeBuf.len;
-                break;
-            default:
-                break;
-            }
          }
      }
  
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_enum.h b/src/service_inspectors/nhttp_inspect/nhttp_enum.h

index e6cf60b43efd682ce0f14ab7abd7badc792adac7..156ab86d8009e02e1b92efcff0b511daf3da2474 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_enum.h
+++ b/src/service_inspectors/nhttp_inspect/nhttp_enum.h
@@ -190,6 +190,8 @@ enum Infraction
      INF_CHUNKED_BEFORE_END,
      INF_OVERSIZE_DIR,
      INF_POST_WO_BODY,
+    INF_UTF_NORM_FAIL,
+    INF_UTF7,
      INF__MAX_VALUE
  };
  
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_flow_data.cc b/src/service_inspectors/nhttp_inspect/nhttp_flow_data.cc

index a0a068e0592e445f83df53fad76c0c59e0e0e43a..71d0a2485d5402ba2a8a6fb9b1960ea7cbd78669 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_flow_data.cc
+++ b/src/service_inspectors/nhttp_inspect/nhttp_flow_data.cc
@@ -75,6 +75,11 @@ NHttpFlowData::~NHttpFlowData()
          delete mime_state;
      }
  
+    if (utf_state != nullptr )
+    {
+        delete utf_state;
+    }
+
      delete_pipeline();
  }
  
@@ -119,6 +124,11 @@ void NHttpFlowData::half_reset(SourceId source_id)
          if (transaction[SRC_SERVER]->final_response())
              expected_trans_num[SRC_SERVER]++;
          status_code_num = STAT_NOT_PRESENT;
+        if (utf_state != nullptr)
+        {
+            delete utf_state;
+            utf_state = nullptr;
+        }
      }
  }
  
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_flow_data.h b/src/service_inspectors/nhttp_inspect/nhttp_flow_data.h

index 13c6824bed2f480989b65a71c2ebd85bd0cdf799..c9891cb8cd012471bd779833db69f23b592cfa61 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_flow_data.h
+++ b/src/service_inspectors/nhttp_inspect/nhttp_flow_data.h
@@ -25,6 +25,7 @@
  
  #include "stream/stream_api.h"
  #include "mime/file_mime_process.h"
+#include "utils/util_utf.h"
  
  #include "nhttp_cutter.h"
  #include "nhttp_infractions.h"
@@ -107,6 +108,7 @@ private:
      int64_t detect_depth_remaining[2] = { NHttpEnums::STAT_NOT_PRESENT,
          NHttpEnums::STAT_NOT_PRESENT };
      MimeSession* mime_state = nullptr;  // SRC_CLIENT only
+    UtfDecodeSession* utf_state = nullptr; //SRC_SERVER only
      uint64_t expected_trans_num[2] = { 1, 1 };
  
      // number of user data octets seen so far (regular body or chunks)
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_module.cc b/src/service_inspectors/nhttp_inspect/nhttp_module.cc

index 3347536f20f573b1b53521370fabf1b303940a26..df26c04c8b82e4e46f2affb327d70cc4e5d1465d 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_module.cc
+++ b/src/service_inspectors/nhttp_inspect/nhttp_module.cc
@@ -34,6 +34,7 @@ const Parameter NHttpModule::nhttp_params[] =
      { "response_depth", Parameter::PT_INT, "-1:", "-1",
            "maximum response message body bytes to examine (-1 no limit)" },
      { "unzip", Parameter::PT_BOOL, nullptr, "true", "decompress gzip and deflate message bodies" },
+    { "normalize_utf", Parameter::PT_BOOL, nullptr, "true", "normalize charset utf encodings" },
      { "bad_characters", Parameter::PT_BIT_LIST, "255", nullptr,
            "alert when any of specified bytes are present in URI after percent decoding" },
      { "ignore_unreserved", Parameter::PT_STRING, "(optional)", nullptr,
@@ -95,6 +96,10 @@ bool NHttpModule::set(const char*, Value& val, SnortConfig*)
      {
          params->unzip = val.get_bool();
      }
+    else if (val.is("normalize_utf"))
+    {
+        params->normalize_utf = val.get_bool();
+    }
      else if (val.is("bad_characters"))
      {
          val.get_bits(params->uri_param.bad_characters);
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_module.h b/src/service_inspectors/nhttp_inspect/nhttp_module.h

index dff3023e418af1382915d1e2874b29658955098d..c83c7c8bbce2dfad39ef6c090f620749f360ad38 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_module.h
+++ b/src/service_inspectors/nhttp_inspect/nhttp_module.h
@@ -36,6 +36,7 @@ public:
      long request_depth;
      long response_depth;
      bool unzip;
+    bool normalize_utf = true;
      struct UriParam
      {
      public:
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_msg_body.cc b/src/service_inspectors/nhttp_inspect/nhttp_msg_body.cc

index 7535f927a4b71caec6345350fc455b803a072760..0a988d3a5d060a24c1cd9af8a0a60bc20326d820 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_msg_body.cc
+++ b/src/service_inspectors/nhttp_inspect/nhttp_msg_body.cc
@@ -47,20 +47,35 @@ NHttpMsgBody::~NHttpMsgBody()
  {
      if (classic_client_body_alloc)
          classic_client_body.delete_buffer();
+
+    if (decoded_body_alloc)
+        decoded_body.delete_buffer();
  }
  
  void NHttpMsgBody::analyze()
  {
-    detect_data.length = (msg_text.length <= session_data->detect_depth_remaining[source_id]) ?
-       msg_text.length : session_data->detect_depth_remaining[source_id];
-    detect_data.start = msg_text.start;
+    do_utf_decoding(msg_text, decoded_body, decoded_body_alloc);
+    if ( decoded_body_alloc )
+    {
+        detect_data.length = (decoded_body.length <= session_data->detect_depth_remaining[source_id]) ?
+           decoded_body.length : session_data->detect_depth_remaining[source_id];
+        detect_data.start = decoded_body.start;
+    }
+    else
+    {
+        detect_data.length = (msg_text.length <= session_data->detect_depth_remaining[source_id]) ?
+           msg_text.length : session_data->detect_depth_remaining[source_id];
+        detect_data.start = msg_text.start;
+    }
+
      session_data->detect_depth_remaining[source_id] -= detect_data.length;
  
      // Always set file data. File processing will later set a new value in some cases.
      file_data.length = detect_data.length;
+
      if (file_data.length > 0)
      {
-        file_data.start = msg_text.start;
+        file_data.start = detect_data.start;
          set_file_data(const_cast<uint8_t*>(file_data.start), (unsigned)file_data.length);
      }
  
@@ -72,6 +87,36 @@ void NHttpMsgBody::analyze()
      body_octets += msg_text.length;
  }
  
+// Decode a UTF-encoded server message body into a newly allocated buffer.
+//
+// input         - raw body section to decode
+// output        - set to the decoded bytes only when decoding produced data
+// decoded_alloc - set to true when output now refers to a buffer allocated here
+//
+// Nothing is done when normalize_utf is disabled, for client messages, for an empty
+// input, or when the flow has no decoder state reporting a UTF encoding. A decode
+// failure raises INF_UTF_NORM_FAIL / EVENT_UTF_NORM_FAIL and leaves output untouched.
+void NHttpMsgBody::do_utf_decoding(const Field& input, Field& output, bool& decoded_alloc)
+{
+    if (!params->normalize_utf || source_id == SRC_CLIENT )
+        return;
+
+    // Never size an allocation from a non-positive Field length
+    if (input.length <= 0)
+        return;
+
+    if (!session_data->utf_state || !session_data->utf_state->is_utf_encoding_present())
+        return;
+
+    // Initialised so the checks below are well defined even if the decoder never writes it
+    int bytes_copied = 0;
+    uint8_t* buffer = new uint8_t[input.length];
+    const bool decoded = session_data->utf_state->decode_utf((const char*)input.start,
+        input.length, (char*)buffer, input.length, &bytes_copied);
+
+    if (decoded && (bytes_copied > 0))
+    {
+        // Ownership of buffer moves to output; the flag records that it must be released
+        output.set(bytes_copied, buffer);
+        decoded_alloc = true;
+        return;
+    }
+
+    // Either the decode failed or it produced nothing: the scratch buffer is not needed
+    delete[] buffer;
+
+    if (!decoded)
+    {
+        infractions += INF_UTF_NORM_FAIL;
+        events.create_event(EVENT_UTF_NORM_FAIL);
+    }
+}
+
  void NHttpMsgBody::do_file_processing()
  {
      // Using the trick that cutter is deleted when regular or chunked body is complete
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_msg_body.h b/src/service_inspectors/nhttp_inspect/nhttp_msg_body.h

index 9da03277a3b67139af26a692fae40bf1049ea201..b87619ff0f0f6e8f22bb04c303a406aa79147c27 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_msg_body.h
+++ b/src/service_inspectors/nhttp_inspect/nhttp_msg_body.h
@@ -50,12 +50,15 @@ protected:
  
  private:
      void do_file_processing();
+    void do_utf_decoding(const Field& input, Field& output, bool& decoded_alloc);
  
      Field detect_data;
      Field file_data;
      const bool detection_section;
      Field classic_client_body;   // URI normalization applied
      bool classic_client_body_alloc = false;
+    Field decoded_body;
+    bool decoded_body_alloc = false;
  };
  
  #endif
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_msg_body_chunk.cc b/src/service_inspectors/nhttp_inspect/nhttp_msg_body_chunk.cc

index 7bfb4b68f0fed0722186290f62b3e22ecc85a876..48016ca8a512522e7cb5725e43df4fe8a8bd1a56 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_msg_body_chunk.cc
+++ b/src/service_inspectors/nhttp_inspect/nhttp_msg_body_chunk.cc
@@ -40,6 +40,12 @@ void NHttpMsgBodyChunk::update_flow()
              delete session_data->mime_state;
              session_data->mime_state = nullptr;
          }
+
+        if ((source_id == SRC_SERVER) && (session_data->utf_state != nullptr))
+        {
+            delete session_data->utf_state;
+            session_data->utf_state = nullptr;
+        }
      }
      else
      {
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_msg_head_shared.h b/src/service_inspectors/nhttp_inspect/nhttp_msg_head_shared.h

index 757a8e0f42cff5f3d15831a97011f007029afc47..cfcf2065349f6d153d3c25315f41d1ca8f1f0c2a 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_msg_head_shared.h
+++ b/src/service_inspectors/nhttp_inspect/nhttp_msg_head_shared.h
@@ -52,6 +52,8 @@ public:
      static const StrCode header_list[];
      static const StrCode trans_code_list[];
      static const StrCode content_code_list[];
+    static const StrCode charset_code_list[];
+    static const StrCode charset_code_opt_list[];
  
  protected:
      NHttpMsgHeadShared(const uint8_t* buffer, const uint16_t buf_size,
@@ -73,6 +75,7 @@ private:
      static const HeaderNormalizer NORMALIZER_BASIC;
      static const HeaderNormalizer NORMALIZER_NUMBER;
      static const HeaderNormalizer NORMALIZER_TOKEN_LIST;
+    static const HeaderNormalizer NORMALIZER_CHARSET;
      static const HeaderNormalizer NORMALIZER_CAT;
      static const HeaderNormalizer NORMALIZER_COOKIE;
  
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_msg_header.cc b/src/service_inspectors/nhttp_inspect/nhttp_msg_header.cc

index 7e7d607c76cd0aebc64e75407c52d818170db335..f2996f6e782b56926fee36b5b1cf1fd0f6f4b013 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_msg_header.cc
+++ b/src/service_inspectors/nhttp_inspect/nhttp_msg_header.cc
@@ -185,6 +185,7 @@ void NHttpMsgHeader::prepare_body()
      }
      setup_file_processing();
      setup_decompression();
+    setup_utf_decoding();
      update_depth();
      session_data->infractions[source_id].reset();
      session_data->events[source_id].reset();
@@ -260,6 +261,54 @@ void NHttpMsgHeader::setup_decompression()
      }
  }
  
+// Inspect the server's normalized Content-Type header and, when the body may need UTF
+// normalization, attach a UtfDecodeSession configured with the advertised charset to
+// the flow. Does nothing when normalize_utf is disabled, for client messages, or when
+// there is no usable Content-Type value.
+void NHttpMsgHeader::setup_utf_decoding()
+{
+    if (!params->normalize_utf || source_id == SRC_CLIENT )
+        return;
+
+    const Field& norm_content_type = get_header_value_norm(HEAD_CONTENT_TYPE);
+    if (norm_content_type.length <= 0)
+        return;
+
+    // Only the text after the last ';' is examined for a charset parameter
+    Field last_token;
+    get_last_token(norm_content_type, last_token, ';');
+
+    CharsetCode charset_code;
+
+    if ( last_token.length == norm_content_type.length )
+    {
+        // No semicolon in the Content-Type header: only content types mentioning
+        // "text" are of interest, and their charset is not known yet
+        if ( !SnortStrnStr((const char*)norm_content_type.start, norm_content_type.length,
+            "text") )
+            return;
+
+        charset_code = CHARSET_UNKNOWN;
+    }
+    else
+    {
+        charset_code = (CharsetCode)str_to_code(last_token.start, last_token.length,
+            NHttpMsgHeadShared::charset_code_list);
+
+        // NOTE(review): assumes CHARSET_OTHER equals the "no match" value returned by
+        // str_to_code - TODO confirm
+        if ( charset_code == CHARSET_OTHER )
+        {
+            // Not one of the fully spelled-out charsets; fall back to the partial table
+            charset_code = (CharsetCode)substr_to_code(last_token.start, last_token.length,
+                NHttpMsgHeadShared::charset_code_opt_list);
+
+            // Only an unrecognized "utf-" variant still warrants a decoder
+            if ( charset_code != CHARSET_UNKNOWN )
+                return;
+        }
+        else if ( charset_code == CHARSET_UTF7 )
+        {
+            infractions += INF_UTF7;
+            events.create_event(EVENT_UTF7);
+        }
+    }
+
+    // Release any decoder still attached to the flow before replacing it, so the raw
+    // owning pointer is never overwritten while it holds an allocation
+    delete session_data->utf_state;
+    session_data->utf_state = new UtfDecodeSession();
+    session_data->utf_state->set_decode_utf_state_charset(charset_code);
+}
+
+
  #ifdef REG_TEST
  void NHttpMsgHeader::print_section(FILE* output)
  {
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_msg_header.h b/src/service_inspectors/nhttp_inspect/nhttp_msg_header.h

index 10610b236ea7fe42bb1372f8c8a112f8c6c72890..7cda8fda4c383e010bcaf9dbf98524c566979f03 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_msg_header.h
+++ b/src/service_inspectors/nhttp_inspect/nhttp_msg_header.h
@@ -46,6 +46,7 @@ private:
      void prepare_body();
      void setup_file_processing();
      void setup_decompression();
+    void setup_utf_decoding();
  
      bool detection_section = true;
  
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_normalizers.cc b/src/service_inspectors/nhttp_inspect/nhttp_normalizers.cc

index 1a149b6c8ab7a302052d2f4871ec7d71fea52a09..c45493cfaea03d4a2b5af91c0bab54cf5c817bfb 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_normalizers.cc
+++ b/src/service_inspectors/nhttp_inspect/nhttp_normalizers.cc
@@ -54,6 +54,19 @@ int32_t norm_remove_lws(const uint8_t* in_buf, int32_t in_length, uint8_t* out_b
      }
      return length;
  }
+//FIXIT - norm_remove_lws and norm_remove_quotes_lws could be combined into one function
+// Header-value normalizer: copies in_buf to out_buf while dropping single quotes,
+// double quotes, and every octet flagged in is_sp_tab. Writes at most in_length octets
+// and returns how many were written. The infraction and event arguments are unused.
+int32_t norm_remove_quotes_lws(const uint8_t* in_buf, int32_t in_length, uint8_t* out_buf,
+    NHttpInfractions&, NHttpEventGen&)
+{
+    int32_t kept = 0;
+    int32_t pos = 0;
+
+    while (pos < in_length)
+    {
+        const uint8_t octet = in_buf[pos++];
+        const bool discard = (octet == '\'') || (octet == '\"') || is_sp_tab[octet];
+
+        if (!discard)
+            out_buf[kept++] = octet;
+    }
+
+    return kept;
+}
  
  // Other header-value processing functions (not using the standard normalization signature)
  // Convert a decimal field such as Content-Length to an integer.
@@ -77,16 +90,23 @@ int64_t norm_decimal_integer(const Field& input)
      return total;
  }
  
+// Locate the text that follows the last occurrence of ichar in input.
+//
+// input      - field to scan; its length must be positive (asserted)
+// last_token - set to the part of input after the last ichar, or to all of input when
+//              ichar does not occur; its length is zero when input ends with ichar
+// ichar      - separator character
+//
+// last_token refers to input's storage; nothing is copied.
+void get_last_token(const Field& input, Field& last_token, char ichar)
+{
+    assert(input.length > 0);
+
+    // Scan backwards by index so no pointer is ever formed before input.start
+    int32_t token_begin = input.length;
+    while ((token_begin > 0) && (input.start[token_begin - 1] != ichar))
+        token_begin--;
+
+    last_token.start = input.start + token_begin;
+    last_token.length = input.length - token_begin;
+}
+
  // Find the last token in a comma-separated field and convert it to an enum
  int32_t norm_last_token_code(const Field& input, const StrCode table[])
  {
-    assert(input.length > 0);
-    const uint8_t* last_start;
-    for (last_start = input.start + input.length - 1; (last_start >= input.start) &&
-        (*last_start != ','); last_start--);
-    last_start++;
-    const int32_t last_length = input.length - (last_start - input.start);
-    return str_to_code(last_start, last_length, table);
+    Field last_token;
+    get_last_token(input, last_token, ',');
+
+    return str_to_code(last_token.start, last_token.length, table);
  }
  
  // Given a comma-separated list of words, does "chunked" appear before the last word
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_normalizers.h b/src/service_inspectors/nhttp_inspect/nhttp_normalizers.h

index fdc62c0b664db168805d674395cb91a33616c762..6eedd15ddd01fd3deacaa9b50a959ca4c9bb7bca 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_normalizers.h
+++ b/src/service_inspectors/nhttp_inspect/nhttp_normalizers.h
@@ -34,8 +34,10 @@
  typedef int32_t (NormFunc)(const uint8_t*, int32_t, uint8_t*, NHttpInfractions&, NHttpEventGen&);
  NormFunc norm_to_lower;
  NormFunc norm_remove_lws;
+NormFunc norm_remove_quotes_lws;
  
  // Other normalization-related utilities
+void get_last_token(const Field& input, Field& last_token, char ichar);
  int64_t norm_decimal_integer(const Field& input);
  int32_t norm_last_token_code(const Field& input, const StrCode table[]);
  bool chunked_before_end(const Field& input);
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_str_to_code.cc b/src/service_inspectors/nhttp_inspect/nhttp_str_to_code.cc

index b192c9e5b38dbca51bafca9b95449cb9f29ca5b5..b409930dc3efa95eba8e76891dd139dd0948f7b7 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_str_to_code.cc
+++ b/src/service_inspectors/nhttp_inspect/nhttp_str_to_code.cc
@@ -38,3 +38,17 @@ SO_PUBLIC int32_t str_to_code(const uint8_t* text, const int32_t text_len, const
      return NHttpEnums::STAT_OTHER;
  }
  
+// Map text to a code by prefix.
+//
+// text     - octets to classify (not null-terminated)
+// text_len - number of octets in text
+// table    - candidate names and codes, ended by an entry whose name is null
+//
+// Returns the code of the first table entry whose name is a prefix of text, or
+// NHttpEnums::STAT_OTHER when no entry qualifies.
+SO_PUBLIC int32_t substr_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[])
+{
+    for (int32_t k=0; table[k].name != nullptr; k++)
+    {
+        const int32_t name_len = (int32_t)strlen(table[k].name);
+
+        // Text shorter than the name cannot begin with it. This also keeps an empty
+        // text from matching every entry and a negative length from reaching memcmp.
+        if (text_len < name_len)
+            continue;
+
+        if (memcmp(text, table[k].name, name_len) == 0)
+        {
+            return table[k].code;
+        }
+    }
+    return NHttpEnums::STAT_OTHER;
+}
+
+
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_str_to_code.h b/src/service_inspectors/nhttp_inspect/nhttp_str_to_code.h

index 2d979cf5b0e66bf44fb9dfc8b9f5a681e410235b..f089d28673aa07a0ffc94202ade4c5350b13601f 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_str_to_code.h
+++ b/src/service_inspectors/nhttp_inspect/nhttp_str_to_code.h
@@ -27,6 +27,7 @@ struct StrCode
  };
  
  int32_t str_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[]);
+int32_t substr_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[]);
  
  #endif
  
diff --git a/src/service_inspectors/nhttp_inspect/nhttp_tables.cc b/src/service_inspectors/nhttp_inspect/nhttp_tables.cc

index d1e4c0d394289ba0809af157cc6d33cc952e6583..b0e1e9fcad713f3848643aa57e6b85a86f0e4e38 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/nhttp_tables.cc
+++ b/src/service_inspectors/nhttp_inspect/nhttp_tables.cc
@@ -27,6 +27,8 @@
  #include "framework/module.h"
  #include "framework/counts.h"
  
+#include "utils/util_utf.h"
+
  #include "nhttp_enum.h"
  #include "nhttp_str_to_code.h"
  #include "nhttp_normalizers.h"
@@ -173,6 +175,24 @@ const StrCode NHttpMsgHeadShared::content_code_list[] =
      { 0,                         nullptr }
  };
  
+const StrCode NHttpMsgHeadShared::charset_code_list[] = // whole charset tokens, passed to str_to_code() by setup_utf_decoding()
+{
+    { CHARSET_DEFAULT,       "charset=utf-8" }, // utf-8 maps to the default charset code
+    { CHARSET_UTF7,          "charset=utf-7" }, // setup_utf_decoding() also raises INF_UTF7 for this code
+    { CHARSET_UTF16LE,       "charset=utf-16le" },
+    { CHARSET_UTF16BE,       "charset=utf-16be" },
+    { CHARSET_UTF32LE,       "charset=utf-32le" },
+    { CHARSET_UTF32BE,       "charset=utf-32be" },
+    { 0,                     nullptr } // end-of-table entry (null name)
+};
+
+const StrCode NHttpMsgHeadShared::charset_code_opt_list[] = // fallback table, passed to substr_to_code() when charset_code_list has no match
+{
+    { CHARSET_UNKNOWN,       "charset=utf-" }, // listed first: substr_to_code() returns the first matching entry
+    { CHARSET_IRRELEVANT,    "charset=" }, // setup_utf_decoding() creates no decoder for this code
+    { 0,                     nullptr } // null name ends the substr_to_code() scan
+};
+
  const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_BASIC
      { false, nullptr, nullptr, nullptr };
  
@@ -182,6 +202,9 @@ const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_NUMBER
  const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_TOKEN_LIST
      { true, norm_remove_lws, norm_to_lower, nullptr };
  
+const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_CHARSET
+    { true, norm_remove_quotes_lws, norm_to_lower, nullptr };
+
  const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_CAT
      { true, norm_remove_lws, nullptr, nullptr };
  
@@ -245,7 +268,7 @@ const HeaderNormalizer* const NHttpMsgHeadShared::header_norms[HEAD__MAX_VALUE]
      [HEAD_CONTENT_LOCATION] = &NORMALIZER_BASIC,
      [HEAD_CONTENT_MD5] = &NORMALIZER_BASIC,
      [HEAD_CONTENT_RANGE] = &NORMALIZER_BASIC,
-    [HEAD_CONTENT_TYPE] = &NORMALIZER_BASIC,
+    [HEAD_CONTENT_TYPE] = &NORMALIZER_CHARSET,
      [HEAD_EXPIRES] = &NORMALIZER_BASIC,
      [HEAD_LAST_MODIFIED] = &NORMALIZER_BASIC,
      [HEAD_X_FORWARDED_FOR] = &NORMALIZER_CAT,
diff --git a/src/service_inspectors/nhttp_inspect/test/nhttp_module_test.cc b/src/service_inspectors/nhttp_inspect/test/nhttp_module_test.cc

index c854b6be64bc67994f40b314af67fc750a0571be..7064e52e8fc0943f9edfc34a6c34bac84b7ba0b7 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/test/nhttp_module_test.cc
+++ b/src/service_inspectors/nhttp_inspect/test/nhttp_module_test.cc
@@ -44,6 +44,7 @@ void Value::get_bits(std::bitset<256ul>&) const {}
  int SnortEventqAdd(unsigned int, unsigned int, RuleType) { return 0; }
  
  int32_t str_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
+int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
  long NHttpTestManager::print_amount {};
  bool NHttpTestManager::print_hex {};
  
diff --git a/src/service_inspectors/nhttp_inspect/test/nhttp_normalizers_test.cc b/src/service_inspectors/nhttp_inspect/test/nhttp_normalizers_test.cc

index a05dfb6d199d88aedb6e463fcf3a78765bb4fa4b..f544fc8201694318bf28890b34422e2374406251 100644 (file)
--- a/src/service_inspectors/nhttp_inspect/test/nhttp_normalizers_test.cc
+++ b/src/service_inspectors/nhttp_inspect/test/nhttp_normalizers_test.cc
@@ -28,6 +28,7 @@
  
  // Stubs whose sole purpose is to make the test code link
  int32_t str_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
+int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
  const bool NHttpEnums::is_sp_tab[256] {};
  long NHttpTestManager::print_amount {};
  bool NHttpTestManager::print_hex {};
diff --git a/src/utils/util_utf.cc b/src/utils/util_utf.cc

index ff06d938b2db83e2efccf6a5a20c0ad57173e258..0689924395e79b1c053aed6d468def3d230fda62 100644 (file)
--- a/src/utils/util_utf.cc
+++ b/src/utils/util_utf.cc
@@ -22,6 +22,7 @@
  #include "util_utf.h"
  
  #include <stdlib.h>
+#include <string.h>
  
  #define DSTATE_FIRST 0
  #define DSTATE_SECOND 1
@@ -30,47 +31,36 @@
  
  void keep_utf_lib() { }
  
-/* init a new decode_utf_state_t */
-int init_decode_utf_state(decode_utf_state_t* p)
+UtfDecodeSession::UtfDecodeSession()
  {
-    if (p == NULL)
-        return DECODE_UTF_FAILURE;
-
-    p->state = DSTATE_FIRST;
-    p->charset = CHARSET_DEFAULT;
-    return DECODE_UTF_SUCCESS;
+    init_decode_utf_state();
  }
  
-/* terminate a decode_utf_state_t.
-   returns DECODE_UTF_FAILURE if we're not at the base state. */
-int term_decode_utf_state(decode_utf_state_t* dead)
+/* init a new decode_utf_state_t */
+void UtfDecodeSession::init_decode_utf_state()
  {
-    if (dead == NULL)
-        return DECODE_UTF_FAILURE;
-
-    if (dead->state != DSTATE_FIRST)
-        return DECODE_UTF_FAILURE;
-
-    return DECODE_UTF_SUCCESS;
+    dstate.state = DSTATE_FIRST;
+    dstate.charset = CHARSET_DEFAULT;
  }
  
  /* setters & getters */
-int set_decode_utf_state_charset(decode_utf_state_t* dstate, int charset)
+void UtfDecodeSession::set_decode_utf_state_charset(CharsetCode charset)
  {
-    if (dstate == NULL)
-        return DECODE_UTF_FAILURE;
-
-    dstate->state = DSTATE_FIRST;
-    dstate->charset = charset;
-    return DECODE_UTF_SUCCESS;
+    dstate.state = DSTATE_FIRST;
+    dstate.charset = charset;
  }
  
-int get_decode_utf_state_charset(decode_utf_state_t* dstate)
+CharsetCode UtfDecodeSession::get_decode_utf_state_charset()
  {
-    if (dstate == NULL)
-        return DECODE_UTF_FAILURE;
+    return dstate.charset;
+}
  
-    return dstate->charset;
+/* true when the stored charset code compares greater than CHARSET_IRRELEVANT */
+bool UtfDecodeSession::is_utf_encoding_present()
+{
+    return get_decode_utf_state_charset() > CHARSET_IRRELEVANT;
+}
  
  /* Decode UTF-16le from src to dst.
@@ -80,39 +70,34 @@ int get_decode_utf_state_charset(decode_utf_state_t* dstate)
   * dst          => buffer to write translated text
   * dst_len      => length allocated for dst
   * bytes_copied => store the # of bytes copied to dst
- * dstate       => saved state from last call
   *
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
   */
  
-static int DecodeUTF16LE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF16LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
  {
-    char* src_index = src;
+    const char* src_index = src;
      char* dst_index = dst;
-    int result = DECODE_UTF_SUCCESS;
-
-    if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
-        dst_len == 0)
-        return DECODE_UTF_FAILURE;
+    bool result = true;
  
-    while ((src_index < (char*)(src + src_len)) &&
-        (dst_index < (char*)(dst + dst_len)))
+    while ((src_index < (src + src_len)) &&
+        (dst_index < (dst + dst_len)))
      {
          /* Copy first byte, skip second, failing if second byte != 0 */
-        switch (dstate->state)
+        switch (dstate.state)
          {
          case DSTATE_FIRST:
              *dst_index++ = *src_index++;
-            dstate->state = DSTATE_SECOND;
+            dstate.state = DSTATE_SECOND;
              break;
          case DSTATE_SECOND:
              if (*src_index++ > 0)
-                result = DECODE_UTF_FAILURE;
-            dstate->state = DSTATE_FIRST;
+                result = false;
+            dstate.state = DSTATE_FIRST;
              break;
          default:
-            return DECODE_UTF_FAILURE;
+            return false;
          }
      }
  
@@ -128,39 +113,34 @@ static int DecodeUTF16LE(char* src, unsigned int src_len, char* dst, unsigned in
   * dst          => buffer to write translated text
   * dst_len      => length allocated for dst
   * bytes_copied => store the # of bytes copied to dst
- * dstate       => saved state from last call
   *
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
   */
  
-static int DecodeUTF16BE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF16BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
  {
-    char* src_index = src;
+    const char* src_index = src;
      char* dst_index = dst;
-    int result = DECODE_UTF_SUCCESS;
-
-    if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
-        dst_len == 0)
-        return DECODE_UTF_FAILURE;
+    bool result = true;
  
-    while ((src_index < (char*)(src + src_len)) &&
-        (dst_index < (char*)(dst + dst_len)))
+    while ((src_index < (src + src_len)) &&
+        (dst_index < (dst + dst_len)))
      {
          /* Skip first byte, copy second. */
-        switch (dstate->state)
+        switch (dstate.state)
          {
          case DSTATE_FIRST:
              if (*src_index++ > 0)
-                result = DECODE_UTF_FAILURE;
-            dstate->state = DSTATE_SECOND;
+                result = false;
+            dstate.state = DSTATE_SECOND;
              break;
          case DSTATE_SECOND:
              *dst_index++ = *src_index++;
-            dstate->state = DSTATE_FIRST;
+            dstate.state = DSTATE_FIRST;
              break;
          default:
-            return DECODE_UTF_FAILURE;
+            return false;
          }
      }
  
@@ -176,44 +156,39 @@ static int DecodeUTF16BE(char* src, unsigned int src_len, char* dst, unsigned in
   * dst          => buffer to write translated text
   * dst_len      => length allocated for dst
   * bytes_copied => store the # of bytes copied to dst
- * dstate       => saved state from last call
   *
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
   */
  
-static int DecodeUTF32LE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF32LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
  {
-    char* src_index = src;
+    const char* src_index = src;
      char* dst_index = dst;
-    int result = DECODE_UTF_SUCCESS;
+    bool result = true;
  
-    if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
-        dst_len == 0)
-        return DECODE_UTF_FAILURE;
-
-    while ((src_index < (char*)(src + src_len)) &&
-        (dst_index < (char*)(dst + dst_len)))
+    while ((src_index < (src + src_len)) &&
+        (dst_index < (dst + dst_len)))
      {
          /* Copy the first byte, then skip three. */
-        switch (dstate->state)
+        switch (dstate.state)
          {
          case DSTATE_FIRST:
              *dst_index++ = *src_index++;
-            dstate->state++;
+            dstate.state++;
              break;
          case DSTATE_SECOND:
          case DSTATE_THIRD:
          case DSTATE_FOURTH:
              if (*src_index++ > 0)
-                result = DECODE_UTF_FAILURE;
-            if (dstate->state == DSTATE_FOURTH)
-                dstate->state = DSTATE_FIRST;
+                result = false;
+            if (dstate.state == DSTATE_FOURTH)
+                dstate.state = DSTATE_FIRST;
              else
-                dstate->state++;
+                dstate.state++;
              break;
          default:
-            return DECODE_UTF_FAILURE;
+            return false;
          }
      }
  
@@ -229,41 +204,36 @@ static int DecodeUTF32LE(char* src, unsigned int src_len, char* dst, unsigned in
   * dst          => buffer to write translated text
   * dst_len      => length allocated for dst
   * bytes_copied => store the # of bytes copied to dst
- * dstate       => saved state from last call
   *
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
   */
  
-static int DecodeUTF32BE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF32BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
  {
-    char* src_index = src;
+    const char* src_index = src;
      char* dst_index = dst;
-    int result = DECODE_UTF_SUCCESS;
-
-    if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
-        dst_len == 0)
-        return DECODE_UTF_FAILURE;
+    bool result = true;
  
-    while ((src_index < (char*)(src + src_len)) &&
-        (dst_index < (char*)(dst + dst_len)))
+    while ((src_index < (src + src_len)) &&
+        (dst_index < (dst + dst_len)))
      {
          /* Skip 3 bytes, copy the fourth. */
-        switch (dstate->state)
+        switch (dstate.state)
          {
          case DSTATE_FIRST:
          case DSTATE_SECOND:
          case DSTATE_THIRD:
              if (*src_index++ > 0)
-                result = DECODE_UTF_FAILURE;
-            dstate->state++;
+                result = false;
+            dstate.state++;
              break;
          case DSTATE_FOURTH:
              *dst_index++ = *src_index++;
-            dstate->state = DSTATE_FIRST;
+            dstate.state = DSTATE_FIRST;
              break;
          default:
-            return DECODE_UTF_FAILURE;
+            return false;
          }
      }
  
@@ -272,28 +242,78 @@ static int DecodeUTF32BE(char* src, unsigned int src_len, char* dst, unsigned in
      return result;
  }
  
+void UtfDecodeSession::determine_charset(const char** src, unsigned int *src_len)
+{
+    CharsetCode charset;
+    if (dstate.charset == CHARSET_UNKNOWN)
+    {
+        /* Got a text content type but no charset.
+         * Look for potential BOM (Byte Order Mark) */
+        if (*src_len >= 4)
+        {
+            uint8_t size = 0;
+
+            if (!memcmp(*src, "\x00\x00\xFE\xFF", 4))
+            {
+                charset = CHARSET_UTF32BE;
+                size = 4;
+            }
+            else if (!memcmp(*src, "\xFF\xFE\x00\x00", 4))
+            {
+                charset = CHARSET_UTF32LE;
+                size = 4;
+            }
+            else if (!memcmp(*src, "\xFE\xFF", 2))
+            {
+                charset = CHARSET_UTF16BE;
+                size = 2;
+            }
+            else if (!memcmp(*src, "\xFF\xFE", 2))
+            {
+                charset = CHARSET_UTF16LE;
+                size = 2;
+            }
+            else
+                charset = CHARSET_DEFAULT; // ensure we don't try again
+            *src +=size;
+            *src_len -=size;
+        }
+        else
+            charset = CHARSET_DEFAULT; // ensure we don't try again
+        set_decode_utf_state_charset(charset);
+
+    }
+}
+
  /* Wrapper function for DecodeUTF{16,32}{LE,BE} */
-int DecodeUTF(
-    char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::decode_utf(
+    const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
  {
-    if ( !src || !dst || !bytes_copied || !dstate || !src_len || !dst_len )
-        return DECODE_UTF_FAILURE;
+    if ( !src || !dst || !bytes_copied || !src_len || !dst_len )
+        return false;
+
+    *bytes_copied = 0;
  
-    switch (dstate->charset)
+    determine_charset(&src, &src_len);
+
+    if( !src_len)
+        return false;
+
+    switch (dstate.charset)
      {
      case CHARSET_UTF16LE:
-        return DecodeUTF16LE(src, src_len, dst, dst_len, bytes_copied, dstate);
+        return DecodeUTF16LE(src, src_len, dst, dst_len, bytes_copied);
      case CHARSET_UTF16BE:
-        return DecodeUTF16BE(src, src_len, dst, dst_len, bytes_copied, dstate);
+        return DecodeUTF16BE(src, src_len, dst, dst_len, bytes_copied);
      case CHARSET_UTF32LE:
-        return DecodeUTF32LE(src, src_len, dst, dst_len, bytes_copied, dstate);
+        return DecodeUTF32LE(src, src_len, dst, dst_len, bytes_copied);
      case CHARSET_UTF32BE:
-        return DecodeUTF32BE(src, src_len, dst, dst_len, bytes_copied, dstate);
+        return DecodeUTF32BE(src, src_len, dst, dst_len, bytes_copied);
+    default:
+        break;
      }
  
-    /* In case the function is called with a bad charset. */
-    *bytes_copied = 0;
-    return DECODE_UTF_FAILURE;
+    return true;
  }
  
diff --git a/src/utils/util_utf.h b/src/utils/util_utf.h

index 452e495e43334f2d10493cb20a7c8363db8e1021..dcb491160eccb5e3fcf6665510e9743270a99212 100644 (file)
--- a/src/utils/util_utf.h
+++ b/src/utils/util_utf.h
@@ -24,18 +24,19 @@
  
  #include "main/snort_types.h"
  
-// return codes
-#define DECODE_UTF_SUCCESS  0  // FIXIT-L replace with bool
-#define DECODE_UTF_FAILURE -1
-
-// Character set types 
-#define CHARSET_DEFAULT 0  // FIXIT-L these should be an enum
-#define CHARSET_UTF7    1
-#define CHARSET_UTF16LE 2
-#define CHARSET_UTF16BE 3
-#define CHARSET_UTF32LE 4
-#define CHARSET_UTF32BE 5
-#define CHARSET_UNKNOWN 255
+// Character set types, used by the HTTP inspectors; update the inspectors when changing these values.
+enum CharsetCode
+{
+    CHARSET_DEFAULT=0,
+    CHARSET_OTHER,
+    CHARSET_UTF7,
+    CHARSET_IRRELEVANT,
+    CHARSET_UTF16LE,
+    CHARSET_UTF16BE,
+    CHARSET_UTF32LE,
+    CHARSET_UTF32BE,
+    CHARSET_UNKNOWN
+};
  
  // Since payloads don't have to end on 2/4-byte boundaries, callers to
  // DecodeUTF are responsible for keeping a decode_utf_state_t. This carries
@@ -43,23 +44,27 @@
  struct decode_utf_state_t
  {
      int state;
-    int charset;
+    CharsetCode charset;
  };
  
  void keep_utf_lib();  // FIXIT-L eliminate; required to keep symbols for dyn plugins
  
-// Init & Terminate functions for decode_utf_state_t
-SO_PUBLIC int init_decode_utf_state(decode_utf_state_t*);
-SO_PUBLIC int term_decode_utf_state(decode_utf_state_t*);
-
-// setters & getters
-SO_PUBLIC int set_decode_utf_state_charset(decode_utf_state_t*, int charset);
-SO_PUBLIC int get_decode_utf_state_charset(decode_utf_state_t*);
-
-// UTF-Decoding function prototypes
-SO_PUBLIC int DecodeUTF(
-    char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t*);
-
+class SO_PUBLIC UtfDecodeSession
+{
+public:
+    UtfDecodeSession();
+    virtual ~UtfDecodeSession() { };
+    void init_decode_utf_state();
+    void set_decode_utf_state_charset(CharsetCode charset);
+    CharsetCode get_decode_utf_state_charset();
+    bool is_utf_encoding_present();
+    bool decode_utf(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+private:
+    decode_utf_state_t dstate;
+    bool DecodeUTF16LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+    bool DecodeUTF16BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+    bool DecodeUTF32LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+    bool DecodeUTF32BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+    void determine_charset(const char** src, unsigned int *src_len);
+};
  #endif
-
diff --git a/tools/snort2lua/preprocessor_states/pps_nhttp_inspect_server.cc b/tools/snort2lua/preprocessor_states/pps_nhttp_inspect_server.cc

index 5ffbfb706f0de730575d47a0e254ad7552b027db..083100b632c76f72bfa0a4e64e173c40a64dbca8 100644 (file)
--- a/tools/snort2lua/preprocessor_states/pps_nhttp_inspect_server.cc
+++ b/tools/snort2lua/preprocessor_states/pps_nhttp_inspect_server.cc
@@ -147,7 +147,7 @@ bool NHttpInspectServer::convert(std::istringstream& data_stream)
              table_api.add_deleted_comment("normalize_headers");
  
          else if (!keyword.compare("normalize_utf"))
-            table_api.add_deleted_comment("normalize_utf");
+            tmpval = table_api.add_option("normalize_utf", true);
  
          else if (!keyword.compare("log_uri"))
              table_api.add_deleted_comment("log_uri");
author	Russ Combs (rucombs) <rucombs@cisco.com>
	Fri, 12 Aug 2016 20:12:11 +0000 (16:12 -0400)
committer	Russ Combs (rucombs) <rucombs@cisco.com>
	Fri, 12 Aug 2016 20:12:11 +0000 (16:12 -0400)
extra/src/inspectors/http_server/hi_main.cc		patch \| blob \| blame \| history
extra/src/inspectors/http_server/hi_main.h		patch \| blob \| blame \| history
extra/src/inspectors/http_server/hi_server.cc		patch \| blob \| blame \| history
extra/src/inspectors/http_server/hi_server_norm.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_enum.h		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_flow_data.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_flow_data.h		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_module.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_module.h		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_msg_body.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_msg_body.h		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_msg_body_chunk.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_msg_head_shared.h		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_msg_header.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_msg_header.h		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_normalizers.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_normalizers.h		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_str_to_code.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_str_to_code.h		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/nhttp_tables.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/test/nhttp_module_test.cc		patch \| blob \| blame \| history
src/service_inspectors/nhttp_inspect/test/nhttp_normalizers_test.cc		patch \| blob \| blame \| history
src/utils/util_utf.cc		patch \| blob \| blame \| history
src/utils/util_utf.h		patch \| blob \| blame \| history
tools/snort2lua/preprocessor_states/pps_nhttp_inspect_server.cc		patch \| blob \| blame \| history