git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Merge pull request #575 in SNORT/snort3 from utf_decoding to master
author Russ Combs (rucombs) <rucombs@cisco.com>
Fri, 12 Aug 2016 20:12:11 +0000 (16:12 -0400)
committer Russ Combs (rucombs) <rucombs@cisco.com>
Fri, 12 Aug 2016 20:12:11 +0000 (16:12 -0400)
Squashed commit of the following:

commit f6b070405bd84b69aa52df672b7d3b69c484adff
Author: Bhagya Tholpady <bbantwal@cisco.com>
Date:   Thu Aug 11 12:05:58 2016 -0400

    resolving conflicts

    resolving conflicts

    resolving conflicts

    Updating utf decoding per review comments

    resolving conflicts

    resolving conflicts

    resolving conflicts

    Updating utf decoding per review comments

    resolving conflicts

    resolving conflicts

    reverting changes to a norm function in nhi

25 files changed:
extra/src/inspectors/http_server/hi_main.cc
extra/src/inspectors/http_server/hi_main.h
extra/src/inspectors/http_server/hi_server.cc
extra/src/inspectors/http_server/hi_server_norm.cc
src/service_inspectors/nhttp_inspect/nhttp_enum.h
src/service_inspectors/nhttp_inspect/nhttp_flow_data.cc
src/service_inspectors/nhttp_inspect/nhttp_flow_data.h
src/service_inspectors/nhttp_inspect/nhttp_module.cc
src/service_inspectors/nhttp_inspect/nhttp_module.h
src/service_inspectors/nhttp_inspect/nhttp_msg_body.cc
src/service_inspectors/nhttp_inspect/nhttp_msg_body.h
src/service_inspectors/nhttp_inspect/nhttp_msg_body_chunk.cc
src/service_inspectors/nhttp_inspect/nhttp_msg_head_shared.h
src/service_inspectors/nhttp_inspect/nhttp_msg_header.cc
src/service_inspectors/nhttp_inspect/nhttp_msg_header.h
src/service_inspectors/nhttp_inspect/nhttp_normalizers.cc
src/service_inspectors/nhttp_inspect/nhttp_normalizers.h
src/service_inspectors/nhttp_inspect/nhttp_str_to_code.cc
src/service_inspectors/nhttp_inspect/nhttp_str_to_code.h
src/service_inspectors/nhttp_inspect/nhttp_tables.cc
src/service_inspectors/nhttp_inspect/test/nhttp_module_test.cc
src/service_inspectors/nhttp_inspect/test/nhttp_normalizers_test.cc
src/utils/util_utf.cc
src/utils/util_utf.h
tools/snort2lua/preprocessor_states/pps_nhttp_inspect_server.cc

index 0283734bb963f94646ce82e66d003e6d163fd031..742ffe2867a59b92d58229842e45aec3ff40799a 100644 (file)
@@ -135,7 +135,7 @@ void HttpFlowData::init()
 HttpFlowData::HttpFlowData() : FlowData(flow_id)
 {
     memset(&session, 0, sizeof(session));
-    init_decode_utf_state(&session.utf_state);
+    session.utf_state = new UtfDecodeSession();
 }
 
 HttpFlowData::~HttpFlowData()
@@ -1182,6 +1182,9 @@ void FreeHttpSessionData(void* data)
     if (hsd->mime_ssn)
         delete hsd->mime_ssn;
 
+    if (hsd->utf_state)
+        delete hsd->utf_state;
+
     if ( hsd->fd_state != 0 )
     {
         File_Decomp_StopFree(hsd->fd_state);
index 65fe609d506fd369f9ca1c75e34081ba5222b714..276833eda74289bcadf248511bcae3be6cfece44 100644 (file)
@@ -125,7 +125,7 @@ typedef struct _HttpSessionData
     DECOMPRESS_STATE* decomp_state;
     HTTP_LOG_STATE* log_state;
     sfip_t* true_ip;
-    decode_utf_state_t utf_state;
+    UtfDecodeSession* utf_state;
     uint8_t log_flags;
     uint8_t cli_small_chunk_count;
     uint8_t srv_small_chunk_count;
index e806f4cffe33e92a62cf2642ade70252144a4d12..b8f66772d3466346b451b4527b32a1b4ab1f7fc8 100644 (file)
@@ -345,7 +345,7 @@ static inline const u_char* extract_http_content_type_charset(
     sf_unfold_header(p, end-p, unfold_buf, sizeof(unfold_buf), &unfold_size, 0, 0);
     if (!unfold_size)
     {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
         return p;
     }
     p += unfold_size;
@@ -356,14 +356,14 @@ static inline const u_char* extract_http_content_type_charset(
     ptr = SnortStrcasestr(ptr, (int)(ptr_end - ptr), "text");
     if (!ptr)
     {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
         return p;
     }
 
     ptr = SnortStrcasestr(ptr, (int)(ptr_end - ptr), "utf-");
     if (!ptr)
     {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
         return p;
     }
     ptr += 4; /* length of "utf-" */
@@ -371,28 +371,28 @@ static inline const u_char* extract_http_content_type_charset(
 
     if ((cmplen > 0) && (*ptr == '8'))
     {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
     }
     else if ((cmplen > 0) && (*ptr == '7'))
     {
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF7);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF7);
         hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF7);
     }
     else if (cmplen >= 4)
     {
         if ( !strncasecmp(ptr, "16le", 4) )
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF16LE);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF16LE);
         else if ( !strncasecmp(ptr, "16be", 4) )
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF16BE);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF16BE);
         else if ( !strncasecmp(ptr, "32le", 4) )
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF32LE);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF32LE);
         else if ( !strncasecmp(ptr, "32be", 4) )
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF32BE);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF32BE);
         else
-            set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+            hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
     }
     else
-        set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+        hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
 
     return p;
 }
@@ -1673,7 +1673,7 @@ static int HttpResponseInspection(HI_SESSION* session, Packet* p, const unsigned
                 }
             }
 
-            if ((get_decode_utf_state_charset(&(sd->utf_state)) != CHARSET_DEFAULT)
+            if ((sd->utf_state->get_decode_utf_state_charset() != CHARSET_DEFAULT)
                 || (ServerConf->normalize_javascript && Server->response.body_size))
             {
                 if ( Server->response.body_size < sizeof(HttpDecodeBuf.data) )
index 3111519df365a602d762d34a728d3b25a4f144f0..ba4696f35f93f04f2f712e96395995d6cdae49a1 100644 (file)
@@ -183,76 +183,24 @@ int hi_server_norm(HI_SESSION* session, HttpSessionData* hsd)
 
     if (session->server_conf->normalize_utf && (ServerResp->body_size > 0))
     {
-        int bytes_copied, result, charset;
+        int bytes_copied;
+        bool decoded;
 
         if (hsd)
         {
-            charset = get_decode_utf_state_charset(&(hsd->utf_state));
+            decoded = hsd->utf_state->decode_utf((const char*)ServerResp->body, ServerResp->body_size,
+                (char*)HttpDecodeBuf.data, sizeof(HttpDecodeBuf.data), &bytes_copied);
 
-            if (charset == CHARSET_UNKNOWN)
-            {
-                /* Got a text content type but no charset.
-                 * Look for potential BOM (Byte Order Mark) */
-                if (ServerResp->body_size >= 4)
+                if (!decoded)
                 {
-                    uint8_t size = 0;
-
-                    if (!memcmp(ServerResp->body, "\x00\x00\xFE\xFF", 4))
-                    {
-                        charset = CHARSET_UTF32BE;
-                        size = 4;
-                    }
-                    else if (!memcmp(ServerResp->body, "\xFF\xFE\x00\x00", 4))
-                    {
-                        charset = CHARSET_UTF32LE;
-                        size = 4;
-                    }
-                    else if (!memcmp(ServerResp->body, "\xFE\xFF", 2))
-                    {
-                        charset = CHARSET_UTF16BE;
-                        size = 2;
-                    }
-                    else if (!memcmp(ServerResp->body, "\xFF\xFE", 2))
-                    {
-                        charset = CHARSET_UTF16LE;
-                        size = 2;
-                    }
-                    else
-                        charset = CHARSET_DEFAULT; // ensure we don't try again
-
-                    ServerResp->body += size;
-                    ServerResp->body_size -= size;
+                    hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF_NORM_FAIL);
                 }
-                else
-                    charset = CHARSET_DEFAULT; // ensure we don't try again
-
-                set_decode_utf_state_charset(&(hsd->utf_state), charset);
-            }
-
-            /* Normalize server responses with utf-16le, utf-16be, utf-32le,
-               or utf-32be charsets.*/
-            switch (charset)
-            {
-            case CHARSET_UTF16LE:
-            case CHARSET_UTF16BE:
-            case CHARSET_UTF32LE:
-            case CHARSET_UTF32BE:
-                result = DecodeUTF((char*)ServerResp->body, ServerResp->body_size,
-                    (char*)HttpDecodeBuf.data, sizeof(HttpDecodeBuf.data),
-                    &bytes_copied,
-                    &(hsd->utf_state));
-
-                if (result == DECODE_UTF_FAILURE)
+                else if ( bytes_copied )
                 {
-                    hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF_NORM_FAIL);
+                    SetHttpDecode((uint16_t)bytes_copied);
+                    ServerResp->body = HttpDecodeBuf.data;
+                    ServerResp->body_size = HttpDecodeBuf.len;
                 }
-                SetHttpDecode((uint16_t)bytes_copied);
-                ServerResp->body = HttpDecodeBuf.data;
-                ServerResp->body_size = HttpDecodeBuf.len;
-                break;
-            default:
-                break;
-            }
         }
     }
 
index e6cf60b43efd682ce0f14ab7abd7badc792adac7..156ab86d8009e02e1b92efcff0b511daf3da2474 100644 (file)
@@ -190,6 +190,8 @@ enum Infraction
     INF_CHUNKED_BEFORE_END,
     INF_OVERSIZE_DIR,
     INF_POST_WO_BODY,
+    INF_UTF_NORM_FAIL,
+    INF_UTF7,
     INF__MAX_VALUE
 };
 
index a0a068e0592e445f83df53fad76c0c59e0e0e43a..71d0a2485d5402ba2a8a6fb9b1960ea7cbd78669 100644 (file)
@@ -75,6 +75,11 @@ NHttpFlowData::~NHttpFlowData()
         delete mime_state;
     }
 
+    if (utf_state != nullptr )
+    {
+        delete utf_state;
+    }
+
     delete_pipeline();
 }
 
@@ -119,6 +124,11 @@ void NHttpFlowData::half_reset(SourceId source_id)
         if (transaction[SRC_SERVER]->final_response())
             expected_trans_num[SRC_SERVER]++;
         status_code_num = STAT_NOT_PRESENT;
+        if (utf_state != nullptr)
+        {
+            delete utf_state;
+            utf_state = nullptr;
+        }
     }
 }
 
index 13c6824bed2f480989b65a71c2ebd85bd0cdf799..c9891cb8cd012471bd779833db69f23b592cfa61 100644 (file)
@@ -25,6 +25,7 @@
 
 #include "stream/stream_api.h"
 #include "mime/file_mime_process.h"
+#include "utils/util_utf.h"
 
 #include "nhttp_cutter.h"
 #include "nhttp_infractions.h"
@@ -107,6 +108,7 @@ private:
     int64_t detect_depth_remaining[2] = { NHttpEnums::STAT_NOT_PRESENT,
         NHttpEnums::STAT_NOT_PRESENT };
     MimeSession* mime_state = nullptr;  // SRC_CLIENT only
+    UtfDecodeSession* utf_state = nullptr; //SRC_SERVER only
     uint64_t expected_trans_num[2] = { 1, 1 };
 
     // number of user data octets seen so far (regular body or chunks)
index 3347536f20f573b1b53521370fabf1b303940a26..df26c04c8b82e4e46f2affb327d70cc4e5d1465d 100644 (file)
@@ -34,6 +34,7 @@ const Parameter NHttpModule::nhttp_params[] =
     { "response_depth", Parameter::PT_INT, "-1:", "-1",
           "maximum response message body bytes to examine (-1 no limit)" },
     { "unzip", Parameter::PT_BOOL, nullptr, "true", "decompress gzip and deflate message bodies" },
+    { "normalize_utf", Parameter::PT_BOOL, nullptr, "true", "normalize charset utf encodings" },
     { "bad_characters", Parameter::PT_BIT_LIST, "255", nullptr,
           "alert when any of specified bytes are present in URI after percent decoding" },
     { "ignore_unreserved", Parameter::PT_STRING, "(optional)", nullptr,
@@ -95,6 +96,10 @@ bool NHttpModule::set(const char*, Value& val, SnortConfig*)
     {
         params->unzip = val.get_bool();
     }
+    else if (val.is("normalize_utf"))
+    {
+        params->normalize_utf = val.get_bool();
+    }
     else if (val.is("bad_characters"))
     {
         val.get_bits(params->uri_param.bad_characters);
index dff3023e418af1382915d1e2874b29658955098d..c83c7c8bbce2dfad39ef6c090f620749f360ad38 100644 (file)
@@ -36,6 +36,7 @@ public:
     long request_depth;
     long response_depth;
     bool unzip;
+    bool normalize_utf = true;
     struct UriParam
     {
     public:
index 7535f927a4b71caec6345350fc455b803a072760..0a988d3a5d060a24c1cd9af8a0a60bc20326d820 100644 (file)
@@ -47,20 +47,35 @@ NHttpMsgBody::~NHttpMsgBody()
 {
     if (classic_client_body_alloc)
         classic_client_body.delete_buffer();
+
+    if (decoded_body_alloc)
+        decoded_body.delete_buffer();
 }
 
 void NHttpMsgBody::analyze()
 {
-    detect_data.length = (msg_text.length <= session_data->detect_depth_remaining[source_id]) ?
-       msg_text.length : session_data->detect_depth_remaining[source_id];
-    detect_data.start = msg_text.start;
+    do_utf_decoding(msg_text, decoded_body, decoded_body_alloc);
+    if ( decoded_body_alloc )
+    {
+        detect_data.length = (decoded_body.length <= session_data->detect_depth_remaining[source_id]) ?
+           decoded_body.length : session_data->detect_depth_remaining[source_id];
+        detect_data.start = decoded_body.start;
+    }
+    else
+    {
+        detect_data.length = (msg_text.length <= session_data->detect_depth_remaining[source_id]) ?
+           msg_text.length : session_data->detect_depth_remaining[source_id];
+        detect_data.start = msg_text.start;
+    }
+
     session_data->detect_depth_remaining[source_id] -= detect_data.length;
 
     // Always set file data. File processing will later set a new value in some cases.
     file_data.length = detect_data.length;
+
     if (file_data.length > 0)
     {
-        file_data.start = msg_text.start;
+        file_data.start = detect_data.start;
         set_file_data(const_cast<uint8_t*>(file_data.start), (unsigned)file_data.length);
     }
 
@@ -72,6 +87,36 @@ void NHttpMsgBody::analyze()
     body_octets += msg_text.length;
 }
 
+// Runs the flow's UTF decoder over one server message body section.
+//
+// input:         raw body section to decode
+// output:        set to the decoded bytes when decoding produced data; untouched otherwise
+// decoded_alloc: set to true when output was given a heap buffer that must later be released
+//                with delete_buffer()
+//
+// Nothing happens for client messages, when normalize_utf is off, when the flow has no UTF
+// decoder that reports an encoding, or when input is empty. A decode failure raises
+// INF_UTF_NORM_FAIL / EVENT_UTF_NORM_FAIL and leaves output untouched.
+void NHttpMsgBody::do_utf_decoding(const Field& input, Field& output, bool& decoded_alloc)
+{
+    if (!params->normalize_utf || source_id == SRC_CLIENT)
+        return;
+
+    // Field lengths can be non-positive; never size an allocation from such a value
+    if (input.length <= 0)
+        return;
+
+    if (!session_data->utf_state || !session_data->utf_state->is_utf_encoding_present())
+        return;
+
+    // Start from zero so the checks below never read an indeterminate value should the decoder
+    // return without writing a count
+    int bytes_copied = 0;
+    uint8_t* buffer = new uint8_t[input.length];
+    const bool decoded = session_data->utf_state->decode_utf((const char*)input.start,
+        input.length, (char*)buffer, input.length, &bytes_copied);
+
+    if (decoded && (bytes_copied > 0))
+    {
+        // output now refers to buffer; decoded_alloc tells the owner to free it
+        output.set(bytes_copied, buffer);
+        decoded_alloc = true;
+        return;
+    }
+
+    // Either decoding failed or it produced nothing: the scratch buffer is not handed out
+    delete[] buffer;
+
+    if (!decoded)
+    {
+        infractions += INF_UTF_NORM_FAIL;
+        events.create_event(EVENT_UTF_NORM_FAIL);
+    }
+}
+
+
 void NHttpMsgBody::do_file_processing()
 {
     // Using the trick that cutter is deleted when regular or chunked body is complete
index 9da03277a3b67139af26a692fae40bf1049ea201..b87619ff0f0f6e8f22bb04c303a406aa79147c27 100644 (file)
@@ -50,12 +50,15 @@ protected:
 
 private:
     void do_file_processing();
+    void do_utf_decoding(const Field& input, Field& output, bool& decoded_alloc);
 
     Field detect_data;
     Field file_data;
     const bool detection_section;
     Field classic_client_body;   // URI normalization applied
     bool classic_client_body_alloc = false;
+    Field decoded_body;
+    bool decoded_body_alloc = false;
 };
 
 #endif
index 7bfb4b68f0fed0722186290f62b3e22ecc85a876..48016ca8a512522e7cb5725e43df4fe8a8bd1a56 100644 (file)
@@ -40,6 +40,12 @@ void NHttpMsgBodyChunk::update_flow()
             delete session_data->mime_state;
             session_data->mime_state = nullptr;
         }
+
+        if ((source_id == SRC_SERVER) && (session_data->utf_state != nullptr))
+        {
+            delete session_data->utf_state;
+            session_data->utf_state = nullptr;
+        }
     }
     else
     {
index 757a8e0f42cff5f3d15831a97011f007029afc47..cfcf2065349f6d153d3c25315f41d1ca8f1f0c2a 100644 (file)
@@ -52,6 +52,8 @@ public:
     static const StrCode header_list[];
     static const StrCode trans_code_list[];
     static const StrCode content_code_list[];
+    static const StrCode charset_code_list[];
+    static const StrCode charset_code_opt_list[];
 
 protected:
     NHttpMsgHeadShared(const uint8_t* buffer, const uint16_t buf_size,
@@ -73,6 +75,7 @@ private:
     static const HeaderNormalizer NORMALIZER_BASIC;
     static const HeaderNormalizer NORMALIZER_NUMBER;
     static const HeaderNormalizer NORMALIZER_TOKEN_LIST;
+    static const HeaderNormalizer NORMALIZER_CHARSET;
     static const HeaderNormalizer NORMALIZER_CAT;
     static const HeaderNormalizer NORMALIZER_COOKIE;
 
index 7e7d607c76cd0aebc64e75407c52d818170db335..f2996f6e782b56926fee36b5b1cf1fd0f6f4b013 100644 (file)
@@ -185,6 +185,7 @@ void NHttpMsgHeader::prepare_body()
     }
     setup_file_processing();
     setup_decompression();
+    setup_utf_decoding();
     update_depth();
     session_data->infractions[source_id].reset();
     session_data->events[source_id].reset();
@@ -260,6 +261,54 @@ void NHttpMsgHeader::setup_decompression()
     }
 }
 
+// Examines the normalized Content-Type header of a server message and, when the body may need
+// UTF decoding, attaches a UtfDecodeSession configured with the detected charset to the flow.
+//
+// Nothing is attached for client messages, when normalize_utf is off, when there is no
+// Content-Type value, when a parameter-less Content-Type does not contain "text", or when the
+// last parameter is neither a listed charset nor one beginning with "charset=utf-".
+// A utf-7 charset additionally raises INF_UTF7 / EVENT_UTF7.
+void NHttpMsgHeader::setup_utf_decoding()
+{
+    if (!params->normalize_utf || source_id == SRC_CLIENT)
+        return;
+
+    const Field& norm_content_type = get_header_value_norm(HEAD_CONTENT_TYPE);
+    if (norm_content_type.length <= 0)
+        return;
+
+    Field last_token;
+    get_last_token(norm_content_type, last_token, ';');
+
+    CharsetCode charset_code = CHARSET_UNKNOWN;
+
+    // No semicolon in the Content-Type header
+    if (last_token.length == norm_content_type.length)
+    {
+        // Without parameters only text content is considered, with its charset left unknown
+        if (!SnortStrnStr((const char*)norm_content_type.start, norm_content_type.length, "text"))
+            return;
+    }
+    else
+    {
+        // Exact match against the fully specified charsets first
+        charset_code = (CharsetCode)str_to_code(last_token.start, last_token.length,
+            NHttpMsgHeadShared::charset_code_list);
+
+        if (charset_code == CHARSET_OTHER)
+        {
+            // Fall back to prefix matching; only an unspecified "utf-" variant is pursued
+            charset_code = (CharsetCode)substr_to_code(last_token.start, last_token.length,
+                NHttpMsgHeadShared::charset_code_opt_list);
+
+            if (charset_code != CHARSET_UNKNOWN)
+                return;
+        }
+        else if (charset_code == CHARSET_UTF7)
+        {
+            infractions += INF_UTF7;
+            events.create_event(EVENT_UTF7);
+        }
+    }
+
+    // Release any decoder still attached to the flow so that replacing it cannot leak;
+    // deleting a null pointer is a no-op
+    delete session_data->utf_state;
+    session_data->utf_state = new UtfDecodeSession();
+    session_data->utf_state->set_decode_utf_state_charset(charset_code);
+}
+
+
 #ifdef REG_TEST
 void NHttpMsgHeader::print_section(FILE* output)
 {
index 10610b236ea7fe42bb1372f8c8a112f8c6c72890..7cda8fda4c383e010bcaf9dbf98524c566979f03 100644 (file)
@@ -46,6 +46,7 @@ private:
     void prepare_body();
     void setup_file_processing();
     void setup_decompression();
+    void setup_utf_decoding();
 
     bool detection_section = true;
 
index 1a149b6c8ab7a302052d2f4871ec7d71fea52a09..c45493cfaea03d4a2b5af91c0bab54cf5c817bfb 100644 (file)
@@ -54,6 +54,19 @@ int32_t norm_remove_lws(const uint8_t* in_buf, int32_t in_length, uint8_t* out_b
     }
     return length;
 }
+//FIXIT - norm_remove_lws and norm_remove_quotes_lws could be combined into one function
+// Header value normalizer: copies in_buf to out_buf while dropping single quotes, double
+// quotes, and every octet flagged in is_sp_tab. Returns the number of octets written.
+// The infraction and event arguments are not used.
+int32_t norm_remove_quotes_lws(const uint8_t* in_buf, int32_t in_length, uint8_t* out_buf,
+    NHttpInfractions&, NHttpEventGen&)
+{
+    int32_t written = 0;
+    for (int32_t idx = 0; idx < in_length; idx++)
+    {
+        const uint8_t octet = in_buf[idx];
+        const bool is_quote = (octet == '\'') || (octet == '\"');
+
+        if (!is_quote && !is_sp_tab[octet])
+            out_buf[written++] = octet;
+    }
+    return written;
+}
 
 // Other header-value processing functions (not using the standard normalization signature)
 // Convert a decimal field such as Content-Length to an integer.
@@ -77,16 +90,23 @@ int64_t norm_decimal_integer(const Field& input)
     return total;
 }
 
+// Locates the last token of input: everything after the final occurrence of ichar.
+// When ichar does not occur, the token is the whole input; when input ends with ichar, the
+// token is empty. last_token is pointed into input's buffer; nothing is copied.
+// input must not be empty (asserted).
+void get_last_token(const Field& input, Field& last_token, char ichar)
+{
+    assert(input.length > 0);
+
+    // Walk backwards by offset rather than by pointer so that no pointer before input.start
+    // is ever formed when the separator is absent
+    int32_t token_offset = input.length;
+    while ((token_offset > 0) && (input.start[token_offset - 1] != ichar))
+        token_offset--;
+
+    last_token.start = input.start + token_offset;
+    last_token.length = input.length - token_offset;
+}
+
 // Find the last token in a comma-separated field and convert it to an enum
 int32_t norm_last_token_code(const Field& input, const StrCode table[])
 {
-    assert(input.length > 0);
-    const uint8_t* last_start;
-    for (last_start = input.start + input.length - 1; (last_start >= input.start) &&
-        (*last_start != ','); last_start--);
-    last_start++;
-    const int32_t last_length = input.length - (last_start - input.start);
-    return str_to_code(last_start, last_length, table);
+    Field last_token;
+    get_last_token(input, last_token, ',');
+
+    return str_to_code(last_token.start, last_token.length, table);
 }
 
 // Given a comma-separated list of words, does "chunked" appear before the last word
index fdc62c0b664db168805d674395cb91a33616c762..6eedd15ddd01fd3deacaa9b50a959ca4c9bb7bca 100644 (file)
 typedef int32_t (NormFunc)(const uint8_t*, int32_t, uint8_t*, NHttpInfractions&, NHttpEventGen&);
 NormFunc norm_to_lower;
 NormFunc norm_remove_lws;
+NormFunc norm_remove_quotes_lws;
 
 // Other normalization-related utilities
+void get_last_token(const Field& input, Field& last_token, char ichar);
 int64_t norm_decimal_integer(const Field& input);
 int32_t norm_last_token_code(const Field& input, const StrCode table[]);
 bool chunked_before_end(const Field& input);
index b192c9e5b38dbca51bafca9b95449cb9f29ca5b5..b409930dc3efa95eba8e76891dd139dd0948f7b7 100644 (file)
@@ -38,3 +38,17 @@ SO_PUBLIC int32_t str_to_code(const uint8_t* text, const int32_t text_len, const
     return NHttpEnums::STAT_OTHER;
 }
 
+// Returns the code of the first table entry whose name agrees with text over the shorter of
+// the two lengths, or NHttpEnums::STAT_OTHER when no entry does. The table ends at the first
+// entry with a null name. text need not be NUL terminated; text_len is its length.
+// NOTE(review): because only the overlapping bytes are compared, text that is shorter than an
+// entry name (including empty text) matches that entry - confirm this is intended.
+SO_PUBLIC int32_t substr_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[])
+{
+    for (const StrCode* entry = table; entry->name != nullptr; entry++)
+    {
+        const int32_t name_len = (int)strlen(entry->name);
+
+        // Compare no more bytes than either side provides
+        int32_t cmp_len = name_len;
+        if (text_len <= name_len)
+            cmp_len = text_len;
+
+        if (memcmp(text, entry->name, cmp_len) != 0)
+            continue;
+
+        return entry->code;
+    }
+    return NHttpEnums::STAT_OTHER;
+}
+
+
index 2d979cf5b0e66bf44fb9dfc8b9f5a681e410235b..f089d28673aa07a0ffc94202ade4c5350b13601f 100644 (file)
@@ -27,6 +27,7 @@ struct StrCode
 };
 
 int32_t str_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[]);
+int32_t substr_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[]);
 
 #endif
 
index d1e4c0d394289ba0809af157cc6d33cc952e6583..b0e1e9fcad713f3848643aa57e6b85a86f0e4e38 100644 (file)
@@ -27,6 +27,8 @@
 #include "framework/module.h"
 #include "framework/counts.h"
 
+#include "utils/util_utf.h"
+
 #include "nhttp_enum.h"
 #include "nhttp_str_to_code.h"
 #include "nhttp_normalizers.h"
@@ -173,6 +175,24 @@ const StrCode NHttpMsgHeadShared::content_code_list[] =
     { 0,                         nullptr }
 };
 
+// Fully specified charset parameters and their charset codes. The names are lower case with
+// no whitespace or quotes. The table is terminated by an entry with a null name.
+const StrCode NHttpMsgHeadShared::charset_code_list[] =
+{
+    { CHARSET_DEFAULT,       "charset=utf-8" },
+    { CHARSET_UTF7,          "charset=utf-7" },
+    { CHARSET_UTF16LE,       "charset=utf-16le" },
+    { CHARSET_UTF16BE,       "charset=utf-16be" },
+    { CHARSET_UTF32LE,       "charset=utf-32le" },
+    { CHARSET_UTF32BE,       "charset=utf-32be" },
+    { 0,                     nullptr }
+};
+
+// Charset parameter prefixes for values not found in charset_code_list. Order matters when
+// the table is searched with substr_to_code, which returns the first match: the more
+// specific "charset=utf-" must come before the generic "charset=".
+const StrCode NHttpMsgHeadShared::charset_code_opt_list[] =
+{
+    { CHARSET_UNKNOWN,       "charset=utf-" },
+    { CHARSET_IRRELEVANT,    "charset=" },
+    { 0,                     nullptr }
+};
+
 const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_BASIC
     { false, nullptr, nullptr, nullptr };
 
@@ -182,6 +202,9 @@ const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_NUMBER
 const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_TOKEN_LIST
     { true, norm_remove_lws, norm_to_lower, nullptr };
 
+// Normalizer that applies norm_remove_quotes_lws followed by norm_to_lower
+const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_CHARSET
+    { true, norm_remove_quotes_lws, norm_to_lower, nullptr };
+
 const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_CAT
     { true, norm_remove_lws, nullptr, nullptr };
 
@@ -245,7 +268,7 @@ const HeaderNormalizer* const NHttpMsgHeadShared::header_norms[HEAD__MAX_VALUE]
     [HEAD_CONTENT_LOCATION] = &NORMALIZER_BASIC,
     [HEAD_CONTENT_MD5] = &NORMALIZER_BASIC,
     [HEAD_CONTENT_RANGE] = &NORMALIZER_BASIC,
-    [HEAD_CONTENT_TYPE] = &NORMALIZER_BASIC,
+    [HEAD_CONTENT_TYPE] = &NORMALIZER_CHARSET,
     [HEAD_EXPIRES] = &NORMALIZER_BASIC,
     [HEAD_LAST_MODIFIED] = &NORMALIZER_BASIC,
     [HEAD_X_FORWARDED_FOR] = &NORMALIZER_CAT,
index c854b6be64bc67994f40b314af67fc750a0571be..7064e52e8fc0943f9edfc34a6c34bac84b7ba0b7 100644 (file)
@@ -44,6 +44,7 @@ void Value::get_bits(std::bitset<256ul>&) const {}
 int SnortEventqAdd(unsigned int, unsigned int, RuleType) { return 0; }
 
 int32_t str_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
+int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
 long NHttpTestManager::print_amount {};
 bool NHttpTestManager::print_hex {};
 
index a05dfb6d199d88aedb6e463fcf3a78765bb4fa4b..f544fc8201694318bf28890b34422e2374406251 100644 (file)
@@ -28,6 +28,7 @@
 
 // Stubs whose sole purpose is to make the test code link
 int32_t str_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
+int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
 const bool NHttpEnums::is_sp_tab[256] {};
 long NHttpTestManager::print_amount {};
 bool NHttpTestManager::print_hex {};
index ff06d938b2db83e2efccf6a5a20c0ad57173e258..0689924395e79b1c053aed6d468def3d230fda62 100644 (file)
@@ -22,6 +22,7 @@
 #include "util_utf.h"
 
 #include <stdlib.h>
+#include <string.h>
 
 #define DSTATE_FIRST 0
 #define DSTATE_SECOND 1
 
 void keep_utf_lib() { }
 
-/* init a new decode_utf_state_t */
-int init_decode_utf_state(decode_utf_state_t* p)
+UtfDecodeSession::UtfDecodeSession()
 {
-    if (p == NULL)
-        return DECODE_UTF_FAILURE;
-
-    p->state = DSTATE_FIRST;
-    p->charset = CHARSET_DEFAULT;
-    return DECODE_UTF_SUCCESS;
+    init_decode_utf_state();
 }
 
-/* terminate a decode_utf_state_t.
-   returns DECODE_UTF_FAILURE if we're not at the base state. */
-int term_decode_utf_state(decode_utf_state_t* dead)
+/* init a new decode_utf_state_t */
+void UtfDecodeSession::init_decode_utf_state()
 {
-    if (dead == NULL)
-        return DECODE_UTF_FAILURE;
-
-    if (dead->state != DSTATE_FIRST)
-        return DECODE_UTF_FAILURE;
-
-    return DECODE_UTF_SUCCESS;
+    dstate.state = DSTATE_FIRST;
+    dstate.charset = CHARSET_DEFAULT;
 }
 
 /* setters & getters */
-int set_decode_utf_state_charset(decode_utf_state_t* dstate, int charset)
+void UtfDecodeSession::set_decode_utf_state_charset(CharsetCode charset)
 {
-    if (dstate == NULL)
-        return DECODE_UTF_FAILURE;
-
-    dstate->state = DSTATE_FIRST;
-    dstate->charset = charset;
-    return DECODE_UTF_SUCCESS;
+    dstate.state = DSTATE_FIRST;
+    dstate.charset = charset;
 }
 
-int get_decode_utf_state_charset(decode_utf_state_t* dstate)
+CharsetCode UtfDecodeSession::get_decode_utf_state_charset()
 {
-    if (dstate == NULL)
-        return DECODE_UTF_FAILURE;
+    return dstate.charset;
+}
 
-    return dstate->charset;
+// Reports whether the session's charset ranks above CHARSET_IRRELEVANT
+bool UtfDecodeSession::is_utf_encoding_present()
+{
+    return get_decode_utf_state_charset() > CHARSET_IRRELEVANT;
+}
 
 /* Decode UTF-16le from src to dst.
@@ -80,39 +70,34 @@ int get_decode_utf_state_charset(decode_utf_state_t* dstate)
  * dst          => buffer to write translated text
  * dst_len      => length allocated for dst
  * bytes_copied => store the # of bytes copied to dst
- * dstate       => saved state from last call
  *
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
  */
 
-static int DecodeUTF16LE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF16LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
 {
-    char* src_index = src;
+    const char* src_index = src;
     char* dst_index = dst;
-    int result = DECODE_UTF_SUCCESS;
-
-    if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
-        dst_len == 0)
-        return DECODE_UTF_FAILURE;
+    bool result = true;
 
-    while ((src_index < (char*)(src + src_len)) &&
-        (dst_index < (char*)(dst + dst_len)))
+    while ((src_index < (src + src_len)) &&
+        (dst_index < (dst + dst_len)))
     {
         /* Copy first byte, skip second, failing if second byte != 0 */
-        switch (dstate->state)
+        switch (dstate.state)
         {
         case DSTATE_FIRST:
             *dst_index++ = *src_index++;
-            dstate->state = DSTATE_SECOND;
+            dstate.state = DSTATE_SECOND;
             break;
         case DSTATE_SECOND:
             if (*src_index++ > 0)
-                result = DECODE_UTF_FAILURE;
-            dstate->state = DSTATE_FIRST;
+                result = false;
+            dstate.state = DSTATE_FIRST;
             break;
         default:
-            return DECODE_UTF_FAILURE;
+            return false;
         }
     }
 
@@ -128,39 +113,34 @@ static int DecodeUTF16LE(char* src, unsigned int src_len, char* dst, unsigned in
  * dst          => buffer to write translated text
  * dst_len      => length allocated for dst
  * bytes_copied => store the # of bytes copied to dst
- * dstate       => saved state from last call
  *
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
  */
 
-static int DecodeUTF16BE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF16BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
 {
-    char* src_index = src;
+    const char* src_index = src;
     char* dst_index = dst;
-    int result = DECODE_UTF_SUCCESS;
-
-    if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
-        dst_len == 0)
-        return DECODE_UTF_FAILURE;
+    bool result = true;
 
-    while ((src_index < (char*)(src + src_len)) &&
-        (dst_index < (char*)(dst + dst_len)))
+    while ((src_index < (src + src_len)) &&
+        (dst_index < (dst + dst_len)))
     {
         /* Skip first byte, copy second. */
-        switch (dstate->state)
+        switch (dstate.state)
         {
         case DSTATE_FIRST:
             if (*src_index++ > 0)
-                result = DECODE_UTF_FAILURE;
-            dstate->state = DSTATE_SECOND;
+                result = false;
+            dstate.state = DSTATE_SECOND;
             break;
         case DSTATE_SECOND:
             *dst_index++ = *src_index++;
-            dstate->state = DSTATE_FIRST;
+            dstate.state = DSTATE_FIRST;
             break;
         default:
-            return DECODE_UTF_FAILURE;
+            return false;
         }
     }
 
@@ -176,44 +156,39 @@ static int DecodeUTF16BE(char* src, unsigned int src_len, char* dst, unsigned in
  * dst          => buffer to write translated text
  * dst_len      => length allocated for dst
  * bytes_copied => store the # of bytes copied to dst
- * dstate       => saved state from last call
  *
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true on success, false on failure
  */
 
-static int DecodeUTF32LE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF32LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
 {
-    char* src_index = src;
+    const char* src_index = src;
     char* dst_index = dst;
-    int result = DECODE_UTF_SUCCESS;
+    bool result = true;
 
-    if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
-        dst_len == 0)
-        return DECODE_UTF_FAILURE;
-
-    while ((src_index < (char*)(src + src_len)) &&
-        (dst_index < (char*)(dst + dst_len)))
+    while ((src_index < (src + src_len)) &&
+        (dst_index < (dst + dst_len)))
     {
         /* Copy the first byte, then skip three. */
-        switch (dstate->state)
+        switch (dstate.state)
         {
         case DSTATE_FIRST:
             *dst_index++ = *src_index++;
-            dstate->state++;
+            dstate.state++;
             break;
         case DSTATE_SECOND:
         case DSTATE_THIRD:
         case DSTATE_FOURTH:
             if (*src_index++ > 0)
-                result = DECODE_UTF_FAILURE;
-            if (dstate->state == DSTATE_FOURTH)
-                dstate->state = DSTATE_FIRST;
+                result = false;
+            if (dstate.state == DSTATE_FOURTH)
+                dstate.state = DSTATE_FIRST;
             else
-                dstate->state++;
+                dstate.state++;
             break;
         default:
-            return DECODE_UTF_FAILURE;
+            return false;
         }
     }
 
@@ -229,41 +204,36 @@ static int DecodeUTF32LE(char* src, unsigned int src_len, char* dst, unsigned in
  * dst          => buffer to write translated text
  * dst_len      => length allocated for dst
  * bytes_copied => store the # of bytes copied to dst
- * dstate       => saved state from last call
  *
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true on success, false on failure
  */
 
-static int DecodeUTF32BE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF32BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
 {
-    char* src_index = src;
+    const char* src_index = src;
     char* dst_index = dst;
-    int result = DECODE_UTF_SUCCESS;
-
-    if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
-        dst_len == 0)
-        return DECODE_UTF_FAILURE;
+    bool result = true;
 
-    while ((src_index < (char*)(src + src_len)) &&
-        (dst_index < (char*)(dst + dst_len)))
+    while ((src_index < (src + src_len)) &&
+        (dst_index < (dst + dst_len)))
     {
         /* Skip 3 bytes, copy the fourth. */
-        switch (dstate->state)
+        switch (dstate.state)
         {
         case DSTATE_FIRST:
         case DSTATE_SECOND:
         case DSTATE_THIRD:
             if (*src_index++ > 0)
-                result = DECODE_UTF_FAILURE;
-            dstate->state++;
+                result = false;
+            dstate.state++;
             break;
         case DSTATE_FOURTH:
             *dst_index++ = *src_index++;
-            dstate->state = DSTATE_FIRST;
+            dstate.state = DSTATE_FIRST;
             break;
         default:
-            return DECODE_UTF_FAILURE;
+            return false;
         }
     }
 
@@ -272,28 +242,78 @@ static int DecodeUTF32BE(char* src, unsigned int src_len, char* dst, unsigned in
     return result;
 }
 
+void UtfDecodeSession::determine_charset(const char** src, unsigned int *src_len)
+{
+    CharsetCode charset;
+    if (dstate.charset == CHARSET_UNKNOWN)
+    {
+        /* Got a text content type but no charset.
+         * Look for potential BOM (Byte Order Mark) */
+        if (*src_len >= 4)
+        {
+            uint8_t size = 0;
+
+            if (!memcmp(*src, "\x00\x00\xFE\xFF", 4))
+            {
+                charset = CHARSET_UTF32BE;
+                size = 4;
+            }
+            else if (!memcmp(*src, "\xFF\xFE\x00\x00", 4))
+            {
+                charset = CHARSET_UTF32LE;
+                size = 4;
+            }
+            else if (!memcmp(*src, "\xFE\xFF", 2))
+            {
+                charset = CHARSET_UTF16BE;
+                size = 2;
+            }
+            else if (!memcmp(*src, "\xFF\xFE", 2))
+            {
+                charset = CHARSET_UTF16LE;
+                size = 2;
+            }
+            else
+                charset = CHARSET_DEFAULT; // ensure we don't try again
+            *src +=size;
+            *src_len -=size;
+        }
+        else
+            charset = CHARSET_DEFAULT; // ensure we don't try again
+        set_decode_utf_state_charset(charset);
+
+    }
+}
+
 /* Wrapper function for DecodeUTF{16,32}{LE,BE} */
-int DecodeUTF(
-    char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::decode_utf(
+    const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+    int* bytes_copied)
 {
-    if ( !src || !dst || !bytes_copied || !dstate || !src_len || !dst_len )
-        return DECODE_UTF_FAILURE;
+    if ( !src || !dst || !bytes_copied || !src_len || !dst_len )
+        return false;
+
+    *bytes_copied = 0;
 
-    switch (dstate->charset)
+    determine_charset(&src, &src_len);
+
+    if( !src_len)
+        return false;
+
+    switch (dstate.charset)
     {
     case CHARSET_UTF16LE:
-        return DecodeUTF16LE(src, src_len, dst, dst_len, bytes_copied, dstate);
+        return DecodeUTF16LE(src, src_len, dst, dst_len, bytes_copied);
     case CHARSET_UTF16BE:
-        return DecodeUTF16BE(src, src_len, dst, dst_len, bytes_copied, dstate);
+        return DecodeUTF16BE(src, src_len, dst, dst_len, bytes_copied);
     case CHARSET_UTF32LE:
-        return DecodeUTF32LE(src, src_len, dst, dst_len, bytes_copied, dstate);
+        return DecodeUTF32LE(src, src_len, dst, dst_len, bytes_copied);
     case CHARSET_UTF32BE:
-        return DecodeUTF32BE(src, src_len, dst, dst_len, bytes_copied, dstate);
+        return DecodeUTF32BE(src, src_len, dst, dst_len, bytes_copied);
+    default:
+        break;
     }
 
-    /* In case the function is called with a bad charset. */
-    *bytes_copied = 0;
-    return DECODE_UTF_FAILURE;
+    return true;
 }
 
index 452e495e43334f2d10493cb20a7c8363db8e1021..dcb491160eccb5e3fcf6665510e9743270a99212 100644 (file)
 
 #include "main/snort_types.h"
 
-// return codes
-#define DECODE_UTF_SUCCESS  0  // FIXIT-L replace with bool
-#define DECODE_UTF_FAILURE -1
-
-// Character set types 
-#define CHARSET_DEFAULT 0  // FIXIT-L these should be an enum
-#define CHARSET_UTF7    1
-#define CHARSET_UTF16LE 2
-#define CHARSET_UTF16BE 3
-#define CHARSET_UTF32LE 4
-#define CHARSET_UTF32BE 5
-#define CHARSET_UNKNOWN 255
+// Character set types. Used by the HTTP inspectors; update the inspectors when changing these values.
+enum CharsetCode
+{
+    CHARSET_DEFAULT=0,
+    CHARSET_OTHER,
+    CHARSET_UTF7,
+    CHARSET_IRRELEVANT,
+    CHARSET_UTF16LE,
+    CHARSET_UTF16BE,
+    CHARSET_UTF32LE,
+    CHARSET_UTF32BE,
+    CHARSET_UNKNOWN
+};
 
 // Since payloads don't have to end on 2/4-byte boundaries, callers to
 // DecodeUTF are responsible for keeping a decode_utf_state_t. This carries
 struct decode_utf_state_t
 {
     int state;
-    int charset;
+    CharsetCode charset;
 };
 
 void keep_utf_lib();  // FIXIT-L eliminate; required to keep symbols for dyn plugins
 
-// Init & Terminate functions for decode_utf_state_t
-SO_PUBLIC int init_decode_utf_state(decode_utf_state_t*);
-SO_PUBLIC int term_decode_utf_state(decode_utf_state_t*);
-
-// setters & getters
-SO_PUBLIC int set_decode_utf_state_charset(decode_utf_state_t*, int charset);
-SO_PUBLIC int get_decode_utf_state_charset(decode_utf_state_t*);
-
-// UTF-Decoding function prototypes
-SO_PUBLIC int DecodeUTF(
-    char* src, unsigned int src_len, char* dst, unsigned int dst_len,
-    int* bytes_copied, decode_utf_state_t*);
-
+class SO_PUBLIC UtfDecodeSession
+{
+public:
+    UtfDecodeSession();
+    virtual ~UtfDecodeSession() { };
+    void init_decode_utf_state();
+    void set_decode_utf_state_charset(CharsetCode charset);
+    CharsetCode get_decode_utf_state_charset();
+    bool is_utf_encoding_present();
+    bool decode_utf(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+private:
+    decode_utf_state_t dstate;
+    bool DecodeUTF16LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+    bool DecodeUTF16BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+    bool DecodeUTF32LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+    bool DecodeUTF32BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+    void determine_charset(const char** src, unsigned int *src_len);
+};
 #endif
-
index 5ffbfb706f0de730575d47a0e254ad7552b027db..083100b632c76f72bfa0a4e64e173c40a64dbca8 100644 (file)
@@ -147,7 +147,7 @@ bool NHttpInspectServer::convert(std::istringstream& data_stream)
             table_api.add_deleted_comment("normalize_headers");
 
         else if (!keyword.compare("normalize_utf"))
-            table_api.add_deleted_comment("normalize_utf");
+            tmpval = table_api.add_option("normalize_utf", true);
 
         else if (!keyword.compare("log_uri"))
             table_api.add_deleted_comment("log_uri");