HttpFlowData::HttpFlowData() : FlowData(flow_id)
{
memset(&session, 0, sizeof(session));
- init_decode_utf_state(&session.utf_state);
+ // utf_state is a pointer to a heap-allocated UtfDecodeSession. It is assigned after the
+ // memset above, which zeroes the whole session struct (including this pointer).
+ session.utf_state = new UtfDecodeSession();
}
HttpFlowData::~HttpFlowData()
if (hsd->mime_ssn)
delete hsd->mime_ssn;
+ if (hsd->utf_state)
+ delete hsd->utf_state;
+
if ( hsd->fd_state != 0 )
{
File_Decomp_StopFree(hsd->fd_state);
DECOMPRESS_STATE* decomp_state;
HTTP_LOG_STATE* log_state;
sfip_t* true_ip;
- decode_utf_state_t utf_state;
+ UtfDecodeSession* utf_state;
uint8_t log_flags;
uint8_t cli_small_chunk_count;
uint8_t srv_small_chunk_count;
sf_unfold_header(p, end-p, unfold_buf, sizeof(unfold_buf), &unfold_size, 0, 0);
if (!unfold_size)
{
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
return p;
}
p += unfold_size;
ptr = SnortStrcasestr(ptr, (int)(ptr_end - ptr), "text");
if (!ptr)
{
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
return p;
}
ptr = SnortStrcasestr(ptr, (int)(ptr_end - ptr), "utf-");
if (!ptr)
{
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
return p;
}
ptr += 4; /* length of "utf-" */
if ((cmplen > 0) && (*ptr == '8'))
{
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_DEFAULT);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_DEFAULT);
}
else if ((cmplen > 0) && (*ptr == '7'))
{
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF7);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF7);
hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF7);
}
else if (cmplen >= 4)
{
if ( !strncasecmp(ptr, "16le", 4) )
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF16LE);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF16LE);
else if ( !strncasecmp(ptr, "16be", 4) )
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF16BE);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF16BE);
else if ( !strncasecmp(ptr, "32le", 4) )
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF32LE);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF32LE);
else if ( !strncasecmp(ptr, "32be", 4) )
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UTF32BE);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_UTF32BE);
else
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
}
else
- set_decode_utf_state_charset(&(hsd->utf_state), CHARSET_UNKNOWN);
+ hsd->utf_state->set_decode_utf_state_charset(CHARSET_UNKNOWN);
return p;
}
}
}
- if ((get_decode_utf_state_charset(&(sd->utf_state)) != CHARSET_DEFAULT)
+ if ((sd->utf_state->get_decode_utf_state_charset() != CHARSET_DEFAULT)
|| (ServerConf->normalize_javascript && Server->response.body_size))
{
if ( Server->response.body_size < sizeof(HttpDecodeBuf.data) )
if (session->server_conf->normalize_utf && (ServerResp->body_size > 0))
{
- int bytes_copied, result, charset;
+ int bytes_copied;
+ bool decoded;
if (hsd)
{
- charset = get_decode_utf_state_charset(&(hsd->utf_state));
+ decoded = hsd->utf_state->decode_utf((const char*)ServerResp->body, ServerResp->body_size,
+ (char*)HttpDecodeBuf.data, sizeof(HttpDecodeBuf.data), &bytes_copied);
- if (charset == CHARSET_UNKNOWN)
- {
- /* Got a text content type but no charset.
- * Look for potential BOM (Byte Order Mark) */
- if (ServerResp->body_size >= 4)
+ if (!decoded)
{
- uint8_t size = 0;
-
- if (!memcmp(ServerResp->body, "\x00\x00\xFE\xFF", 4))
- {
- charset = CHARSET_UTF32BE;
- size = 4;
- }
- else if (!memcmp(ServerResp->body, "\xFF\xFE\x00\x00", 4))
- {
- charset = CHARSET_UTF32LE;
- size = 4;
- }
- else if (!memcmp(ServerResp->body, "\xFE\xFF", 2))
- {
- charset = CHARSET_UTF16BE;
- size = 2;
- }
- else if (!memcmp(ServerResp->body, "\xFF\xFE", 2))
- {
- charset = CHARSET_UTF16LE;
- size = 2;
- }
- else
- charset = CHARSET_DEFAULT; // ensure we don't try again
-
- ServerResp->body += size;
- ServerResp->body_size -= size;
+ hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF_NORM_FAIL);
}
- else
- charset = CHARSET_DEFAULT; // ensure we don't try again
-
- set_decode_utf_state_charset(&(hsd->utf_state), charset);
- }
-
- /* Normalize server responses with utf-16le, utf-16be, utf-32le,
- or utf-32be charsets.*/
- switch (charset)
- {
- case CHARSET_UTF16LE:
- case CHARSET_UTF16BE:
- case CHARSET_UTF32LE:
- case CHARSET_UTF32BE:
- result = DecodeUTF((char*)ServerResp->body, ServerResp->body_size,
- (char*)HttpDecodeBuf.data, sizeof(HttpDecodeBuf.data),
- &bytes_copied,
- &(hsd->utf_state));
-
- if (result == DECODE_UTF_FAILURE)
+ else if ( bytes_copied )
{
- hi_set_event(GID_HTTP_SERVER, HI_SERVER_UTF_NORM_FAIL);
+ SetHttpDecode((uint16_t)bytes_copied);
+ ServerResp->body = HttpDecodeBuf.data;
+ ServerResp->body_size = HttpDecodeBuf.len;
}
- SetHttpDecode((uint16_t)bytes_copied);
- ServerResp->body = HttpDecodeBuf.data;
- ServerResp->body_size = HttpDecodeBuf.len;
- break;
- default:
- break;
- }
}
}
INF_CHUNKED_BEFORE_END,
INF_OVERSIZE_DIR,
INF_POST_WO_BODY,
+ INF_UTF_NORM_FAIL,
+ INF_UTF7,
INF__MAX_VALUE
};
delete mime_state;
}
+ if (utf_state != nullptr )
+ {
+ delete utf_state;
+ }
+
delete_pipeline();
}
if (transaction[SRC_SERVER]->final_response())
expected_trans_num[SRC_SERVER]++;
status_code_num = STAT_NOT_PRESENT;
+ if (utf_state != nullptr)
+ {
+ delete utf_state;
+ utf_state = nullptr;
+ }
}
}
#include "stream/stream_api.h"
#include "mime/file_mime_process.h"
+#include "utils/util_utf.h"
#include "nhttp_cutter.h"
#include "nhttp_infractions.h"
int64_t detect_depth_remaining[2] = { NHttpEnums::STAT_NOT_PRESENT,
NHttpEnums::STAT_NOT_PRESENT };
MimeSession* mime_state = nullptr; // SRC_CLIENT only
+ UtfDecodeSession* utf_state = nullptr; //SRC_SERVER only
uint64_t expected_trans_num[2] = { 1, 1 };
// number of user data octets seen so far (regular body or chunks)
{ "response_depth", Parameter::PT_INT, "-1:", "-1",
"maximum response message body bytes to examine (-1 no limit)" },
{ "unzip", Parameter::PT_BOOL, nullptr, "true", "decompress gzip and deflate message bodies" },
+ { "normalize_utf", Parameter::PT_BOOL, nullptr, "true", "normalize charset utf encodings" },
{ "bad_characters", Parameter::PT_BIT_LIST, "255", nullptr,
"alert when any of specified bytes are present in URI after percent decoding" },
{ "ignore_unreserved", Parameter::PT_STRING, "(optional)", nullptr,
{
params->unzip = val.get_bool();
}
+ else if (val.is("normalize_utf"))
+ {
+ params->normalize_utf = val.get_bool();
+ }
else if (val.is("bad_characters"))
{
val.get_bits(params->uri_param.bad_characters);
long request_depth;
long response_depth;
bool unzip;
+ bool normalize_utf = true;
struct UriParam
{
public:
{
if (classic_client_body_alloc)
classic_client_body.delete_buffer();
+
+ if (decoded_body_alloc)
+ decoded_body.delete_buffer();
}
void NHttpMsgBody::analyze()
{
- detect_data.length = (msg_text.length <= session_data->detect_depth_remaining[source_id]) ?
- msg_text.length : session_data->detect_depth_remaining[source_id];
- detect_data.start = msg_text.start;
+ // Attempt UTF normalization first. decoded_body_alloc becomes true only when
+ // do_utf_decoding() stored a decoded copy of msg_text in decoded_body.
+ do_utf_decoding(msg_text, decoded_body, decoded_body_alloc);
+ // Detection data is taken from the decoded copy when there is one, otherwise from the raw
+ // message text. Either way its length is capped by the remaining detection depth.
+ if ( decoded_body_alloc )
+ {
+ detect_data.length = (decoded_body.length <= session_data->detect_depth_remaining[source_id]) ?
+ decoded_body.length : session_data->detect_depth_remaining[source_id];
+ detect_data.start = decoded_body.start;
+ }
+ else
+ {
+ detect_data.length = (msg_text.length <= session_data->detect_depth_remaining[source_id]) ?
+ msg_text.length : session_data->detect_depth_remaining[source_id];
+ detect_data.start = msg_text.start;
+ }
+
session_data->detect_depth_remaining[source_id] -= detect_data.length;
// Always set file data. File processing will later set a new value in some cases.
file_data.length = detect_data.length;
+
if (file_data.length > 0)
{
- file_data.start = msg_text.start;
+ // File data points at the same bytes detection uses (decoded copy or raw text).
+ file_data.start = detect_data.start;
set_file_data(const_cast<uint8_t*>(file_data.start), (unsigned)file_data.length);
}
+ // Octet accounting uses the raw message text length, not the decoded length.
body_octets += msg_text.length;
}
+// Normalize a UTF-16/UTF-32 encoded server body section.
+//
+// input:         the raw body section to decode.
+// output:        receives the decoded bytes (via Field::set) when decoding produced data.
+// decoded_alloc: set to true only when output was given a newly allocated buffer, so the
+//                owner knows that buffer must be released later. Left untouched otherwise.
+//
+// Does nothing when normalize_utf is disabled, for client-side sections, for empty input, or
+// when the flow has no UtfDecodeSession reporting a UTF encoding. A decoding failure raises
+// INF_UTF_NORM_FAIL / EVENT_UTF_NORM_FAIL and leaves output unchanged.
+void NHttpMsgBody::do_utf_decoding(const Field& input, Field& output, bool& decoded_alloc)
+{
+    if (!params->normalize_utf || source_id == SRC_CLIENT )
+        return;
+
+    // Nothing to decode. Bailing out here avoids a zero or negative sized allocation and a
+    // spurious normalization-failure event (decode_utf() reports failure for empty input).
+    if (input.length <= 0)
+        return;
+
+    if (!session_data->utf_state || !session_data->utf_state->is_utf_encoding_present())
+        return;
+
+    // The decoders copy at most one output byte per input byte, so a buffer the size of the
+    // input is always large enough.
+    int bytes_copied = 0;
+    uint8_t* buffer = new uint8_t[input.length];
+    const bool decoded = session_data->utf_state->decode_utf((const char*)input.start,
+        input.length, (char*)buffer, input.length, &bytes_copied);
+
+    if (decoded && bytes_copied)
+    {
+        // Ownership of buffer passes to output; the flag tells the owner to free it.
+        output.set(bytes_copied, buffer);
+        decoded_alloc = true;
+        return;
+    }
+
+    // Either decoding failed or it produced no bytes: the scratch buffer is not needed.
+    delete[] buffer;
+    if (!decoded)
+    {
+        infractions += INF_UTF_NORM_FAIL;
+        events.create_event(EVENT_UTF_NORM_FAIL);
+    }
+}
+
void NHttpMsgBody::do_file_processing()
{
// Using the trick that cutter is deleted when regular or chunked body is complete
private:
void do_file_processing();
+ void do_utf_decoding(const Field& input, Field& output, bool& decoded_alloc);
Field detect_data;
Field file_data;
const bool detection_section;
Field classic_client_body; // URI normalization applied
bool classic_client_body_alloc = false;
+ Field decoded_body;
+ bool decoded_body_alloc = false;
};
#endif
delete session_data->mime_state;
session_data->mime_state = nullptr;
}
+
+ if ((source_id == SRC_SERVER) && (session_data->utf_state != nullptr))
+ {
+ delete session_data->utf_state;
+ session_data->utf_state = nullptr;
+ }
}
else
{
static const StrCode header_list[];
static const StrCode trans_code_list[];
static const StrCode content_code_list[];
+ static const StrCode charset_code_list[];
+ static const StrCode charset_code_opt_list[];
protected:
NHttpMsgHeadShared(const uint8_t* buffer, const uint16_t buf_size,
static const HeaderNormalizer NORMALIZER_BASIC;
static const HeaderNormalizer NORMALIZER_NUMBER;
static const HeaderNormalizer NORMALIZER_TOKEN_LIST;
+ static const HeaderNormalizer NORMALIZER_CHARSET;
static const HeaderNormalizer NORMALIZER_CAT;
static const HeaderNormalizer NORMALIZER_COOKIE;
}
setup_file_processing();
setup_decompression();
+ setup_utf_decoding();
update_depth();
session_data->infractions[source_id].reset();
session_data->events[source_id].reset();
}
}
+// Inspect the normalized Content-Type header of a server message and, when the body may need
+// UTF normalization, attach a UtfDecodeSession carrying the detected charset to the flow.
+//
+// No session is created when normalize_utf is off, for client messages, when there is no
+// Content-Type value, for non-text types without parameters, or when the last parameter is
+// not recognized as a UTF charset (or a "charset=utf-" prefix).
+void NHttpMsgHeader::setup_utf_decoding()
+{
+    if (!params->normalize_utf || source_id == SRC_CLIENT )
+        return;
+
+    const Field& norm_content_type = get_header_value_norm(HEAD_CONTENT_TYPE);
+    if (norm_content_type.length <= 0)
+        return;
+
+    // Only the last ';'-separated parameter is examined for a charset.
+    Field last_token;
+    get_last_token(norm_content_type, last_token, ';');
+
+    CharsetCode charset_code;
+
+    // No semicolon in the Content-Type header
+    if ( last_token.length == norm_content_type.length )
+    {
+        // Without parameters only text content is of interest. CHARSET_UNKNOWN makes the
+        // decoder look for a byte order mark when body data arrives.
+        if ( !SnortStrnStr((const char*)norm_content_type.start, norm_content_type.length, "text") )
+            return;
+
+        charset_code = CHARSET_UNKNOWN;
+    }
+    else
+    {
+        // NOTE(review): this relies on str_to_code()'s "no match" result being numerically
+        // equal to CHARSET_OTHER - confirm against NHttpEnums::STAT_OTHER.
+        charset_code = (CharsetCode)str_to_code(last_token.start, last_token.length,
+            NHttpMsgHeadShared::charset_code_list);
+
+        if ( charset_code == CHARSET_OTHER )
+        {
+            // Not one of the exact charset names: fall back to prefix matching.
+            charset_code = (CharsetCode)substr_to_code(last_token.start, last_token.length,
+                NHttpMsgHeadShared::charset_code_opt_list);
+
+            if ( charset_code != CHARSET_UNKNOWN )
+                return;
+        }
+        else if ( charset_code == CHARSET_UTF7 )
+        {
+            infractions += INF_UTF7;
+            events.create_event(EVENT_UTF7);
+        }
+    }
+
+    // Release any decoder still attached to the flow so it is not leaked when replaced.
+    // Deleting a null pointer is a no-op.
+    delete session_data->utf_state;
+    session_data->utf_state = new UtfDecodeSession();
+    session_data->utf_state->set_decode_utf_state_charset(charset_code);
+}
+
+
#ifdef REG_TEST
void NHttpMsgHeader::print_section(FILE* output)
{
void prepare_body();
void setup_file_processing();
void setup_decompression();
+ void setup_utf_decoding();
bool detection_section = true;
}
return length;
}
+//FIXIT - norm_remove_lws and norm_remove_quotes_lws could be combined into one function
+// Copy in_buf to out_buf, dropping every single quote, double quote, and every octet flagged
+// in is_sp_tab. Returns the number of octets written to out_buf. The infraction and event
+// arguments are unused; they exist to satisfy the standard normalization signature.
+int32_t norm_remove_quotes_lws(const uint8_t* in_buf, int32_t in_length, uint8_t* out_buf,
+    NHttpInfractions&, NHttpEventGen&)
+{
+    int32_t out_len = 0;
+    for (int32_t idx = 0; idx < in_length; idx++)
+    {
+        const uint8_t octet = in_buf[idx];
+        const bool strip = (octet == '\'') || (octet == '\"') || is_sp_tab[octet];
+        if (!strip)
+            out_buf[out_len++] = octet;
+    }
+    return out_len;
+}
// Other header-value processing functions (not using the standard normalization signature)
// Convert a decimal field such as Content-Length to an integer.
return total;
}
+// Locate the last token of input, i.e. everything after the final occurrence of ichar.
+//
+// input:      field to search; must not be empty (asserted).
+// last_token: set to reference (not copy) the tail of input following the last ichar. When
+//             ichar does not occur, it covers the whole of input. When ichar is the final
+//             octet, the resulting length is zero.
+// ichar:      delimiter octet.
+void get_last_token(const Field& input, Field& last_token, char ichar)
+{
+    assert(input.length > 0);
+
+    // Scan backwards by offset rather than by pointer so no pointer before the start of the
+    // buffer is ever formed when the delimiter is absent.
+    int32_t token_offset = input.length;
+    while ((token_offset > 0) && (input.start[token_offset - 1] != ichar))
+        token_offset--;
+
+    last_token.start = input.start + token_offset;
+    last_token.length = input.length - token_offset;
+}
+
// Find the last token in a comma-separated field and convert it to an enum
int32_t norm_last_token_code(const Field& input, const StrCode table[])
{
- assert(input.length > 0);
- const uint8_t* last_start;
- for (last_start = input.start + input.length - 1; (last_start >= input.start) &&
- (*last_start != ','); last_start--);
- last_start++;
- const int32_t last_length = input.length - (last_start - input.start);
- return str_to_code(last_start, last_length, table);
+ Field last_token;
+ get_last_token(input, last_token, ',');
+
+ return str_to_code(last_token.start, last_token.length, table);
}
// Given a comma-separated list of words, does "chunked" appear before the last word
typedef int32_t (NormFunc)(const uint8_t*, int32_t, uint8_t*, NHttpInfractions&, NHttpEventGen&);
NormFunc norm_to_lower;
NormFunc norm_remove_lws;
+NormFunc norm_remove_quotes_lws;
// Other normalization-related utilities
+void get_last_token(const Field& input, Field& last_token, char ichar);
int64_t norm_decimal_integer(const Field& input);
int32_t norm_last_token_code(const Field& input, const StrCode table[]);
bool chunked_before_end(const Field& input);
return NHttpEnums::STAT_OTHER;
}
+// Return the code of the first table entry that agrees with text over the shorter of the two
+// lengths, or NHttpEnums::STAT_OTHER when no entry does. The table must end with an entry
+// whose name is nullptr. Entries are tried in order, so more specific names must come first.
+//
+// NOTE(review): because only min(text_len, strlen(name)) octets are compared, text that is
+// shorter than an entry (including empty text) matches that entry. Confirm this is intended.
+SO_PUBLIC int32_t substr_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[])
+{
+    for (const StrCode* entry = table; entry->name != nullptr; entry++)
+    {
+        const int32_t name_len = (int)strlen(entry->name);
+        const int32_t cmp_len = (name_len < text_len) ? name_len : text_len;
+
+        if (memcmp(text, entry->name, cmp_len) == 0)
+            return entry->code;
+    }
+    return NHttpEnums::STAT_OTHER;
+}
+
};
int32_t str_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[]);
+int32_t substr_to_code(const uint8_t* text, const int32_t text_len, const StrCode table[]);
#endif
#include "framework/module.h"
#include "framework/counts.h"
+#include "utils/util_utf.h"
+
#include "nhttp_enum.h"
#include "nhttp_str_to_code.h"
#include "nhttp_normalizers.h"
{ 0, nullptr }
};
+// Charset parameters recognized by name, looked up with str_to_code() against the last
+// ';'-separated token of the normalized Content-Type value. Names are lower case and contain
+// no quotes or whitespace because NORMALIZER_CHARSET strips those and lower-cases the value.
+const StrCode NHttpMsgHeadShared::charset_code_list[] =
+{
+ { CHARSET_DEFAULT, "charset=utf-8" },
+ { CHARSET_UTF7, "charset=utf-7" },
+ { CHARSET_UTF16LE, "charset=utf-16le" },
+ { CHARSET_UTF16BE, "charset=utf-16be" },
+ { CHARSET_UTF32LE, "charset=utf-32le" },
+ { CHARSET_UTF32BE, "charset=utf-32be" },
+ { 0, nullptr }
+};
+
+// Fallback table searched with substr_to_code() when the exact lookup finds nothing.
+// substr_to_code() returns the first matching entry, so the longer "charset=utf-" must stay
+// ahead of the plain "charset=" entry.
+const StrCode NHttpMsgHeadShared::charset_code_opt_list[] =
+{
+ { CHARSET_UNKNOWN, "charset=utf-" },
+ { CHARSET_IRRELEVANT, "charset=" },
+ { 0, nullptr }
+};
+
const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_BASIC
{ false, nullptr, nullptr, nullptr };
const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_TOKEN_LIST
{ true, norm_remove_lws, norm_to_lower, nullptr };
+const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_CHARSET
+ { true, norm_remove_quotes_lws, norm_to_lower, nullptr };
+
const HeaderNormalizer NHttpMsgHeadShared::NORMALIZER_CAT
{ true, norm_remove_lws, nullptr, nullptr };
[HEAD_CONTENT_LOCATION] = &NORMALIZER_BASIC,
[HEAD_CONTENT_MD5] = &NORMALIZER_BASIC,
[HEAD_CONTENT_RANGE] = &NORMALIZER_BASIC,
- [HEAD_CONTENT_TYPE] = &NORMALIZER_BASIC,
+ [HEAD_CONTENT_TYPE] = &NORMALIZER_CHARSET,
[HEAD_EXPIRES] = &NORMALIZER_BASIC,
[HEAD_LAST_MODIFIED] = &NORMALIZER_BASIC,
[HEAD_X_FORWARDED_FOR] = &NORMALIZER_CAT,
int SnortEventqAdd(unsigned int, unsigned int, RuleType) { return 0; }
int32_t str_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
+int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
long NHttpTestManager::print_amount {};
bool NHttpTestManager::print_hex {};
// Stubs whose sole purpose is to make the test code link
int32_t str_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
+int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return 0; }
const bool NHttpEnums::is_sp_tab[256] {};
long NHttpTestManager::print_amount {};
bool NHttpTestManager::print_hex {};
#include "util_utf.h"
#include <stdlib.h>
+#include <string.h>
#define DSTATE_FIRST 0
#define DSTATE_SECOND 1
void keep_utf_lib() { }
-/* init a new decode_utf_state_t */
-int init_decode_utf_state(decode_utf_state_t* p)
+UtfDecodeSession::UtfDecodeSession()
{
- if (p == NULL)
- return DECODE_UTF_FAILURE;
-
- p->state = DSTATE_FIRST;
- p->charset = CHARSET_DEFAULT;
- return DECODE_UTF_SUCCESS;
+ init_decode_utf_state();
}
-/* terminate a decode_utf_state_t.
- returns DECODE_UTF_FAILURE if we're not at the base state. */
-int term_decode_utf_state(decode_utf_state_t* dead)
+/* init a new decode_utf_state_t */
+void UtfDecodeSession::init_decode_utf_state()
{
- if (dead == NULL)
- return DECODE_UTF_FAILURE;
-
- if (dead->state != DSTATE_FIRST)
- return DECODE_UTF_FAILURE;
-
- return DECODE_UTF_SUCCESS;
+ dstate.state = DSTATE_FIRST;
+ dstate.charset = CHARSET_DEFAULT;
}
/* setters & getters */
-int set_decode_utf_state_charset(decode_utf_state_t* dstate, int charset)
+void UtfDecodeSession::set_decode_utf_state_charset(CharsetCode charset)
{
- if (dstate == NULL)
- return DECODE_UTF_FAILURE;
-
- dstate->state = DSTATE_FIRST;
- dstate->charset = charset;
- return DECODE_UTF_SUCCESS;
+ dstate.state = DSTATE_FIRST;
+ dstate.charset = charset;
}
-int get_decode_utf_state_charset(decode_utf_state_t* dstate)
+CharsetCode UtfDecodeSession::get_decode_utf_state_charset()
{
- if (dstate == NULL)
- return DECODE_UTF_FAILURE;
+ return dstate.charset;
+}
- return dstate->charset;
+// True when the session's charset is ordered after CHARSET_IRRELEVANT in CharsetCode.
+bool UtfDecodeSession::is_utf_encoding_present()
+{
+    return get_decode_utf_state_charset() > CHARSET_IRRELEVANT;
}
/* Decode UTF-16le from src to dst.
* dst => buffer to write translated text
* dst_len => length allocated for dst
* bytes_copied => store the # of bytes copied to dst
- * dstate => saved state from last call
*
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
*/
-static int DecodeUTF16LE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
- int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF16LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+ int* bytes_copied)
{
- char* src_index = src;
+ const char* src_index = src;
char* dst_index = dst;
- int result = DECODE_UTF_SUCCESS;
-
- if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
- dst_len == 0)
- return DECODE_UTF_FAILURE;
+ bool result = true;
- while ((src_index < (char*)(src + src_len)) &&
- (dst_index < (char*)(dst + dst_len)))
+ while ((src_index < (src + src_len)) &&
+ (dst_index < (dst + dst_len)))
{
/* Copy first byte, skip second, failing if second byte != 0 */
- switch (dstate->state)
+ switch (dstate.state)
{
case DSTATE_FIRST:
*dst_index++ = *src_index++;
- dstate->state = DSTATE_SECOND;
+ dstate.state = DSTATE_SECOND;
break;
case DSTATE_SECOND:
if (*src_index++ > 0)
- result = DECODE_UTF_FAILURE;
- dstate->state = DSTATE_FIRST;
+ result = false;
+ dstate.state = DSTATE_FIRST;
break;
default:
- return DECODE_UTF_FAILURE;
+ return false;
}
}
* dst => buffer to write translated text
* dst_len => length allocated for dst
* bytes_copied => store the # of bytes copied to dst
- * dstate => saved state from last call
*
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
*/
-static int DecodeUTF16BE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
- int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF16BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+ int* bytes_copied)
{
- char* src_index = src;
+ const char* src_index = src;
char* dst_index = dst;
- int result = DECODE_UTF_SUCCESS;
-
- if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
- dst_len == 0)
- return DECODE_UTF_FAILURE;
+ bool result = true;
- while ((src_index < (char*)(src + src_len)) &&
- (dst_index < (char*)(dst + dst_len)))
+ while ((src_index < (src + src_len)) &&
+ (dst_index < (dst + dst_len)))
{
/* Skip first byte, copy second. */
- switch (dstate->state)
+ switch (dstate.state)
{
case DSTATE_FIRST:
if (*src_index++ > 0)
- result = DECODE_UTF_FAILURE;
- dstate->state = DSTATE_SECOND;
+ result = false;
+ dstate.state = DSTATE_SECOND;
break;
case DSTATE_SECOND:
*dst_index++ = *src_index++;
- dstate->state = DSTATE_FIRST;
+ dstate.state = DSTATE_FIRST;
break;
default:
- return DECODE_UTF_FAILURE;
+ return false;
}
}
* dst => buffer to write translated text
* dst_len => length allocated for dst
* bytes_copied => store the # of bytes copied to dst
- * dstate => saved state from last call
*
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
*/
-static int DecodeUTF32LE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
- int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF32LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+ int* bytes_copied)
{
- char* src_index = src;
+ const char* src_index = src;
char* dst_index = dst;
- int result = DECODE_UTF_SUCCESS;
+ bool result = true;
- if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
- dst_len == 0)
- return DECODE_UTF_FAILURE;
-
- while ((src_index < (char*)(src + src_len)) &&
- (dst_index < (char*)(dst + dst_len)))
+ while ((src_index < (src + src_len)) &&
+ (dst_index < (dst + dst_len)))
{
/* Copy the first byte, then skip three. */
- switch (dstate->state)
+ switch (dstate.state)
{
case DSTATE_FIRST:
*dst_index++ = *src_index++;
- dstate->state++;
+ dstate.state++;
break;
case DSTATE_SECOND:
case DSTATE_THIRD:
case DSTATE_FOURTH:
if (*src_index++ > 0)
- result = DECODE_UTF_FAILURE;
- if (dstate->state == DSTATE_FOURTH)
- dstate->state = DSTATE_FIRST;
+ result = false;
+ if (dstate.state == DSTATE_FOURTH)
+ dstate.state = DSTATE_FIRST;
else
- dstate->state++;
+ dstate.state++;
break;
default:
- return DECODE_UTF_FAILURE;
+ return false;
}
}
* dst => buffer to write translated text
* dst_len => length allocated for dst
* bytes_copied => store the # of bytes copied to dst
- * dstate => saved state from last call
*
- * returns: DECODE_UTF_SUCCESS or DECODE_UTF_FAILURE
+ * returns: true or false
*/
-static int DecodeUTF32BE(char* src, unsigned int src_len, char* dst, unsigned int dst_len,
- int* bytes_copied, decode_utf_state_t* dstate)
+bool UtfDecodeSession::DecodeUTF32BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+ int* bytes_copied)
{
- char* src_index = src;
+ const char* src_index = src;
char* dst_index = dst;
- int result = DECODE_UTF_SUCCESS;
-
- if (src == NULL || dst == NULL || bytes_copied == NULL || dstate == NULL || src_len == 0 ||
- dst_len == 0)
- return DECODE_UTF_FAILURE;
+ bool result = true;
- while ((src_index < (char*)(src + src_len)) &&
- (dst_index < (char*)(dst + dst_len)))
+ while ((src_index < (src + src_len)) &&
+ (dst_index < (dst + dst_len)))
{
/* Skip 3 bytes, copy the fourth. */
- switch (dstate->state)
+ switch (dstate.state)
{
case DSTATE_FIRST:
case DSTATE_SECOND:
case DSTATE_THIRD:
if (*src_index++ > 0)
- result = DECODE_UTF_FAILURE;
- dstate->state++;
+ result = false;
+ dstate.state++;
break;
case DSTATE_FOURTH:
*dst_index++ = *src_index++;
- dstate->state = DSTATE_FIRST;
+ dstate.state = DSTATE_FIRST;
break;
default:
- return DECODE_UTF_FAILURE;
+ return false;
}
}
return result;
}
+// Resolve a CHARSET_UNKNOWN session by looking for a byte order mark at the start of the data.
+//
+// src, src_len: in/out. When a BOM is recognized both are advanced past it.
+//
+// Does nothing unless the current charset is CHARSET_UNKNOWN. Otherwise the charset is always
+// replaced: by the UTF-16/32 variant the BOM indicates, or by CHARSET_DEFAULT when there is no
+// BOM or fewer than four octets are available, so the check is not repeated on later data.
+void UtfDecodeSession::determine_charset(const char** src, unsigned int *src_len)
+{
+    if (dstate.charset != CHARSET_UNKNOWN)
+        return;
+
+    /* Got a text content type but no charset.
+     * Look for potential BOM (Byte Order Mark) */
+    CharsetCode detected = CHARSET_DEFAULT; // ensure we don't try again
+
+    if (*src_len >= 4)
+    {
+        const char* const data = *src;
+        unsigned int bom_len = 0;
+
+        // The four-octet marks are tested first: the UTF-32LE mark begins with the UTF-16LE one.
+        if (memcmp(data, "\x00\x00\xFE\xFF", 4) == 0)
+        {
+            detected = CHARSET_UTF32BE;
+            bom_len = 4;
+        }
+        else if (memcmp(data, "\xFF\xFE\x00\x00", 4) == 0)
+        {
+            detected = CHARSET_UTF32LE;
+            bom_len = 4;
+        }
+        else if (memcmp(data, "\xFE\xFF", 2) == 0)
+        {
+            detected = CHARSET_UTF16BE;
+            bom_len = 2;
+        }
+        else if (memcmp(data, "\xFF\xFE", 2) == 0)
+        {
+            detected = CHARSET_UTF16LE;
+            bom_len = 2;
+        }
+
+        *src += bom_len;
+        *src_len -= bom_len;
+    }
+
+    set_decode_utf_state_charset(detected);
+}
+
/* Wrapper function for DecodeUTF{16,32}{LE,BE} */
-int DecodeUTF(
- char* src, unsigned int src_len, char* dst, unsigned int dst_len,
- int* bytes_copied, decode_utf_state_t* dstate)
+// Returns false when an argument is null or a length is zero (*bytes_copied is then left
+// untouched), when nothing remains after a byte order mark was consumed, or when the selected
+// decoder reports failure. Returns true with *bytes_copied == 0 for charsets that have no
+// decoder here.
+bool UtfDecodeSession::decode_utf(
+ const char* src, unsigned int src_len, char* dst, unsigned int dst_len,
+ int* bytes_copied)
{
- if ( !src || !dst || !bytes_copied || !dstate || !src_len || !dst_len )
- return DECODE_UTF_FAILURE;
+ if ( !src || !dst || !bytes_copied || !src_len || !dst_len )
+ return false;
+
+ *bytes_copied = 0;
- switch (dstate->charset)
+ // Resolves CHARSET_UNKNOWN via BOM detection; may advance src and shrink src_len.
+ determine_charset(&src, &src_len);
+
+ if( !src_len)
+ return false;
+
+ switch (dstate.charset)
{
case CHARSET_UTF16LE:
- return DecodeUTF16LE(src, src_len, dst, dst_len, bytes_copied, dstate);
+ return DecodeUTF16LE(src, src_len, dst, dst_len, bytes_copied);
case CHARSET_UTF16BE:
- return DecodeUTF16BE(src, src_len, dst, dst_len, bytes_copied, dstate);
+ return DecodeUTF16BE(src, src_len, dst, dst_len, bytes_copied);
case CHARSET_UTF32LE:
- return DecodeUTF32LE(src, src_len, dst, dst_len, bytes_copied, dstate);
+ return DecodeUTF32LE(src, src_len, dst, dst_len, bytes_copied);
case CHARSET_UTF32BE:
- return DecodeUTF32BE(src, src_len, dst, dst_len, bytes_copied, dstate);
+ return DecodeUTF32BE(src, src_len, dst, dst_len, bytes_copied);
+ default:
+ break;
}
- /* In case the function is called with a bad charset. */
- *bytes_copied = 0;
- return DECODE_UTF_FAILURE;
+ // Any other charset is not an error: nothing was copied and the caller keeps the input.
+ return true;
}
#include "main/snort_types.h"
-// return codes
-#define DECODE_UTF_SUCCESS 0 // FIXIT-L replace with bool
-#define DECODE_UTF_FAILURE -1
-
-// Character set types
-#define CHARSET_DEFAULT 0 // FIXIT-L these should be an enum
-#define CHARSET_UTF7 1
-#define CHARSET_UTF16LE 2
-#define CHARSET_UTF16BE 3
-#define CHARSET_UTF32LE 4
-#define CHARSET_UTF32BE 5
-#define CHARSET_UNKNOWN 255
+// Character set types. Used by the HTTP inspectors; update the inspectors when changing
+// these values.
+// The order is significant: UtfDecodeSession::is_utf_encoding_present() reports true for
+// every value greater than CHARSET_IRRELEVANT.
+enum CharsetCode
+{
+ CHARSET_DEFAULT=0,
+ CHARSET_OTHER,
+ CHARSET_UTF7,
+ CHARSET_IRRELEVANT,
+ CHARSET_UTF16LE,
+ CHARSET_UTF16BE,
+ CHARSET_UTF32LE,
+ CHARSET_UTF32BE,
+ // Charset not yet determined; decoding resolves it by checking for a byte order mark.
+ CHARSET_UNKNOWN
+};
// Since payloads don't have to end on 2/4-byte boundaries, callers to
// DecodeUTF are responsible for keeping a decode_utf_state_t. This carries
struct decode_utf_state_t
{
int state;
- int charset;
+ CharsetCode charset;
};
void keep_utf_lib(); // FIXIT-L eliminate; required to keep symbols for dyn plugins
-// Init & Terminate functions for decode_utf_state_t
-SO_PUBLIC int init_decode_utf_state(decode_utf_state_t*);
-SO_PUBLIC int term_decode_utf_state(decode_utf_state_t*);
-
-// setters & getters
-SO_PUBLIC int set_decode_utf_state_charset(decode_utf_state_t*, int charset);
-SO_PUBLIC int get_decode_utf_state_charset(decode_utf_state_t*);
-
-// UTF-Decoding function prototypes
-SO_PUBLIC int DecodeUTF(
- char* src, unsigned int src_len, char* dst, unsigned int dst_len,
- int* bytes_copied, decode_utf_state_t*);
-
+// Holds the charset and the decoder position for one stream of data, so that decode_utf()
+// can be called repeatedly on consecutive pieces of that stream.
+class SO_PUBLIC UtfDecodeSession
+{
+public:
+ // Starts in the initial decoder state with CHARSET_DEFAULT.
+ UtfDecodeSession();
+ virtual ~UtfDecodeSession() { };
+ // Reset to the initial decoder state and CHARSET_DEFAULT.
+ void init_decode_utf_state();
+ // Set the charset; also resets the decoder state to its initial value.
+ void set_decode_utf_state_charset(CharsetCode charset);
+ CharsetCode get_decode_utf_state_charset();
+ // True when the charset is ordered after CHARSET_IRRELEVANT in CharsetCode.
+ bool is_utf_encoding_present();
+ // Decode src into dst according to the session charset; *bytes_copied receives the number
+ // of octets written to dst. Returns false on invalid arguments or a decoding failure.
+ bool decode_utf(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+private:
+ decode_utf_state_t dstate;
+ // Per-encoding decoders dispatched to by decode_utf().
+ bool DecodeUTF16LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+ bool DecodeUTF16BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+ bool DecodeUTF32LE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+ bool DecodeUTF32BE(const char* src, unsigned int src_len, char* dst, unsigned int dst_len, int* bytes_copied);
+ // Resolve CHARSET_UNKNOWN from a byte order mark, advancing *src / *src_len past it.
+ void determine_charset(const char** src, unsigned int *src_len);
+};
#endif
-
table_api.add_deleted_comment("normalize_headers");
else if (!keyword.compare("normalize_utf"))
- table_api.add_deleted_comment("normalize_utf");
+ tmpval = table_api.add_option("normalize_utf", true);
else if (!keyword.compare("log_uri"))
table_api.add_deleted_comment("log_uri");