set(multiValueArgs SOURCES LIBS)
cmake_parse_arguments(CppUTest "" "" "${multiValueArgs}" ${ARGN})
add_executable(${testname} EXCLUDE_FROM_ALL ${testname}.cc ${CppUTest_SOURCES})
+ target_compile_options(${testname} PRIVATE "-DUNIT_TEST_BUILD")
target_include_directories(${testname} PRIVATE ${CPPUTEST_INCLUDE_DIR})
target_link_libraries(${testname} ${CPPUTEST_LIBRARIES} ${CppUTest_LIBS})
add_test(${testname} ${testname})
INF_MULTIPLE_HOST_HDRS,
INF_HTTP2_SETTINGS,
INF_UPGRADE_HEADER_HTTP2,
- INF_JS_UNEXPECTED_TAG,
+ INF_JS_BAD_TOKEN,
+ INF_JS_OPENING_TAG,
+ INF_JS_CLOSING_TAG,
INF__MAX_VALUE
};
EVENT_PDF_UNSUP_COMP_TYPE = 115,
EVENT_PDF_CASC_COMP = 116,
EVENT_PDF_PARSE_FAILURE = 117,
- EVENT_JS_UNEXPECTED_TAG = 118,
+ EVENT_JS_BAD_TOKEN = 118,
+ EVENT_JS_OPENING_TAG = 119,
+ EVENT_JS_CLOSING_TAG = 120,
EVENT_LOSS_OF_SYNC = 201,
EVENT_CHUNK_ZEROS = 202,
#include "http_flow_data.h"
#include "decompress/file_decomp.h"
+#include "utils/js_normalizer.h"
#include "http_cutter.h"
#include "http_common.h"
if (HttpModule::get_peg_counts(PEG_CONCURRENT_SESSIONS) > 0)
HttpModule::decrement_peg_counts(PEG_CONCURRENT_SESSIONS);
+#ifndef UNIT_TEST_BUILD
+ if (js_normalizer)
+ {
+ update_deallocations(JSNormalizer::size());
+ delete js_normalizer;
+ }
+#endif
+
for (int k=0; k <= 1; k++)
{
delete infractions[k];
update_deallocations(partial_buffer_length[k]);
delete[] partial_detect_buffer[k];
update_deallocations(partial_detect_length[k]);
+ delete[] js_detect_buffer[k];
+ update_deallocations(js_detect_length[k]);
HttpTransaction::delete_transaction(transaction[k], nullptr);
delete cutter[k];
if (compress_stream[k] != nullptr)
}
}
+#ifndef UNIT_TEST_BUILD
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx()
+{
+ if (js_normalizer)
+ return *js_normalizer;
+
+ js_normalizer = new JSNormalizer();
+ update_allocations(JSNormalizer::size());
+
+ return *js_normalizer;
+}
+
+void HttpFlowData::release_js_ctx()
+{
+ if (!js_normalizer)
+ return;
+
+ update_deallocations(JSNormalizer::size());
+ delete js_normalizer;
+ js_normalizer = nullptr;
+}
+#else
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx() { return *js_normalizer; }
+void HttpFlowData::release_js_ctx() {}
+#endif
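
The flow data owns at most one JSNormalizer per session and charges JSNormalizer::size()
against the inspector's memory accounting. A minimal sketch of the intended call pattern
(a caller on the server-to-client body path; the variable names here are illustrative, not
part of the patch):

    // session_data, src/dst buffers and depth are placeholders for this sketch
    snort::JSNormalizer& ctx = session_data->acquire_js_ctx();   // lazily allocated
    ctx.set_depth(depth);
    auto ret = ctx.normalize(src, src_len, dst, dst_len);

    // anything other than SCRIPT_CONTINUE means the script is finished (or broken),
    // so the context is torn down and the allocation is credited back
    if (ret != JSTokenizer::SCRIPT_CONTINUE)
        session_data->release_js_ctx();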
+
bool HttpFlowData::add_to_pipeline(HttpTransaction* latest)
{
if (pipeline == nullptr)
class HttpCutter;
class HttpQueryParser;
+namespace snort
+{
+class JSNormalizer;
+}
+
class HttpFlowData : public snort::FlowData
{
public:
size_t size_of() override;
friend class HttpInspect;
+ friend class HttpJsNorm;
friend class HttpMsgSection;
friend class HttpMsgStart;
friend class HttpMsgRequest;
uint8_t* partial_detect_buffer[2] = { nullptr, nullptr };
uint32_t partial_detect_length[2] = { 0, 0 };
uint32_t partial_js_detect_length[2] = { 0, 0 };
+ uint8_t* js_detect_buffer[2] = { nullptr, nullptr };
+ uint32_t js_detect_length[2] = { 0, 0 };
int32_t status_code_num = HttpCommon::STAT_NOT_PRESENT;
HttpEnums::VersionId version_id[2] = { HttpEnums::VERS__NOT_PRESENT,
HttpEnums::VERS__NOT_PRESENT };
bool cutover_on_clear = false;
bool ssl_search_abandoned = false;
+ // *** HttpJsNorm
+ snort::JSNormalizer* js_normalizer = nullptr;
+
+ snort::JSNormalizer& acquire_js_ctx();
+ void release_js_ctx();
+
// *** Transaction management including pipelining
static const int MAX_PIPELINE = 100; // requests seen - responses seen <= MAX_PIPELINE
HttpTransaction* transaction[2] = { nullptr, nullptr };
#include "http_js_norm.h"
-#include "utils/js_norm_state.h"
#include "utils/js_normalizer.h"
#include "utils/safec.h"
#include "utils/util_jsnorm.h"
+#include "http_common.h"
#include "http_enum.h"
using namespace HttpEnums;
using namespace snort;
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) :
- uri_param(uri_param_), javascript_search_mpse(nullptr),
- htmltype_search_mpse(nullptr)
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+ uri_param(uri_param_),
+ normalization_depth(normalization_depth_),
+ mpse_otag(nullptr),
+ mpse_attr(nullptr),
+ mpse_type(nullptr)
{}
HttpJsNorm::~HttpJsNorm()
{
- delete javascript_search_mpse;
- delete js_src_attr_search_mpse;
- delete htmltype_search_mpse;
+ delete mpse_otag;
+ delete mpse_attr;
+ delete mpse_type;
}
void HttpJsNorm::configure()
{
- if ( configure_once )
+ if (configure_once)
return;
- javascript_search_mpse = new SearchTool;
- js_src_attr_search_mpse = new SearchTool;
- htmltype_search_mpse = new SearchTool;
-
- javascript_search_mpse->add(script_start, script_start_length, JS_JAVASCRIPT);
- javascript_search_mpse->prep();
-
- js_src_attr_search_mpse->add(script_src_attr, script_src_attr_length, JS_ATTR_SRC);
- js_src_attr_search_mpse->prep();
-
- struct HiSearchToken
- {
- const char* name;
- int name_len;
- int search_id;
- };
-
- const HiSearchToken html_patterns[] =
- {
- { "JAVASCRIPT", 10, HTML_JS },
- { "ECMASCRIPT", 10, HTML_EMA },
- { "VBSCRIPT", 8, HTML_VB },
- { nullptr, 0, 0 }
- };
-
- for (const HiSearchToken* tmp = &html_patterns[0]; tmp->name != nullptr; tmp++)
- {
- htmltype_search_mpse->add(tmp->name, tmp->name_len, tmp->search_id);
- }
- htmltype_search_mpse->prep();
+ mpse_otag = new SearchTool;
+ mpse_attr = new SearchTool;
+ mpse_type = new SearchTool;
+
+ static constexpr const char* otag_start = "<SCRIPT";
+ static constexpr const char* attr_gt = ">";
+ static constexpr const char* attr_src = "SRC";
+ static constexpr const char* attr_js1 = "JAVASCRIPT";
+ static constexpr const char* attr_js2 = "ECMASCRIPT";
+ static constexpr const char* attr_vb = "VBSCRIPT";
+
+ mpse_otag->add(otag_start, strlen(otag_start), 0);
+ mpse_attr->add(attr_gt, strlen(attr_gt), AID_GT);
+ mpse_attr->add(attr_src, strlen(attr_src), AID_SRC);
+ mpse_attr->add(attr_js1, strlen(attr_js1), AID_JS);
+ mpse_attr->add(attr_js2, strlen(attr_js2), AID_ECMA);
+ mpse_attr->add(attr_vb, strlen(attr_vb), AID_VB);
+ mpse_type->add(attr_js1, strlen(attr_js1), AID_JS);
+ mpse_type->add(attr_js2, strlen(attr_js2), AID_ECMA);
+ mpse_type->add(attr_vb, strlen(attr_vb), AID_VB);
+
+ mpse_otag->prep();
+ mpse_attr->prep();
+ mpse_type->prep();
configure_once = true;
}
-void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
- HttpEventGen* events, int64_t js_normalization_depth) const
+void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
+ HttpInfractions* infractions, HttpFlowData* ssn) const
{
- bool js_present = false;
- int index = 0;
const char* ptr = (const char*)input.start();
const char* const end = ptr + input.length();
- uint8_t* buffer = new uint8_t[input.length()];
+ HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER];
- JSNormState state;
- state.norm_depth = js_normalization_depth;
- state.alerts = 0;
+ char* buffer = nullptr;
+ char* dst = nullptr;
+ const char* dst_end = nullptr;
+
+ bool script_continue = alive_ctx(ssn);
while (ptr < end)
{
- int bytes_copied = 0;
- int mindex;
-
- // Search for beginning of a javascript
- if (javascript_search_mpse->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
+ if (!script_continue)
{
- const char* js_start = ptr + mindex;
- const char* const angle_bracket =
- (const char*)SnortStrnStr(js_start, end - js_start, ">");
- if (angle_bracket == nullptr || (end - angle_bracket) == 0)
+ if (!mpse_otag->find(ptr, end - ptr, match_otag, false, &ptr))
+ break;
+ if (ptr >= end)
break;
- bool type_js = false;
- bool external_js = false;
- if (angle_bracket > js_start)
- {
- int mid;
- const int script_found = htmltype_search_mpse->find(
- js_start, (angle_bracket-js_start), search_html_found, false, &mid);
-
- external_js = is_external_script(js_start, angle_bracket);
+ MatchContext sctx = {ptr, true, false};
- js_start = angle_bracket + 1;
- if (script_found > 0)
- {
- switch (mid)
- {
- case HTML_JS:
- js_present = true;
- type_js = true;
- break;
- default:
- type_js = false;
- break;
- }
- }
- else
- {
- // if no type or language is found we assume it is a javascript
- js_present = true;
- type_js = true;
- }
- }
- // Save before the <script> begins
- if (js_start > ptr)
+ if (ptr[0] == '>')
+ ptr++;
+ else
{
- if ((js_start - ptr) > (input.length() - index))
- break;
+ if (!mpse_attr->find(ptr, end - ptr, match_attr, false, &sctx))
+ break; // the opening tag never ends
+ ptr = sctx.next;
}
- ptr = js_start;
- if (!type_js or external_js)
+ if (!sctx.is_javascript || sctx.is_external)
continue;
- JSNormalizer::normalize(js_start, (uint16_t)(end-js_start), (char*)buffer+index,
- (uint16_t)(input.length() - index), &ptr, &bytes_copied, state);
-
+ // script found
HttpModule::increment_peg_counts(PEG_JS_INLINE);
+ }
- index += bytes_copied;
+ if (!buffer)
+ {
+ uint8_t* nbuf = ssn->js_detect_buffer[HttpCommon::SRC_SERVER];
+ uint32_t nlen = ssn->js_detect_length[HttpCommon::SRC_SERVER];
+
+        auto len = nlen + (end - ptr); // no more than the remaining raw data
+ buffer = new char[len];
+ if (nbuf)
+ memcpy(buffer, nbuf, nlen);
+ dst = buffer + nlen;
+ dst_end = buffer + len;
}
- else
- break;
- }
- if (js_present)
- {
- if (state.alerts & ALERT_UNEXPECTED_TAG)
+ auto& ctx = ssn->acquire_js_ctx();
+ ctx.set_depth(normalization_depth);
+
+ auto ret = ctx.normalize(ptr, end - ptr, dst, dst_end - dst);
+ ptr = ctx.get_src_next();
+ dst = ctx.get_dst_next();
+
+ switch (ret)
{
- *infractions += INF_JS_UNEXPECTED_TAG;
- events->create_event(EVENT_JS_UNEXPECTED_TAG);
+ case JSTokenizer::EOS:
+ ctx.reset_depth();
+ script_continue = false;
+ break;
+ case JSTokenizer::SCRIPT_ENDED:
+ script_continue = false;
+ break;
+ case JSTokenizer::SCRIPT_CONTINUE:
+ script_continue = true;
+ break;
+ case JSTokenizer::OPENING_TAG:
+ *infractions += INF_JS_OPENING_TAG;
+ events->create_event(EVENT_JS_OPENING_TAG);
+ script_continue = false;
+ break;
+ case JSTokenizer::CLOSING_TAG:
+ *infractions += INF_JS_CLOSING_TAG;
+ events->create_event(EVENT_JS_CLOSING_TAG);
+ script_continue = false;
+ break;
+ case JSTokenizer::BAD_TOKEN:
+ *infractions += INF_JS_BAD_TOKEN;
+ events->create_event(EVENT_JS_BAD_TOKEN);
+ script_continue = false;
+ break;
+ default:
+ assert(false);
+ script_continue = false;
+ break;
}
- output.set(index, buffer, true);
}
- else
- delete[] buffer;
+
+ if (!script_continue)
+ ssn->release_js_ctx();
+
+ if (buffer)
+ output.set(dst - buffer, (const uint8_t*)buffer, true);
}
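
To make the return handling concrete, a hedged example of a single enhanced_normalize()
pass (the expected text matches the tokenizer tests further down; the input is illustrative):

    // input body chunk:  "<script>var a = 1 ;\nvar b = 2 ;\n</script> ..."
    // normalized output: "var a=1;var b=2;"
    // tokenizer result:  JSTokenizer::SCRIPT_ENDED -> script_continue is false and the
    //                    JS context is released; a clean top-level </script> raises no
    //                    event, whereas OPENING_TAG, CLOSING_TAG (a tag found inside a
    //                    string, comment or regex) and BAD_TOKEN alert.
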
void HttpJsNorm::legacy_normalize(const Field& input, Field& output, HttpInfractions* infractions,
int mindex;
// Search for beginning of a javascript
- if (javascript_search_mpse->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
+ if (mpse_otag->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
{
const char* js_start = ptr + mindex;
const char* const angle_bracket =
if (angle_bracket > js_start)
{
int mid;
- const int script_found = htmltype_search_mpse->find(
+ const int script_found = mpse_type->find(
js_start, (angle_bracket-js_start), search_html_found, false, &mid);
js_start = angle_bracket + 1;
{
switch (mid)
{
- case HTML_JS:
+ case AID_JS:
js_present = true;
type_js = true;
break;
}
}
-/* Returning non-zero stops search, which is okay since we only look for one at a time */
int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*)
{
+ static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
*((int*) index_ptr) = index - script_start_length;
return 1;
}
-int HttpJsNorm::search_js_src_attr_found(void*, void*, int index, void* index_ptr, void*)
+
+int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
{
- *((int*) index_ptr) = index - script_src_attr_length;
+ *((int*) id_ptr) = (int)(uintptr_t)id;
return 1;
}
-int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
+
+int HttpJsNorm::match_otag(void*, void*, int index, void* ptr, void*)
{
- *((int*) id_ptr) = (int)(uintptr_t)id;
+ *(char**)ptr += index;
return 1;
}
-bool HttpJsNorm::is_external_script(const char* it, const char* script_tag_end) const
+int HttpJsNorm::match_attr(void* pid, void*, int index, void* sctx, void*)
{
- int src_pos;
+ MatchContext* ctx = (MatchContext*)sctx;
+ AttrId id = (AttrId)(uintptr_t)pid;
+ const char* c;
- while (js_src_attr_search_mpse->find(it, (script_tag_end - it),
- search_js_src_attr_found, false, &src_pos))
+ switch (id)
{
- it += (src_pos + script_src_attr_length - 1);
- while (++it < script_tag_end)
- {
- if (*it == ' ')
- continue;
- else if (*it == '=')
- return true;
- else
- break;
- }
+ case AID_GT:
+ ctx->next += index;
+ return 1;
+
+ case AID_SRC:
+ c = ctx->next + index;
+ while (*c == ' ') c++;
+ ctx->is_external = ctx->is_external || *c == '=';
+ return 0;
+
+ case AID_JS:
+ ctx->is_javascript = true;
+ return 0;
+
+ case AID_ECMA:
+ ctx->is_javascript = true;
+ return 0;
+
+ case AID_VB:
+ ctx->is_javascript = false;
+ return 0;
+
+ default:
+ ctx->next += index;
+ ctx->is_external = false;
+ ctx->is_javascript = false;
+ return 1;
}
-
- return false;
}
-
#include "search_engines/search_tool.h"
#include "http_field.h"
+#include "http_flow_data.h"
#include "http_event.h"
#include "http_module.h"
class HttpJsNorm
{
public:
- HttpJsNorm(const HttpParaList::UriParam& uri_param_);
+ HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth);
~HttpJsNorm();
- void legacy_normalize(const Field& input, Field& output, HttpInfractions* infractions,
- HttpEventGen* events, int max_javascript_whitespaces) const;
- void enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
- HttpEventGen* events, int64_t js_normalization_depth) const;
+
+ void legacy_normalize(const Field& input, Field& output, HttpInfractions*, HttpEventGen*,
+ int max_javascript_whitespaces) const;
+ void enhanced_normalize(const Field& input, Field& output, HttpInfractions*, HttpFlowData*) const;
void configure();
-private:
- bool configure_once = false;
- enum JsSearchId { JS_JAVASCRIPT };
- enum JsSrcAttrSearchId { JS_ATTR_SRC };
- enum HtmlSearchId { HTML_JS, HTML_EMA, HTML_VB };
+private:
+ enum AttrId { AID_GT, AID_SRC, AID_JS, AID_ECMA, AID_VB };
- static constexpr const char* script_start = "<SCRIPT";
- static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
- static constexpr const char* script_src_attr = "SRC";
- static constexpr int script_src_attr_length = sizeof("SRC") - 1;
+ struct MatchContext
+ {
+ const char* next;
+ bool is_javascript;
+ bool is_external;
+ };
const HttpParaList::UriParam& uri_param;
+ int64_t normalization_depth;
+ bool configure_once = false;
- snort::SearchTool* javascript_search_mpse;
- snort::SearchTool* js_src_attr_search_mpse;
- snort::SearchTool* htmltype_search_mpse;
+ snort::SearchTool* mpse_otag;
+ snort::SearchTool* mpse_attr;
+ snort::SearchTool* mpse_type; // legacy only
- static int search_js_found(void*, void*, int index, void*, void*);
- static int search_js_src_attr_found(void*, void*, int index, void*, void*);
- static int search_html_found(void* id, void*, int, void*, void*);
+ static int search_js_found(void*, void*, int index, void*, void*); // legacy only
+ static int search_html_found(void* id, void*, int, void*, void*); // legacy only
+ static int match_otag(void*, void*, int, void*, void*);
+ static int match_attr(void*, void*, int, void*, void*);
- bool is_external_script(const char* it, const char* script_tag_end) const;
+ bool alive_ctx(const HttpFlowData* ssn) const
+ { return ssn->js_normalizer; }
};
#endif
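
A worked example of how match_attr() is expected to classify an opening tag, based on my
reading of the callbacks above (the markup is illustrative):

    // <script type="application/javascript" src="lib.js"> ... </script>
    //
    //   AID_JS  -> sctx.is_javascript = true   (the JAVASCRIPT pattern matches the type)
    //   AID_SRC -> sctx.is_external  = true    ('=' follows the SRC attribute)
    //   AID_GT  -> sctx.next points just past '>' and the attribute search stops
    //
    // enhanced_normalize() then skips this block: external scripts are not normalized,
    // only inline javascript/ecmascript bodies are.
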
else if (val.is("normalize_javascript"))
{
params->js_norm_param.normalize_javascript = val.get_bool();
-
- if ( !params->js_norm_param.is_javascript_normalization )
- params->js_norm_param.is_javascript_normalization =
- params->js_norm_param.normalize_javascript;
+ params->js_norm_param.is_javascript_normalization =
+ params->js_norm_param.is_javascript_normalization
+ or params->js_norm_param.normalize_javascript;
}
else if (val.is("js_normalization_depth"))
{
int64_t v = val.get_int64();
- params->js_norm_param.js_normalization_depth = (v == -1) ?
- Parameter::get_int("max53") : v;
-
- if ( !params->js_norm_param.is_javascript_normalization )
- params->js_norm_param.is_javascript_normalization =
- (params->js_norm_param.js_normalization_depth > 0);
+ params->js_norm_param.js_normalization_depth = v;
+ params->js_norm_param.is_javascript_normalization =
+ params->js_norm_param.is_javascript_normalization or (v != 0);
}
else if (val.is("max_javascript_whitespaces"))
{
ParseError("Cannot use normalize_javascript and js_normalization_depth together.");
if ( params->js_norm_param.is_javascript_normalization )
- params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param);
+        params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param,
+            params->js_norm_param.js_normalization_depth);
params->script_detection_handle = script_detection_handle;
memcpy(cumulative_buffer + partial_detect_length, decompressed_file_body.start(),
decompressed_file_body.length());
cumulative_data.set(total_length, cumulative_buffer, true);
- do_js_normalization(cumulative_data, js_norm_body);
+
+ do_js_normalization(cumulative_data, js_norm_body, true);
+
if ((int32_t)partial_js_detect_length == js_norm_body.length())
{
clean_partial(partial_inspected_octets, partial_detect_length,
}
}
else
- do_js_normalization(decompressed_file_body, js_norm_body);
+ do_js_normalization(decompressed_file_body, js_norm_body, false);
const int32_t detect_length =
(js_norm_body.length() <= session_data->detect_depth_remaining[source_id]) ?
}
}
-void HttpMsgBody::do_js_normalization(const Field& input, Field& output)
+void HttpMsgBody::do_js_normalization(const Field& input, Field& output, bool partial_detect)
{
- if ( !params->js_norm_param.is_javascript_normalization or source_id == SRC_CLIENT )
+ if (!params->js_norm_param.is_javascript_normalization or source_id == SRC_CLIENT)
output.set(input);
- else if ( params->js_norm_param.normalize_javascript )
+ else if (params->js_norm_param.normalize_javascript)
params->js_norm_param.js_norm->legacy_normalize(input, output,
transaction->get_infractions(source_id), session_data->events[source_id],
params->js_norm_param.max_javascript_whitespaces);
- else if ( params->js_norm_param.js_normalization_depth )
+ else if (params->js_norm_param.js_normalization_depth)
{
output.set(input);
+ bool js_continuation = session_data->js_normalizer;
+ uint8_t*& buf = session_data->js_detect_buffer[source_id];
+ uint32_t& len = session_data->js_detect_length[source_id];
+
+ if (partial_detect)
+ session_data->release_js_ctx();
+ else
+ {
+ session_data->update_deallocations(len);
+ delete[] buf;
+ buf = nullptr;
+ len = 0;
+ }
+
params->js_norm_param.js_norm->enhanced_normalize(input, enhanced_js_norm_body,
- transaction->get_infractions(source_id), session_data->events[source_id],
- params->js_norm_param.js_normalization_depth);
+ transaction->get_infractions(source_id), session_data);
const int32_t norm_length =
(enhanced_js_norm_body.length() <= session_data->detect_depth_remaining[source_id]) ?
enhanced_js_norm_body.length() : session_data->detect_depth_remaining[source_id];
if ( norm_length > 0 )
+ {
set_script_data(enhanced_js_norm_body.start(), (unsigned int)norm_length);
+
+ if (partial_detect)
+ return;
+
+ if (js_continuation)
+ {
+ auto nscript_len = enhanced_js_norm_body.length();
+ uint8_t* nscript = new uint8_t[nscript_len];
+
+ memcpy(nscript, enhanced_js_norm_body.start(), nscript_len);
+ buf = nscript;
+ len = nscript_len;
+ session_data->update_allocations(len);
+ }
+ }
}
}
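
A short summary of the buffering above, as I read it (not normative):

    // partial inspection (partial_detect == true):
    //   the raw body is being accumulated upstream, so the JS context is reset and the
    //   cumulative buffer is re-normalized from scratch; nothing is saved here.
    //
    // full inspection (partial_detect == false):
    //   the per-direction js_detect_buffer is dropped first; if a script was already open
    //   when this body section arrived (js_continuation), the freshly normalized body is
    //   copied into js_detect_buffer so the next enhanced_normalize() call can prepend it
    //   and detection sees one contiguous normalized script.
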
void do_file_processing(const Field& file_data);
void do_utf_decoding(const Field& input, Field& output);
void do_file_decompression(const Field& input, Field& output);
- void do_js_normalization(const Field& input, Field& output);
+ void do_js_normalization(const Field& input, Field& output, bool partial_detect);
void clean_partial(uint32_t& partial_inspected_octets, uint32_t& partial_detect_length,
uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length,
int32_t detect_length);
{ EVENT_PDF_UNSUP_COMP_TYPE, "PDF file unsupported compression type" },
{ EVENT_PDF_CASC_COMP, "PDF file cascaded compression" },
{ EVENT_PDF_PARSE_FAILURE, "PDF file parse failure" },
- { EVENT_JS_UNEXPECTED_TAG, "unexpected script tag within inline javascript" },
+ { EVENT_JS_BAD_TOKEN, "bad token in JavaScript" },
+ { EVENT_JS_OPENING_TAG, "unexpected script opening tag in JavaScript" },
+ { EVENT_JS_CLOSING_TAG, "unexpected script closing tag in JavaScript" },
{ EVENT_LOSS_OF_SYNC, "not HTTP traffic" },
{ EVENT_CHUNK_ZEROS, "chunk length has excessive leading zeros" },
{ EVENT_WS_BETWEEN_MSGS, "white space before or between messages" },
long HttpTestManager::print_amount {};
bool HttpTestManager::print_hex {};
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) :
- uri_param(uri_param_), javascript_search_mpse(nullptr), htmltype_search_mpse(nullptr) {}
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+ uri_param(uri_param_), normalization_depth(normalization_depth_),
+ mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
HttpJsNorm::~HttpJsNorm() = default;
void HttpJsNorm::configure(){}
int64_t Parameter::get_int(char const*) { return 0; }
void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { }
void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { }
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) :
- uri_param(uri_param_), javascript_search_mpse(nullptr), htmltype_search_mpse(nullptr) {}
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+ uri_param(uri_param_), normalization_depth(normalization_depth_),
+ mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
HttpJsNorm::~HttpJsNorm() = default;
void HttpJsNorm::configure() {}
int64_t Parameter::get_int(char const*) { return 0; }
dnet_header.h
dyn_array.cc
dyn_array.h
- js_norm_state.h
js_normalizer.cc
js_normalizer.h
js_tokenizer.h
+++ /dev/null
-//--------------------------------------------------------------------------
-// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License Version 2 as published
-// by the Free Software Foundation. You may not use, modify or distribute
-// this program under any other version of the GNU General Public License.
-//
-// This program is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-//--------------------------------------------------------------------------
-// js_norm_state.h author Oleksandr Serhiienko <oserhiie@cisco.com>
-
-#ifndef JS_NORM_STATE_H
-#define JS_NORM_STATE_H
-
-#include "main/snort_types.h"
-
-namespace snort
-{
-#define ALERT_UNEXPECTED_TAG 0x1
-
-struct JSNormState
-{
- int64_t norm_depth;
- uint16_t alerts;
-};
-}
-
-#endif // JS_NORM_STATE_H
-
#include "js_normalizer.h"
-#include <FlexLexer.h>
+using namespace snort;
+
+JSNormalizer::JSNormalizer()
+ : depth(-1),
+ rem_bytes(-1),
+ unlim(true),
+ src_next(nullptr),
+ dst_next(nullptr),
+ tokenizer(in, out)
+{
+}
-#include "js_tokenizer.h"
+void JSNormalizer::set_depth(size_t new_depth)
+{
+ if (depth == new_depth)
+ return;
-using namespace snort;
+ depth = new_depth;
+ rem_bytes = depth;
+ unlim = depth == (size_t)-1;
+}
-int JSNormalizer::normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
- const char** ptr, int* bytes_copied, JSNormState& state)
+JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len, char* dst, size_t dst_len)
{
- std::stringstream in, out;
- in.rdbuf()->pubsetbuf(const_cast<char*>(srcbuf),
- (state.norm_depth >= srclen) ? srclen : state.norm_depth);
+ if (rem_bytes == 0 && !unlim)
+ {
+ src_next = src + src_len;
+ dst_next = dst;
+ return JSTokenizer::EOS;
+ }
+
+ size_t len = unlim ? src_len :
+ src_len < rem_bytes ? src_len : rem_bytes;
+ in.rdbuf()->pubsetbuf(const_cast<char*>(src), len);
+ out.rdbuf()->pubsetbuf(dst, dst_len);
+
+ JSTokenizer::JSRet ret = (JSTokenizer::JSRet)tokenizer.yylex();
+ in.clear();
+ out.clear();
+ size_t r_bytes = in.tellg();
+ size_t w_bytes = out.tellp();
- JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied, state);
- return tokenizer.yylex();
+ if (!unlim)
+ rem_bytes -= r_bytes;
+ src_next = src + r_bytes;
+ dst_next = dst + w_bytes;
+
+ return rem_bytes ? ret : JSTokenizer::EOS;
}
+size_t JSNormalizer::size()
+{
+    return sizeof(JSNormalizer) + 16384; // the default YY_BUF_SIZE
+}
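
A compact usage sketch, mirroring the unit-test macros near the end of this patch (the
buffer sizes and input are illustrative):

    #include "utils/js_normalizer.h"

    snort::JSNormalizer norm;
    norm.set_depth(65535);                        // or (size_t)-1 for unlimited depth

    const char src[] = "var a = 1 ;\nvar b = 2 ;\n";
    char dst[sizeof(src)];

    auto ret = norm.normalize(src, sizeof(src) - 1, dst, sizeof(dst));
    const char* read_to = norm.get_src_next();    // where reading stopped in src
    int written = norm.get_dst_next() - dst;      // bytes produced in dst

    // 'written' bytes of dst now hold "var a=1;var b=2;";
    // ret == JSTokenizer::SCRIPT_CONTINUE since no closing tag was seen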
#include "main/snort_types.h"
-#include "js_norm_state.h"
+#include <FlexLexer.h>
+
+#include "js_tokenizer.h"
namespace snort
{
+
class JSNormalizer
{
public:
- static int normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
- const char** ptr, int* bytes_copied, JSNormState& state);
+ JSNormalizer();
+
+ const char* get_src_next() const
+ { return src_next; }
+
+    char* get_dst_next() const // may point past dst + dst_len; nothing is written beyond dst_len
+ { return dst_next; }
+
+ void reset_depth()
+ { rem_bytes = depth; }
+
+ void set_depth(size_t depth);
+
+ JSTokenizer::JSRet normalize(const char* src, size_t src_len, char* dst, size_t dst_len);
+
+ static size_t size();
+
+private:
+ size_t depth;
+ size_t rem_bytes;
+ bool unlim;
+ const char* src_next;
+ char* dst_next;
+
+ std::stringstream in;
+ std::stringstream out;
+ JSTokenizer tokenizer;
};
+
}
#endif //JS_NORMALIZER_H
#include "log/messages.h"
-#include "js_norm_state.h"
-
class JSTokenizer : public yyFlexLexer
{
private:
};
public:
- // we need an out stream because yyFlexLexer API strongly requires that
- JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
- const uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state);
+ enum JSRet
+ {
+ EOS = 0,
+ SCRIPT_ENDED,
+ SCRIPT_CONTINUE,
+ OPENING_TAG,
+ CLOSING_TAG,
+ BAD_TOKEN
+ };
+
+ JSTokenizer(std::istream& in, std::ostream& out);
~JSTokenizer() override;
- // so, Flex will treat this class as yyclass
- // must come with yyclass Flex option
- // don't need to define this method, it'll be substituted by Flex
- // returns 0 if OK, 1 otherwise
+ // returns JSRet
int yylex() override;
protected:
{ snort::FatalError("%s", msg); }
private:
- void init();
-
- // scan buffers control
- void switch_to_temporal(const std::string& data);
void switch_to_initial();
-
- bool eval_identifier(const char* lexeme);
- bool eval_string_literal(const char* match_prefix, const char quotes);
- bool eval_regex_literal(const char* match_prefix);
- bool eval_eof();
- bool eval_single_line_comment();
- bool eval_multi_line_comment();
-
- bool parse_literal(const std::string& match_prefix, const char sentinel_ch,
- std::string& result, bool& is_alert, bool is_regex = false);
-
- // main lexeme handler
- // all scanned tokens must pass here
- bool eval(const JSToken tok, const char* lexeme);
-
- bool normalize_identifier(const JSToken prev_tok, const char* lexeme);
- bool normalize_punctuator(const JSToken prev_tok, const char* lexeme);
- bool normalize_operator(const JSToken prev_tok, const char* lexeme);
- bool normalize_directive(const JSToken prev_tok, const char* lexeme);
- bool normalize_undefined(const JSToken prev_tok, const char* lexeme);
- bool normalize_lexeme(const JSToken prev_tok, const char* lexeme);
-
- bool write_output(const std::string& str);
-
- void update_ptr();
+ void switch_to_temporal(const std::string& data);
+ JSRet eval_eof();
+ JSRet do_spacing(JSToken cur_token);
+ JSRet do_operator_spacing(JSToken cur_token);
+ bool unescape(const char* lexeme);
private:
- char* dstbuf;
- const uint16_t dstlen;
- const char** ptr;
- int* bytes_copied;
-
- struct ScanBuffers;
- ScanBuffers* buffers = nullptr;
- std::stringstream temporal;
-
- JSToken prev_tok = UNDEFINED;
-
- snort::JSNormState& state;
+ void* cur_buffer;
+ void* tmp_buffer = nullptr;
+ std::stringstream tmp;
+ JSToken token = UNDEFINED;
};
#endif // JS_TOKENIZER_H
#include <cassert>
#include "utils/util_cstring.h"
+
+ #define EXEC(f) { auto r = (f); if (r) { BEGIN(regst); return r; } }
%}
/* The following grammar was created based on ECMAScript specification */
/* comments */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.4 */
-SINGLE_LINE_COMMENT "//"
-MULTI_LINE_COMMENT "/\*"
+LINE_COMMENT_START "//"
+LINE_COMMENT_END1 [^<\xA\xD]*\xA
+LINE_COMMENT_END2 [^<\xA\xD]*\xD
+LINE_COMMENT_END3 [^<\xA\xD]*"<"+(?i:script)
+LINE_COMMENT_END4 [^<\xA\xD]*"<"+(?i:\/script>)
+LINE_COMMENT_SKIP [^<\xA\xD]*"<"?
+BLOCK_COMMENT_START "/*"
+BLOCK_COMMENT_END1 [^<*]*"*"+"/"
+BLOCK_COMMENT_END2 [^<*]*"<"+(?i:script)
+BLOCK_COMMENT_END3 [^<*]*"<"+(?i:\/script>)
+BLOCK_COMMENT_SKIP [^<*]*[<*]?
/* directives */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-14.1 */
-USE_STRICT_DIRECTIVE "\"use strict\"";*|"\'use strict\'";*
+USE_STRICT_DIRECTIVE "\"use strict\""|"\'use strict\'"
+USE_STRICT_DIRECTIVE_SC "\"use strict\"";*|"\'use strict\'";*
/* keywords */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6.1.1 */
LITERAL_BOOLEAN true|false
LITERAL_DECIMAL [.]?[0-9]+[\.]?[0-9]*[eE]?[0-9]*
LITERAL_HEX_INTEGER 0x[0-9a-fA-F]*|0X[0-9a-fA-F]*
-LITERAL_DOUBLE_STRING_BEGIN \"
-LITERAL_SINGLE_STRING_BEGIN \'
-LITERAL_REGULAR_EXPRESSION \/[^*\/]
+LITERAL_DQ_STRING_START \"
+LITERAL_DQ_STRING_END \"
+LITERAL_DQ_STRING_SKIP \\\"
+LITERAL_SQ_STRING_START \'
+LITERAL_SQ_STRING_END \'
+LITERAL_SQ_STRING_SKIP \\\'
+LITERAL_REGEX_START \/[^*\/]
+LITERAL_REGEX_END \/[gimsuy]*
+LITERAL_REGEX_SKIP \\\/
/* extra literals */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-4.3 */
LITERAL_UNDEFINED undefined
LITERAL_NAN NaN
LITERAL {LITERAL_NULL}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN}
-HTML_COMMENT_OPEN <!--
-HTML_TAG_SCRIPT_OPEN (?i:<script)
-HTML_TAG_SCRIPT_CLOSE (?i:<\/script>)
+HTML_COMMENT_OPEN "<"+"!--"
+HTML_TAG_SCRIPT_OPEN "<"+(?i:script)
+HTML_TAG_SCRIPT_CLOSE "<"+(?i:\/script>)
/* from 0x000 to 0x10FFFD to match undefined tokens */
/* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
/* match regex literal only if the previous token was of type PUNCTUATOR_3 or KEYWORD */
/* this resolves an ambiguity with a division operator: var x = 2/2/1; */
-%x regex
+%s regst
/* do not match division operators as punctuators if the previous token was of type PUNCTUATOR */
/* this resolves an ambiguity with regular expression in some cases such as (/=abc=/g) */
-%x div_op
+%s divop
+
+/* in a single line comment */
+%x lcomm
+
+/* in a multi line comment */
+%x bcomm
+
+/* in a single-quoted string */
+%x sqstr
+
+/* in a double-quoted string */
+%x dqstr
+
+/* in a regular expression */
+%x regex
%%
-<*>{WHITESPACES} { /* skip */ }
-<*>{CHAR_ESCAPE_SEQUENCES} { /* skip */ }
-<*>{LINE_TERMINATORS} { BEGIN(regex); }
-<*>{HTML_TAG_SCRIPT_OPEN} { state.alerts |= ALERT_UNEXPECTED_TAG; update_ptr(); return 1; }
-<*>{HTML_TAG_SCRIPT_CLOSE} { update_ptr(); *ptr -= YYLeng(); return 0; }
-<*>{HTML_COMMENT_OPEN} { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
-<*>{SINGLE_LINE_COMMENT} { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
-<*>{MULTI_LINE_COMMENT} { if ( !eval_multi_line_comment() ) { update_ptr(); return 1; } }
-<*>{USE_STRICT_DIRECTIVE} { if ( !eval(DIRECTIVE, YYText()) ) { update_ptr(); return 1; } }
-<*>{KEYWORD} { if ( !eval(KEYWORD, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
-<*>{CLOSING_BRACES} { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<div_op>{DIV_OPERATOR}|{DIV_ASSIGNMENT_OPERATOR} { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } }
-<*>{PUNCTUATOR} { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
-<*>{OPERATOR} { if ( !eval(OPERATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>{LITERAL} { if ( !eval(LITERAL, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>{LITERAL_DOUBLE_STRING_BEGIN} { if ( !eval_string_literal(YYText(), '"') ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>{LITERAL_SINGLE_STRING_BEGIN} { if ( !eval_string_literal(YYText(), '\'') ) { update_ptr(); return 1; } BEGIN(div_op); }
-<regex>{LITERAL_REGULAR_EXPRESSION} { if ( !eval_regex_literal(YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>{IDENTIFIER} { if ( !eval_identifier(YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>.|{ALL_UNICODE} { if ( !eval(UNDEFINED, YYText()) ) { update_ptr(); return 1; } }
-<<EOF>> { if ( eval_eof() ) { update_ptr(); return 0; } }
+{WHITESPACES} { }
+{CHAR_ESCAPE_SEQUENCES} { }
+{LINE_TERMINATORS} { BEGIN(regst); }
+
+<INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); return OPENING_TAG; }
+{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return SCRIPT_ENDED; }
+
+{HTML_COMMENT_OPEN} { BEGIN(lcomm); }
+{LINE_COMMENT_START} { BEGIN(lcomm); }
+<lcomm>{LINE_COMMENT_END1} { BEGIN(regst); }
+<lcomm>{LINE_COMMENT_END2} { BEGIN(regst); }
+<lcomm>{LINE_COMMENT_END3} { BEGIN(regst); return OPENING_TAG; }
+<lcomm>{LINE_COMMENT_END4} { BEGIN(regst); return CLOSING_TAG; }
+<lcomm>{LINE_COMMENT_SKIP} { }
+<lcomm><<EOF>> { return SCRIPT_CONTINUE; }
+
+{BLOCK_COMMENT_START} { BEGIN(bcomm); }
+<bcomm>{BLOCK_COMMENT_END1} { BEGIN(regst); }
+<bcomm>{BLOCK_COMMENT_END2} { BEGIN(regst); return OPENING_TAG; }
+<bcomm>{BLOCK_COMMENT_END3} { BEGIN(regst); return CLOSING_TAG; }
+<bcomm>{BLOCK_COMMENT_SKIP} { }
+<bcomm><<EOF>> { return SCRIPT_CONTINUE; }
+
+{LITERAL_DQ_STRING_START} { EXEC(do_spacing(LITERAL)); ECHO; BEGIN(dqstr); }
+<dqstr>{LITERAL_DQ_STRING_END} { ECHO; BEGIN(divop); }
+<dqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
+<dqstr>\\{CR}{LF} { }
+<dqstr>\\{LF} { }
+<dqstr>\\{CR} { }
+<dqstr>{LINE_TERMINATORS} { BEGIN(regst); return BAD_TOKEN; }
+<dqstr>{LITERAL_DQ_STRING_SKIP} { ECHO; }
+<dqstr>. { ECHO; }
+<dqstr><<EOF>> { return SCRIPT_CONTINUE; }
+
+{LITERAL_SQ_STRING_START} { EXEC(do_spacing(LITERAL)); ECHO; BEGIN(sqstr); }
+<sqstr>{LITERAL_SQ_STRING_END} { ECHO; BEGIN(divop); }
+<sqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
+<sqstr>\\{CR}{LF} { }
+<sqstr>\\{LF} { }
+<sqstr>\\{CR} { }
+<sqstr>{LINE_TERMINATORS} { BEGIN(regst); return BAD_TOKEN; }
+<sqstr>{LITERAL_SQ_STRING_SKIP} { ECHO; }
+<sqstr>. { ECHO; }
+<sqstr><<EOF>> { return SCRIPT_CONTINUE; }
+
+<regst>{LITERAL_REGEX_START} { EXEC(do_spacing(LITERAL)); yyout << '/'; yyless(1); BEGIN(regex); }
+<regex>{LITERAL_REGEX_END} { ECHO; BEGIN(divop); }
+<regex>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
+<regex>{LITERAL_REGEX_SKIP} { ECHO; }
+<regex>\\{LF} |
+<regex>\\{CR} |
+<regex>{LINE_TERMINATORS} { BEGIN(regst); return BAD_TOKEN; }
+<regex>[^<{LF}{CR}{LS}{PS}\\\/]+ { ECHO; }
+<regex><<EOF>> { return SCRIPT_CONTINUE; }
+
+<divop>{DIV_OPERATOR} |
+<divop>{DIV_ASSIGNMENT_OPERATOR} { ECHO; token = PUNCTUATOR; BEGIN(INITIAL); }
+
+{CLOSING_BRACES} { ECHO; token = PUNCTUATOR; BEGIN(divop); }
+{PUNCTUATOR} { ECHO; token = PUNCTUATOR; BEGIN(regst); }
+
+{USE_STRICT_DIRECTIVE} { EXEC(do_spacing(DIRECTIVE)); ECHO; BEGIN(INITIAL); yyout << ';'; }
+{USE_STRICT_DIRECTIVE_SC} { EXEC(do_spacing(DIRECTIVE)); ECHO; BEGIN(INITIAL); }
+{KEYWORD} { EXEC(do_spacing(KEYWORD)); ECHO; BEGIN(regst); }
+{OPERATOR} { EXEC(do_operator_spacing(OPERATOR)); ECHO; BEGIN(divop); }
+{LITERAL} { EXEC(do_spacing(LITERAL)); ECHO; BEGIN(divop); }
+{IDENTIFIER} { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)); ECHO; }; BEGIN(divop); }
+
+.|{ALL_UNICODE} { ECHO; token = UNDEFINED; BEGIN(INITIAL); }
+<<EOF>> { EXEC(eval_eof()); }
+
%%
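
A worked example of the rules above, taken from the "directives" test later in this patch:

    // input:  "'use strict'\nvar a = 1;"
    // output: "'use strict';var a=1;"
    //
    // 'use strict' is matched whole by USE_STRICT_DIRECTIVE and the missing ';' is
    // appended; the line terminator is dropped (re-arming <regst> so a regex literal
    // could legally follow); do_spacing() keeps exactly one space between the two
    // word-like tokens "var" and "a", while '=', '1' and ';' attach with no spacing.
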
// static helper functions
return res;
}
-static bool contains_script_tags(const std::string& str)
-{
- static constexpr const char* script = "SCRIPT";
- static constexpr const int script_len = sizeof("SCRIPT") - 1;
-
- const char* start = str.c_str();
- const char* end = start + str.size();
- const char* it = start;
-
- while ( it )
- {
- it = snort::SnortStrcasestr(it, (end - it), script);
- if ( it )
- {
- int d = it - start;
- if ( d == 1 )
- {
- if ( *(it - 1) == '<' )
- return true;
- }
- else if ( d >= 2 )
- {
- if ( (*(it - 1) == '/' and *(it - 2) == '<') or
- (*(it - 1) == '<' and *(it - 2) != '\\') )
- {
- return true;
- }
- }
- it += script_len;
- }
- }
- return false;
-}
-
// JSTokenizer members
-struct JSTokenizer::ScanBuffers
-{
- YY_BUFFER_STATE initial = nullptr;
- YY_BUFFER_STATE temporal = nullptr;
-};
-
-JSTokenizer::JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
- uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state)
- : yyFlexLexer(in, out),
- dstbuf(dstbuf),
- dstlen(dstlen),
- ptr(ptr),
- bytes_copied(bytes_copied),
- state(state)
+JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out)
+ : yyFlexLexer(in, out)
{
- assert(bytes_copied);
- init();
+ BEGIN(regst);
}
JSTokenizer::~JSTokenizer()
-{ delete buffers; }
-
-void JSTokenizer::init()
{
- buffers = new ScanBuffers;
- *bytes_copied = 0;
-
- // since regular expression may occur at the beginning of the input
- BEGIN(regex);
+ yy_delete_buffer((YY_BUFFER_STATE)tmp_buffer);
}
void JSTokenizer::switch_to_temporal(const std::string& data)
{
- temporal.str(data);
- buffers->initial = YY_CURRENT_BUFFER;
- buffers->temporal = yy_create_buffer(temporal, data.size());
- yy_switch_to_buffer(buffers->temporal);
+ tmp.str(data);
+ cur_buffer = YY_CURRENT_BUFFER;
+ tmp_buffer = yy_create_buffer(tmp, data.size());
+ yy_switch_to_buffer((YY_BUFFER_STATE)tmp_buffer);
}
void JSTokenizer::switch_to_initial()
{
- yy_delete_buffer(buffers->temporal);
- yy_switch_to_buffer(buffers->initial);
- buffers->temporal = nullptr;
-}
-
-bool JSTokenizer::eval_identifier(const char* lexeme)
-{
- // If an identifier has escaped Unicode, unescape and match again
- // in a temporal scan buffer
- if ( strstr(lexeme, "\\u") )
- {
- const std::string unescaped_lex = unescape_unicode(lexeme);
- switch_to_temporal(unescaped_lex);
- return true;
- }
-
- return eval(IDENTIFIER, lexeme);
-}
-
-bool JSTokenizer::eval_string_literal(const char* match_prefix, const char quotes)
-{
- std::string s;
- bool is_alert = false;
- bool is_ok = parse_literal(match_prefix, quotes, s, is_alert);
-
- if ( is_alert )
- return false;
-
- return eval(is_ok ? LITERAL : UNDEFINED, s.c_str());
-}
-
-bool JSTokenizer::eval_regex_literal(const char* match_prefix)
-{
- static const std::string regex_flags = "gimsuy";
-
- std::string s;
- bool is_alert = false;
- bool is_ok = parse_literal(match_prefix, '/', s, is_alert, true);
-
- if ( is_alert )
- return false;
-
- // append regex flags
- char c;
- while ( (c = yyinput()) != 0 )
- {
- if ( regex_flags.find(c) != std::string::npos )
- s += c;
- else
- {
- unput(c);
- break;
- }
- }
-
- return eval(is_ok ? LITERAL : UNDEFINED, s.c_str());
+ yy_switch_to_buffer((YY_BUFFER_STATE)cur_buffer);
+ yy_delete_buffer((YY_BUFFER_STATE)tmp_buffer);
+ tmp_buffer = nullptr;
}
-// A return value of this method uses to terminate the scanner
-// true - terminate, false - continue scanning
-// Use this method only in <<EOF>> handler
-// The return value should be used to make a decision about yyterminate() call
+// Call this method only from the <<EOF>> handler.
+// EOS means the temporal buffer was exhausted and scanning resumes with the
+// initial buffer; SCRIPT_CONTINUE terminates this yylex() pass (end of input).
-bool JSTokenizer::eval_eof()
+JSTokenizer::JSRet JSTokenizer::eval_eof()
{
// If the temporal scan buffer reaches EOF, cleanup and
// continue with the initial one
- if ( buffers->temporal )
+ if ( tmp_buffer )
{
switch_to_initial();
- return false;
+ return EOS;
}
// Normal termination
- return true;
-}
-
-bool JSTokenizer::eval_single_line_comment()
-{
- char c;
- std::string result;
-
- while ( (c = yyinput()) != 0 )
- {
- result += c;
- if ( c == '\n' )
- break;
- }
-
- if ( contains_script_tags(result) )
- {
- state.alerts |= ALERT_UNEXPECTED_TAG;
- return false;
- }
- else
- return true;
-}
-
-bool JSTokenizer::eval_multi_line_comment()
-{
- char c;
- std::string result;
-
- while ( (c = yyinput()) != 0 )
- {
- result += c;
- if ( c == '*' )
- {
- if ( (c = yyinput()) == '/' )
- break;
- else
- unput(c);
- }
- }
-
- if ( contains_script_tags(result) )
- {
- state.alerts |= ALERT_UNEXPECTED_TAG;
- return false;
- }
- else
- return true;
-}
-
-// Unicode line terminators
-#define LS "\u2028"
-#define PS "\u2029"
-
-// This method delineates and validates literals from the input stream such as:
-// 1. double quotes string literal
-// 2. single quotes string literal
-// 3. regex literal
-// Call this method when lexer meets those literals
-// match_prefix is a lexeme part already matched by the lexer (with sentinel char)
-bool JSTokenizer::parse_literal(const std::string& match_prefix, const char sentinel_ch,
- std::string& result, bool& is_alert, bool is_regex)
-{
- bool is_ok = true;
- char c;
- short n = 0;
-
- for ( auto it = match_prefix.crbegin(); it != match_prefix.crend(); ++it )
- unput(*it);
-
- result += yyinput();
- while ( (c = yyinput()) != 0 )
- {
- result += c;
-
- if ( c == sentinel_ch and !( n % 2 ) )
- break;
- else if ( c == '\\' )
- {
- ++n;
- continue;
- }
- else if ( c == '\r' )
- {
- if ( is_regex )
- {
- is_ok = false;
- result = result.substr(0, result.size() - n);
- }
- else if ( n == 0 )
- is_ok = false;
- else if ( ( (c = yyinput()) != 0 ) and c == '\n' )
- {
- result = result.substr(0, result.size() - 2);
- continue;
- }
- else
- {
- is_ok = false;
- unput(c);
- }
-
- break;
- }
- else if ( c == '\n' )
- {
- if ( is_regex )
- {
- is_ok = false;
- result = result.substr(0, result.size() - n);
- }
- else if ( n == 0 )
- is_ok = false;
- else
- {
- result = result.substr(0, result.size() - 2);
- continue;
- }
-
- break;
- }
-
- n = 0;
- }
-
- if ( !is_ok )
- {
- result.back() = sentinel_ch;
- return is_ok;
- }
-
- if ( result.find(LS) != std::string::npos or result.find(PS) != std::string::npos )
- is_ok = false;
-
- if ( contains_script_tags(result) )
- {
- is_alert = true;
- state.alerts |= ALERT_UNEXPECTED_TAG;
- }
-
- return is_ok;
+ return SCRIPT_CONTINUE;
}
-bool JSTokenizer::eval(const JSToken tok, const char* lexeme)
+JSTokenizer::JSRet JSTokenizer::do_spacing(JSToken cur_token)
{
- bool ret = false;
-
- switch( tok )
+ switch (token)
{
- case IDENTIFIER:
- ret = normalize_identifier(prev_tok, lexeme);
- break;
-
- case KEYWORD:
- ret = normalize_lexeme(prev_tok, lexeme);
- break;
-
case PUNCTUATOR:
- ret = normalize_punctuator(prev_tok, lexeme);
- break;
-
case OPERATOR:
- ret = normalize_operator(prev_tok, lexeme);
- break;
-
- case LITERAL:
- ret = normalize_lexeme(prev_tok, lexeme);
- break;
-
case DIRECTIVE:
- ret = normalize_directive(prev_tok, lexeme);
- break;
-
case UNDEFINED:
- ret = normalize_undefined(prev_tok, lexeme);
- break;
- }
+ token = cur_token;
+ return EOS;
- prev_tok = tok;
-
- // set a default pattern match start condition
- if ( yy_start != INITIAL )
- BEGIN(INITIAL);
-
- return ret;
-}
+ case IDENTIFIER:
+ case KEYWORD:
+ case LITERAL:
+ yyout << ' ';
+ token = cur_token;
+ return EOS;
+ }
-bool JSTokenizer::normalize_identifier(const JSToken prev_tok, const char* lexeme)
-{
- return normalize_lexeme(prev_tok, lexeme);
-}
+ assert(false);
-bool JSTokenizer::normalize_punctuator(const JSToken, const char* lexeme)
-{
- return write_output(lexeme);
+ return BAD_TOKEN;
}
-bool JSTokenizer::normalize_operator(const JSToken prev_tok, const char* lexeme)
+JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token)
{
- switch( prev_tok )
+ switch (token)
{
case IDENTIFIER:
case KEYWORD:
case LITERAL:
case DIRECTIVE:
case UNDEFINED:
- return write_output(lexeme);
- break;
+ token = cur_token;
+ return EOS;
case OPERATOR:
- return write_output(" " + std::string(lexeme));
- break;
+ yyout << ' ';
+ token = cur_token;
+ return EOS;
}
- return false;
-}
-
-bool JSTokenizer::normalize_directive(const JSToken prev_tok, const char* lexeme)
-{
- std::string str = lexeme;
+ assert(false);
- if ( str.rfind(";") == std::string::npos )
- str += ";";
-
- return normalize_lexeme(prev_tok, str.c_str());
+ return BAD_TOKEN;
}
-bool JSTokenizer::normalize_undefined(const JSToken, const char* lexeme)
-{ return write_output(lexeme); }
-
-bool JSTokenizer::normalize_lexeme(const JSToken prev_tok, const char* lexeme)
+bool JSTokenizer::unescape(const char* lexeme)
{
- switch( prev_tok )
+ if ( strstr(lexeme, "\\u") )
{
- case PUNCTUATOR:
- case OPERATOR:
- case DIRECTIVE:
- case UNDEFINED:
- return write_output(lexeme);
- break;
-
- case IDENTIFIER:
- case KEYWORD:
- case LITERAL:
- return write_output(" " + std::string(lexeme));
- break;
- }
-
- return false;
-}
-
-bool JSTokenizer::write_output(const std::string& str)
-{
- size_t len = str.size();
- int new_size = *bytes_copied + len;
-
- if ( new_size >= 0 and new_size <= dstlen )
- memcpy((char*) dstbuf, (const char*)str.c_str(), len);
- else
+ const std::string unescaped_lex = unescape_unicode(lexeme);
+ switch_to_temporal(unescaped_lex);
return false;
+ }
- dstbuf += len;
- *bytes_copied = new_size;
return true;
}
-
-void JSTokenizer::update_ptr()
-{ *ptr += yyin.tellg(); }
-
using namespace snort;
-#define NORM_DEPTH 65535
-
-#define NORMALIZE(srcbuf, expected) \
- char dstbuf[sizeof(expected)]; \
- int bytes_copied; \
- const char* ptr = srcbuf; \
- JSNormState state; \
- state.norm_depth = NORM_DEPTH; \
- state.alerts = 0; \
- int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), \
- dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, state);
-
-#define VALIDATE(srcbuf, expected) \
- CHECK(ret == 0); \
- CHECK((ptr - srcbuf) == sizeof(srcbuf)); \
- CHECK(bytes_copied == sizeof(expected) - 1); \
- CHECK(!memcmp(dstbuf, expected, bytes_copied));
-
-#define VALIDATE_FAIL(srcbuf, expected, ret_code, ptr_offset) \
- CHECK(ret == ret_code); \
- CHECK((ptr - srcbuf) == ptr_offset); \
- CHECK(bytes_copied == sizeof(expected) - 1); \
- CHECK(!memcmp(dstbuf, expected, bytes_copied));
-
-#define VALIDATE_ALERT(alert) \
- CHECK(state.alerts & alert);
+#define DEPTH 65535
+
+#define NORMALIZE(src, expected) \
+ char dst[sizeof(expected)]; \
+ JSNormalizer norm; \
+ norm.set_depth(DEPTH); \
+ auto ret = norm.normalize(src, sizeof(src), dst, sizeof(dst)); \
+ const char* ptr = norm.get_src_next(); \
+ int act_len = norm.get_dst_next() - dst; \
+
+#define VALIDATE(src, expected) \
+ CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
+ CHECK((ptr - src) == sizeof(src)); \
+ CHECK(act_len == sizeof(expected) - 1); \
+ CHECK(!memcmp(dst, expected, act_len));
+
+#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset) \
+ CHECK(ret == ret_code); \
+ CHECK((ptr - src) == ptr_offset); \
+ CHECK(act_len == sizeof(expected) - 1); \
+ CHECK(!memcmp(dst, expected, act_len));
+
+#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \
+ { \
+ JSNormalizer norm; \
+ norm.set_depth(depth); \
+ ret = norm.normalize(src, src_len, dst, dst_len); \
+ ptr = norm.get_src_next(); \
+ len = norm.get_dst_next() - dst; \
+ } \
// ClamAV test cases
static const char clamav_buf0[] =
SECTION("test_case_14")
{
NORMALIZE(clamav_buf14, clamav_expected14);
+        // the trailing \0 is included as part of the string;
+        // drop it from the normalized length so the shared macros can be reused
+ act_len -= 1;
VALIDATE(clamav_buf14, clamav_expected14);
}
}
}
SECTION("directives")
{
- const char srcbuf0[] = "'use strict'\nvar a = 1;";
- const char srcbuf1[] = "\"use strict\"\nvar a = 1;";
- const char srcbuf2[] = "'use strict';var a = 1;";
- const char srcbuf3[] = "\"use strict\";var a = 1;";
- const char srcbuf4[] = "var a = 1 'use strict';";
+ const char src0[] = "'use strict'\nvar a = 1;";
+ const char src1[] = "\"use strict\"\nvar a = 1;";
+ const char src2[] = "'use strict';var a = 1;";
+ const char src3[] = "\"use strict\";var a = 1;";
+ const char src4[] = "var a = 1 'use strict';";
+
const char expected0[] = "'use strict';var a=1;";
const char expected1[] = "\"use strict\";var a=1;";
const char expected2[] = "var a=1 'use strict';";
- char dstbuf0[sizeof(expected0)];
- char dstbuf1[sizeof(expected1)];
- char dstbuf2[sizeof(expected0)];
- char dstbuf3[sizeof(expected1)];
- char dstbuf4[sizeof(expected2)];
- int bytes_copied0, bytes_copied1, bytes_copied2, bytes_copied3, bytes_copied4;
- const char* ptr0 = srcbuf0;
- const char* ptr1 = srcbuf1;
- const char* ptr2 = srcbuf2;
- const char* ptr3 = srcbuf3;
- const char* ptr4 = srcbuf4;
- JSNormState state;
- state.norm_depth = NORM_DEPTH;
- state.alerts = 0;
-
- int ret0 = JSNormalizer::normalize(srcbuf0, sizeof(srcbuf0), dstbuf0, sizeof(dstbuf0),
- &ptr0, &bytes_copied0, state);
- int ret1 = JSNormalizer::normalize(srcbuf1, sizeof(srcbuf1), dstbuf1, sizeof(dstbuf1),
- &ptr1, &bytes_copied1, state);
- int ret2 = JSNormalizer::normalize(srcbuf2, sizeof(srcbuf2), dstbuf2, sizeof(dstbuf2),
- &ptr2, &bytes_copied2, state);
- int ret3 = JSNormalizer::normalize(srcbuf3, sizeof(srcbuf3), dstbuf3, sizeof(dstbuf3),
- &ptr3, &bytes_copied3, state);
- int ret4 = JSNormalizer::normalize(srcbuf4, sizeof(srcbuf4), dstbuf4, sizeof(dstbuf4),
- &ptr4, &bytes_copied4, state);
-
- CHECK(ret0 == 0);
- CHECK((ptr0 - srcbuf0) == sizeof(srcbuf0));
- CHECK(bytes_copied0 == sizeof(expected0) - 1);
- CHECK(!memcmp(dstbuf0, expected0, bytes_copied0));
-
- CHECK(ret1 == 0);
- CHECK((ptr1 - srcbuf1) == sizeof(srcbuf1));
- CHECK(bytes_copied1 == sizeof(expected1) - 1);
- CHECK(!memcmp(dstbuf1, expected1, bytes_copied1));
-
- CHECK(ret2 == 0);
- CHECK((ptr2 - srcbuf2) == sizeof(srcbuf2));
- CHECK(bytes_copied2 == sizeof(expected0) - 1);
- CHECK(!memcmp(dstbuf2, expected0, bytes_copied2));
-
- CHECK(ret3 == 0);
- CHECK((ptr3 - srcbuf3) == sizeof(srcbuf3));
- CHECK(bytes_copied3 == sizeof(expected1) - 1);
- CHECK(!memcmp(dstbuf3, expected1, bytes_copied3));
-
- CHECK(ret4 == 0);
- CHECK((ptr4 - srcbuf4) == sizeof(srcbuf4));
- CHECK(bytes_copied4 == sizeof(expected2) - 1);
- CHECK(!memcmp(dstbuf4, expected2, bytes_copied4));
+
+ char dst0[sizeof(expected0)];
+ char dst1[sizeof(expected1)];
+ char dst2[sizeof(expected0)];
+ char dst3[sizeof(expected1)];
+ char dst4[sizeof(expected2)];
+
+ int ret0, ret1, ret2, ret3, ret4;
+ const char *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
+ int act_len0, act_len1, act_len2, act_len3, act_len4;
+
+ NORMALIZE_L(src0, sizeof(src0), dst0, sizeof(dst0), DEPTH, ret0, ptr0, act_len0);
+ NORMALIZE_L(src1, sizeof(src1), dst1, sizeof(dst1), DEPTH, ret1, ptr1, act_len1);
+ NORMALIZE_L(src2, sizeof(src2), dst2, sizeof(dst2), DEPTH, ret2, ptr2, act_len2);
+ NORMALIZE_L(src3, sizeof(src3), dst3, sizeof(dst3), DEPTH, ret3, ptr3, act_len3);
+ NORMALIZE_L(src4, sizeof(src4), dst4, sizeof(dst4), DEPTH, ret4, ptr4, act_len4);
+
+ CHECK(ret0 == JSTokenizer::SCRIPT_CONTINUE);
+ CHECK((ptr0 - src0) == sizeof(src0));
+ CHECK(act_len0 == sizeof(expected0) - 1);
+ CHECK(!memcmp(dst0, expected0, act_len0));
+
+ CHECK(ret1 == JSTokenizer::SCRIPT_CONTINUE);
+ CHECK((ptr1 - src1) == sizeof(src1));
+ CHECK(act_len1 == sizeof(expected1) - 1);
+ CHECK(!memcmp(dst1, expected1, act_len1));
+
+ CHECK(ret2 == JSTokenizer::SCRIPT_CONTINUE);
+ CHECK((ptr2 - src2) == sizeof(src2));
+ CHECK(act_len2 == sizeof(expected0) - 1);
+ CHECK(!memcmp(dst2, expected0, act_len2));
+
+ CHECK(ret3 == JSTokenizer::SCRIPT_CONTINUE);
+ CHECK((ptr3 - src3) == sizeof(src3));
+ CHECK(act_len3 == sizeof(expected1) - 1);
+ CHECK(!memcmp(dst3, expected1, act_len3));
+
+ CHECK(ret4 == JSTokenizer::SCRIPT_CONTINUE);
+ CHECK((ptr4 - src4) == sizeof(src4));
+ CHECK(act_len4 == sizeof(expected2) - 1);
+ CHECK(!memcmp(dst4, expected2, act_len4));
}
SECTION("punctuators")
{
"var a=b% -c;"
"var a=b+ -c;";
+// In the following cases the read cursor ends up just past the malformed literal,
+// and the malformed literal itself is not copied to the output.
+
static const char syntax_cases_buf15[] =
- "var str1 = 'abc\u2028 def' ;\n"
- "var str2 = 'abc\u2029 def' ;\n\r";
+ "var invalid_str = 'abc\u2028 def' ;\n";
static const char syntax_cases_expected15[] =
- "var str1='abc\u2028 def';"
- "var str2='abc\u2029 def';";
+ "var invalid_str='abc";
static const char syntax_cases_buf16[] =
"var invalid_str = \"abc\n def\"";
static const char syntax_cases_expected16[] =
- "var invalid_str=\"abc\"def \"";
+ "var invalid_str=\"abc";
static const char syntax_cases_buf17[] =
"var invalid_str = 'abc\r def'";
static const char syntax_cases_expected17[] =
- "var invalid_str='abc'def '";
+ "var invalid_str='abc";
static const char syntax_cases_buf18[] =
"var invalid_str = 'abc\\\n\r def'";
static const char syntax_cases_expected18[] =
- "var invalid_str='abc'def '";
+ "var invalid_str='abc";
static const char syntax_cases_buf19[] =
"var invalid_re = /abc\\\n def/";
static const char syntax_cases_expected19[] =
- "var invalid_re=/abc/def/";
+ "var invalid_re=/abc";
static const char syntax_cases_buf20[] =
"var invalid_re = /abc\\\r\n def/";
static const char syntax_cases_expected20[] =
- "var invalid_re=/abc/def/";
+ "var invalid_re=/abc";
+
+static const char syntax_cases_buf21[] =
+ "var invalid_str = 'abc\u2029 def' ;\n\r";
+
+static const char syntax_cases_expected21[] =
+ "var invalid_str='abc";
TEST_CASE("syntax cases", "[JSNormalizer]")
{
NORMALIZE(syntax_cases_buf14, syntax_cases_expected14);
VALIDATE(syntax_cases_buf14, syntax_cases_expected14);
}
- SECTION("LS and PS chars within literal")
+}
+
+TEST_CASE("bad tokens", "[JSNormalizer]")
+{
+ SECTION("LS chars within literal")
{
NORMALIZE(syntax_cases_buf15, syntax_cases_expected15);
- VALIDATE(syntax_cases_buf15, syntax_cases_expected15);
+ VALIDATE_FAIL(syntax_cases_buf15, syntax_cases_expected15, JSTokenizer::BAD_TOKEN, 25);
+ }
+ SECTION("PS chars within literal")
+ {
+ NORMALIZE(syntax_cases_buf21, syntax_cases_expected21);
+ VALIDATE_FAIL(syntax_cases_buf21, syntax_cases_expected21, JSTokenizer::BAD_TOKEN, 25);
}
SECTION("explicit LF within literal")
{
NORMALIZE(syntax_cases_buf16, syntax_cases_expected16);
- VALIDATE(syntax_cases_buf16, syntax_cases_expected16);
+ VALIDATE_FAIL(syntax_cases_buf16, syntax_cases_expected16, JSTokenizer::BAD_TOKEN, 23);
}
SECTION("explicit CR within literal")
{
NORMALIZE(syntax_cases_buf17, syntax_cases_expected17);
- VALIDATE(syntax_cases_buf17, syntax_cases_expected17);
+ VALIDATE_FAIL(syntax_cases_buf17, syntax_cases_expected17, JSTokenizer::BAD_TOKEN, 23);
}
SECTION("escaped LF-CR sequence within literal")
{
NORMALIZE(syntax_cases_buf18, syntax_cases_expected18);
- VALIDATE(syntax_cases_buf18, syntax_cases_expected18);
+ VALIDATE_FAIL(syntax_cases_buf18, syntax_cases_expected18, JSTokenizer::BAD_TOKEN, 25);
}
SECTION("escaped LF within regex literal")
{
NORMALIZE(syntax_cases_buf19, syntax_cases_expected19);
- VALIDATE(syntax_cases_buf19, syntax_cases_expected19);
+ VALIDATE_FAIL(syntax_cases_buf19, syntax_cases_expected19, JSTokenizer::BAD_TOKEN, 23);
}
SECTION("escaped CR-LF within regex literal")
{
NORMALIZE(syntax_cases_buf20, syntax_cases_expected20);
- VALIDATE(syntax_cases_buf20, syntax_cases_expected20);
+ VALIDATE_FAIL(syntax_cases_buf20, syntax_cases_expected20, JSTokenizer::BAD_TOKEN, 23);
}
}
-TEST_CASE("norm_depth is specified", "[JSNormalizer]")
+TEST_CASE("endings", "[JSNormalizer]")
{
- const char srcbuf[] = "var abc = 123;\n\r";
- const char expected[] = "var abc";
- char dstbuf[7];
- int bytes_copied;
- const char* ptr = srcbuf;
- JSNormState state;
- state.norm_depth = 7;
- state.alerts = 0;
- int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
- &bytes_copied, state);
-
- CHECK(ret == 0);
- CHECK(bytes_copied == sizeof(expected) - 1);
- CHECK(!memcmp(dstbuf, expected, bytes_copied));
-}
+ SECTION("script closing tag is present", "[JSNormalizer]")
+ {
+ const char src[] =
+ "var a = 1 ;\n" // 12 bytes
+ "var b = 2 ;\n" // 12 bytes
+ "</script>\n" // ptr_offset is here = 33
+ "var c = 3 ;\n";
+ const int ptr_offset = 33;
+ const char expected[] = "var a=1;var b=2;";
+ char dst[sizeof(expected)];
+ int act_len;
+ const char* ptr;
+ int ret;
+
+ NORMALIZE_L(src, sizeof(src), dst, sizeof(dst), DEPTH, ret, ptr, act_len);
+
+ CHECK(ret == JSTokenizer::SCRIPT_ENDED);
+ CHECK(act_len == sizeof(expected) - 1);
+ CHECK((ptr - src) == ptr_offset);
+ CHECK(!memcmp(dst, expected, act_len));
+ }
+ SECTION("depth reached", "[JSNormalizer]")
+ {
+ const char src[] = "var abc = 123;\n\r";
+ const char src2[] = "var foo = 321;\n\r";
+ const char expected[] = "var abc";
+ char dst[sizeof(src)];
+ int act_len;
+ const char* ptr;
+ int ret;
-TEST_CASE("tag script end is specified", "[JSNormalizer]")
-{
- const char srcbuf[] =
- "var a = 1 ;\n" // 12 bytes
- "var b = 2 ;\n" // 12 bytes --> ptr_offset = 24
- "</script>\n"
- "var c = 3 ;\n";
- const int ptr_offset = 24;
- const char expected[] = "var a=1;var b=2;";
- char dstbuf[sizeof(expected)];
- int bytes_copied;
- const char* ptr = srcbuf;
- JSNormState state;
- state.norm_depth = NORM_DEPTH;
- state.alerts = 0;
- int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
- &bytes_copied, state);
-
- CHECK(ret == 0);
- CHECK(bytes_copied == sizeof(expected) - 1);
- CHECK((ptr - srcbuf) == ptr_offset);
- CHECK(!memcmp(dstbuf, expected, bytes_copied));
-}
+ JSNormalizer norm;
-// Tests for JavaScript parsing errors and anomalies
+ norm.set_depth(7);
+ ret = norm.normalize(src, sizeof(src), dst, sizeof(dst));
+ ptr = norm.get_src_next();
+ act_len = norm.get_dst_next() - dst;
-TEST_CASE("parsing errors", "[JSNormalizer]")
-{
- SECTION("dstlen is too small")
+ CHECK(ret == JSTokenizer::EOS);
+ CHECK(ptr == src + 7);
+ CHECK(act_len == sizeof(expected) - 1);
+ CHECK(!memcmp(dst, expected, act_len));
+
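+ // once the depth has been reached, a subsequent call consumes its input without producing output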
+ ret = norm.normalize(src2, sizeof(src2), dst, sizeof(dst));
+ ptr = norm.get_src_next();
+ act_len = norm.get_dst_next() - dst;
+
+ CHECK(ret == JSTokenizer::EOS);
+ CHECK(ptr == src2 + sizeof(src2));
+ CHECK(act_len == 0);
+ }
+ SECTION("dst size is less then src size")
{
- const char srcbuf[] = "var abc = 123;\n\r";
- const char expected[] = "var abc";
- char dstbuf[7];
- int bytes_copied;
- const char* ptr = srcbuf;
- JSNormState state;
- state.norm_depth = NORM_DEPTH;
- state.alerts = 0;
- int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
- &bytes_copied, state);
-
- CHECK(ret == 1);
- CHECK(bytes_copied == sizeof(expected) - 1);
- CHECK(!memcmp(dstbuf, expected, bytes_copied));
+ const char src[] = "var abc = 123;\n\r";
+ const char expected[sizeof(src)] = "var abc";
+ char dst[7];
+ int act_len;
+ const char* ptr;
+ int ret;
+
+ NORMALIZE_L(src, sizeof(src), dst, sizeof(dst), DEPTH, ret, ptr, act_len);
+
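+ // the whole source is still consumed; act_len reports the full normalized length even though
+ // only sizeof(dst) bytes of it fit into the output buffer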
+ CHECK(ret == JSTokenizer::SCRIPT_CONTINUE);
+ CHECK(ptr == src + sizeof(src));
+ CHECK(act_len == 12); // length of the fully normalized source, "var abc=123;"
+ CHECK(!memcmp(dst, expected, sizeof(dst)));
}
}
static const char unexpected_tag_buf1[] =
"var a = 1;\n"
"<script type=application/javascript>\n"
- "var b = 2;\r\n";;
+ "var b = 2;\r\n";
static const char unexpected_tag_expected1[] =
"var a=1;";
"var b = 2;\r\n";
static const char unexpected_tag_expected2[] =
- "var a=1;var str=";
+ "var a=1;var str='";
static const char unexpected_tag_buf3[] =
"var a = 1;\n"
"var b = 2;\r\n";
static const char unexpected_tag_expected3[] =
- "var a=1;var str=";
+ "var a=1;var str='something ";
static const char unexpected_tag_buf4[] =
"var a = 1;\n"
"var b = 2;\r\n";
static const char unexpected_tag_expected4[] =
- "var a=1;var str=";
+ "var a=1;var str='something ";
static const char unexpected_tag_buf5[] =
"var a = 1;\n"
"var b = 2;\r\n";
static const char unexpected_tag_expected5[] =
- "var a=1;var str=";
+ "var a=1;var str='";
static const char unexpected_tag_buf6[] =
"var a = 1;\n"
"var b = 2;\r\n";
static const char unexpected_tag_expected6[] =
- "var a=1;var str=";
+ "var a=1;var str='something ";
static const char unexpected_tag_buf7[] =
"var a = 1;\n"
"var b = 2;\r\n";
static const char unexpected_tag_expected7[] =
- "var a=1;var str=";
+ "var a=1;var str='something ";
static const char unexpected_tag_buf8[] =
"var a = 1;\n"
"var b = 2;\r\n";
static const char unexpected_tag_expected8[] =
- "var a=1;var str='something \\<script\\> something';var b=2;";
+ "var a=1;var str='something \\";
static const char unexpected_tag_buf9[] =
"var a = 1;\n"
"var b = 2;\r\n";
static const char unexpected_tag_expected23[] =
- "var a=1;var str=";
+ "var a=1;var str='script somescript /script something ";
static const char unexpected_tag_buf24[] =
"var a = 1;\n"
"var b = 2;\r\n";
static const char unexpected_tag_expected24[] =
- "var a=1;var str=";
+ "var a=1;var str='something ";
-TEST_CASE("unexpected script tag alert", "[JSNormalizer]")
+TEST_CASE("nested script tags", "[JSNormalizer]")
{
- const int ret_code = 1;
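+ // a script tag seen while a script is already being normalized now stops processing with
+ // OPENING_TAG or CLOSING_TAG rather than raising a separate unexpected-tag alert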
SECTION("explicit open tag - simple")
{
NORMALIZE(unexpected_tag_buf0, unexpected_tag_expected0);
- VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, ret_code, 18);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, JSTokenizer::OPENING_TAG, 18);
}
SECTION("explicit open tag - complex")
{
NORMALIZE(unexpected_tag_buf1, unexpected_tag_expected1);
- VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, ret_code, 18);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, JSTokenizer::OPENING_TAG, 18);
}
SECTION("open tag within literal - start")
{
NORMALIZE(unexpected_tag_buf2, unexpected_tag_expected2);
- VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, ret_code, 41);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, JSTokenizer::OPENING_TAG, 29);
}
SECTION("open tag within literal - mid")
{
NORMALIZE(unexpected_tag_buf3, unexpected_tag_expected3);
- VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, ret_code, 51);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, JSTokenizer::OPENING_TAG, 39);
}
SECTION("open tag within literal - end")
{
NORMALIZE(unexpected_tag_buf4, unexpected_tag_expected4);
- VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, ret_code, 41);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, JSTokenizer::OPENING_TAG, 39);
}
SECTION("close tag within literal - start")
{
NORMALIZE(unexpected_tag_buf5, unexpected_tag_expected5);
- VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, ret_code, 42);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, JSTokenizer::CLOSING_TAG, 31);
}
SECTION("close tag within literal - mid")
{
NORMALIZE(unexpected_tag_buf6, unexpected_tag_expected6);
- VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, ret_code, 52);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, JSTokenizer::CLOSING_TAG, 41);
}
SECTION("close tag within literal - end")
{
NORMALIZE(unexpected_tag_buf7, unexpected_tag_expected7);
- VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, ret_code, 42);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, JSTokenizer::CLOSING_TAG, 41);
}
SECTION("open tag within literal - escaped")
{
NORMALIZE(unexpected_tag_buf8, unexpected_tag_expected8);
- VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8);
+ VALIDATE_FAIL(unexpected_tag_buf8, unexpected_tag_expected8, JSTokenizer::OPENING_TAG, 40);
}
SECTION("close tag within literal - escaped")
{
SECTION("open tag within single-line comment - start")
{
NORMALIZE(unexpected_tag_buf10, unexpected_tag_expected10);
- VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, ret_code, 32);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, JSTokenizer::OPENING_TAG, 20);
}
SECTION("open tag within single-line comment - mid")
{
NORMALIZE(unexpected_tag_buf11, unexpected_tag_expected11);
- VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, ret_code, 42);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, JSTokenizer::OPENING_TAG, 30);
}
SECTION("open tag within single-line comment - end")
{
NORMALIZE(unexpected_tag_buf12, unexpected_tag_expected12);
- VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, ret_code, 32);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, JSTokenizer::OPENING_TAG, 30);
}
SECTION("open tag within multi-line comment - start")
{
NORMALIZE(unexpected_tag_buf13, unexpected_tag_expected13);
- VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, ret_code, 33);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, JSTokenizer::OPENING_TAG, 20);
}
SECTION("open tag within multi-line comment - mid")
{
NORMALIZE(unexpected_tag_buf14, unexpected_tag_expected14);
- VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, ret_code, 43);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, JSTokenizer::OPENING_TAG, 30);
}
SECTION("open tag within multi-line comment - end")
{
NORMALIZE(unexpected_tag_buf15, unexpected_tag_expected15);
- VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, ret_code, 33);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, JSTokenizer::OPENING_TAG, 30);
}
SECTION("close tag within single-line comment - start")
{
NORMALIZE(unexpected_tag_buf16, unexpected_tag_expected16);
- VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, ret_code, 33);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, JSTokenizer::CLOSING_TAG, 22);
}
SECTION("close tag within single-line comment - mid")
{
NORMALIZE(unexpected_tag_buf17, unexpected_tag_expected17);
- VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, ret_code, 50);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, JSTokenizer::CLOSING_TAG, 34);
}
SECTION("close tag within single-line comment - end")
{
NORMALIZE(unexpected_tag_buf18, unexpected_tag_expected18);
- VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, ret_code, 33);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, JSTokenizer::CLOSING_TAG, 32);
}
SECTION("close tag within multi-line comment - start")
{
NORMALIZE(unexpected_tag_buf19, unexpected_tag_expected19);
- VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, ret_code, 34);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, JSTokenizer::CLOSING_TAG, 22);
}
SECTION("close tag within multi-line comment - mid")
{
NORMALIZE(unexpected_tag_buf20, unexpected_tag_expected20);
- VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, ret_code, 44);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, JSTokenizer::CLOSING_TAG, 32);
}
SECTION("close tag within multi-line comment - end")
{
NORMALIZE(unexpected_tag_buf21, unexpected_tag_expected21);
- VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, ret_code, 34);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, JSTokenizer::CLOSING_TAG, 32);
}
SECTION("multiple patterns - not matched")
{
SECTION("multiple patterns - matched")
{
NORMALIZE(unexpected_tag_buf23, unexpected_tag_expected23);
- VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, ret_code, 67);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, JSTokenizer::OPENING_TAG, 65);
}
SECTION("mixed lower and upper case")
{
NORMALIZE(unexpected_tag_buf24, unexpected_tag_expected24);
- VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, ret_code, 41);
- VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, JSTokenizer::OPENING_TAG, 39);
}
}
-