From: Mike Stepanek (mstepane) Date: Fri, 28 May 2021 15:25:37 +0000 (+0000) Subject: Merge pull request #2904 in SNORT/snort3 from ~OSHUMEIK/snort3:js_mpdu to master X-Git-Tag: 3.1.6.0~42 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b9f4b67a32d4717dd241de186375138b5bd0946f;p=thirdparty%2Fsnort3.git Merge pull request #2904 in SNORT/snort3 from ~OSHUMEIK/snort3:js_mpdu to master Squashed commit of the following: commit 33f6bc94d027eb9db5680b3bb3eeba0a2944f8c7 Author: Oleksii Shumeiko Date: Mon May 24 11:22:15 2021 +0300 http_inspect: support partial detect for Javascripts Normalizer context is allocated and freed once per PDU inspection. As a partial flush happens at the closing script tag, it enables context reuse in subsequent normalizations. Chunked data is supported as well (by resetting the context). commit 89043ad68d74323cfb2d4a64a6558929dae9b534 Author: Oleksii Shumeiko Date: Tue May 18 11:15:31 2021 +0300 utils: refactor JSTokenizer Parsing is done mostly by the lexer's rules. The temporary buffer (for Unicode) is reworked. commit f0952f4f9565e2f61c0bbcd76bf06474147ab90c Author: Oleksii Shumeiko Date: Thu May 13 16:02:47 2021 +0300 http_inspect: extend built-in alerts for Javascript processing The new alerts are: - nested opening tag - closing tag seen in an unexpected place - bad token encountered commit e6d50626331e7bc8d30a07905ef8c8341bc0d1c7 Author: Oleksii Shumeiko Date: Thu Apr 29 14:38:38 2021 +0300 utils: rework JSNormalizer class JSNormalizer can be instantiated meaningfully. It returns the state via the return codes. JSNormalizer context is placed on the flow (if needed). Normalization depth is a property of the context. Flow memory usage is updated. UNIT_TEST_BUILD macro added. The inline script count is incremented when the opening tag is seen. 
commit 6a8cad0fc881f94318b4679396e1364cb82ba012 Author: Oleksii Shumeiko Date: Wed Apr 21 14:11:24 2021 +0300 http_inspect: improve MPSE in HttpJsNorm (script start conditions) Naming and code style. Rework match callback functions. Combine some patterns in the opening tag into a single search. Allocate the output buffer only if it is needed. --- diff --git a/cmake/macros.cmake b/cmake/macros.cmake index b21a07907..df6030e71 100644 --- a/cmake/macros.cmake +++ b/cmake/macros.cmake @@ -34,6 +34,7 @@ function (add_cpputest testname) set(multiValueArgs SOURCES LIBS) cmake_parse_arguments(CppUTest "" "" "${multiValueArgs}" ${ARGN}) add_executable(${testname} EXCLUDE_FROM_ALL ${testname}.cc ${CppUTest_SOURCES}) + target_compile_options(${testname} PRIVATE "-DUNIT_TEST_BUILD") target_include_directories(${testname} PRIVATE ${CPPUTEST_INCLUDE_DIR}) target_link_libraries(${testname} ${CPPUTEST_LIBRARIES} ${CppUTest_LIBS}) add_test(${testname} ${testname}) diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index d3cf7817b..e3af9347e 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -264,7 +264,9 @@ enum Infraction INF_MULTIPLE_HOST_HDRS, INF_HTTP2_SETTINGS, INF_UPGRADE_HEADER_HTTP2, - INF_JS_UNEXPECTED_TAG, + INF_JS_BAD_TOKEN, + INF_JS_OPENING_TAG, + INF_JS_CLOSING_TAG, INF__MAX_VALUE }; @@ -323,7 +325,9 @@ enum EventSid EVENT_PDF_UNSUP_COMP_TYPE = 115, EVENT_PDF_CASC_COMP = 116, EVENT_PDF_PARSE_FAILURE = 117, - EVENT_JS_UNEXPECTED_TAG = 118, + EVENT_JS_BAD_TOKEN = 118, + EVENT_JS_OPENING_TAG = 119, + EVENT_JS_CLOSING_TAG = 120, EVENT_LOSS_OF_SYNC = 201, EVENT_CHUNK_ZEROS = 202, diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc index 61eb83350..a986dac88 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.cc +++ 
b/src/service_inspectors/http_inspect/http_flow_data.cc @@ -24,6 +24,7 @@ #include "http_flow_data.h" #include "decompress/file_decomp.h" +#include "utils/js_normalizer.h" #include "http_cutter.h" #include "http_common.h" @@ -79,6 +80,14 @@ HttpFlowData::~HttpFlowData() if (HttpModule::get_peg_counts(PEG_CONCURRENT_SESSIONS) > 0) HttpModule::decrement_peg_counts(PEG_CONCURRENT_SESSIONS); +#ifndef UNIT_TEST_BUILD + if (js_normalizer) + { + update_deallocations(JSNormalizer::size()); + delete js_normalizer; + } +#endif + for (int k=0; k <= 1; k++) { delete infractions[k]; @@ -88,6 +97,8 @@ HttpFlowData::~HttpFlowData() update_deallocations(partial_buffer_length[k]); delete[] partial_detect_buffer[k]; update_deallocations(partial_detect_length[k]); + delete[] js_detect_buffer[k]; + update_deallocations(js_detect_length[k]); HttpTransaction::delete_transaction(transaction[k], nullptr); delete cutter[k]; if (compress_stream[k] != nullptr) @@ -204,6 +215,32 @@ void HttpFlowData::garbage_collect() } } +#ifndef UNIT_TEST_BUILD +snort::JSNormalizer& HttpFlowData::acquire_js_ctx() +{ + if (js_normalizer) + return *js_normalizer; + + js_normalizer = new JSNormalizer(); + update_allocations(JSNormalizer::size()); + + return *js_normalizer; +} + +void HttpFlowData::release_js_ctx() +{ + if (!js_normalizer) + return; + + update_deallocations(JSNormalizer::size()); + delete js_normalizer; + js_normalizer = nullptr; +} +#else +snort::JSNormalizer& HttpFlowData::acquire_js_ctx() { return *js_normalizer; } +void HttpFlowData::release_js_ctx() {} +#endif + bool HttpFlowData::add_to_pipeline(HttpTransaction* latest) { if (pipeline == nullptr) diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h index 617775c82..ffcb7aece 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.h +++ b/src/service_inspectors/http_inspect/http_flow_data.h @@ -39,6 +39,11 @@ class HttpMsgSection; class HttpCutter; class 
HttpQueryParser; +namespace snort +{ +class JSNormalizer; +} + class HttpFlowData : public snort::FlowData { public: @@ -49,6 +54,7 @@ public: size_t size_of() override; friend class HttpInspect; + friend class HttpJsNorm; friend class HttpMsgSection; friend class HttpMsgStart; friend class HttpMsgRequest; @@ -169,6 +175,8 @@ private: uint8_t* partial_detect_buffer[2] = { nullptr, nullptr }; uint32_t partial_detect_length[2] = { 0, 0 }; uint32_t partial_js_detect_length[2] = { 0, 0 }; + uint8_t* js_detect_buffer[2] = { nullptr, nullptr }; + uint32_t js_detect_length[2] = { 0, 0 }; int32_t status_code_num = HttpCommon::STAT_NOT_PRESENT; HttpEnums::VersionId version_id[2] = { HttpEnums::VERS__NOT_PRESENT, HttpEnums::VERS__NOT_PRESENT }; @@ -177,6 +185,12 @@ private: bool cutover_on_clear = false; bool ssl_search_abandoned = false; + // *** HttpJsNorm + snort::JSNormalizer* js_normalizer = nullptr; + + snort::JSNormalizer& acquire_js_ctx(); + void release_js_ctx(); + // *** Transaction management including pipelining static const int MAX_PIPELINE = 100; // requests seen - responses seen <= MAX_PIPELINE HttpTransaction* transaction[2] = { nullptr, nullptr }; diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index 8aad96222..90e7666f6 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -23,159 +23,164 @@ #include "http_js_norm.h" -#include "utils/js_norm_state.h" #include "utils/js_normalizer.h" #include "utils/safec.h" #include "utils/util_jsnorm.h" +#include "http_common.h" #include "http_enum.h" using namespace HttpEnums; using namespace snort; -HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) : - uri_param(uri_param_), javascript_search_mpse(nullptr), - htmltype_search_mpse(nullptr) +HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) : + uri_param(uri_param_), + 
normalization_depth(normalization_depth_), + mpse_otag(nullptr), + mpse_attr(nullptr), + mpse_type(nullptr) {} HttpJsNorm::~HttpJsNorm() { - delete javascript_search_mpse; - delete js_src_attr_search_mpse; - delete htmltype_search_mpse; + delete mpse_otag; + delete mpse_attr; + delete mpse_type; } void HttpJsNorm::configure() { - if ( configure_once ) + if (configure_once) return; - javascript_search_mpse = new SearchTool; - js_src_attr_search_mpse = new SearchTool; - htmltype_search_mpse = new SearchTool; - - javascript_search_mpse->add(script_start, script_start_length, JS_JAVASCRIPT); - javascript_search_mpse->prep(); - - js_src_attr_search_mpse->add(script_src_attr, script_src_attr_length, JS_ATTR_SRC); - js_src_attr_search_mpse->prep(); - - struct HiSearchToken - { - const char* name; - int name_len; - int search_id; - }; - - const HiSearchToken html_patterns[] = - { - { "JAVASCRIPT", 10, HTML_JS }, - { "ECMASCRIPT", 10, HTML_EMA }, - { "VBSCRIPT", 8, HTML_VB }, - { nullptr, 0, 0 } - }; - - for (const HiSearchToken* tmp = &html_patterns[0]; tmp->name != nullptr; tmp++) - { - htmltype_search_mpse->add(tmp->name, tmp->name_len, tmp->search_id); - } - htmltype_search_mpse->prep(); + mpse_otag = new SearchTool; + mpse_attr = new SearchTool; + mpse_type = new SearchTool; + + static constexpr const char* otag_start = "add(otag_start, strlen(otag_start), 0); + mpse_attr->add(attr_gt, strlen(attr_gt), AID_GT); + mpse_attr->add(attr_src, strlen(attr_src), AID_SRC); + mpse_attr->add(attr_js1, strlen(attr_js1), AID_JS); + mpse_attr->add(attr_js2, strlen(attr_js2), AID_ECMA); + mpse_attr->add(attr_vb, strlen(attr_vb), AID_VB); + mpse_type->add(attr_js1, strlen(attr_js1), AID_JS); + mpse_type->add(attr_js2, strlen(attr_js2), AID_ECMA); + mpse_type->add(attr_vb, strlen(attr_vb), AID_VB); + + mpse_otag->prep(); + mpse_attr->prep(); + mpse_type->prep(); configure_once = true; } -void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, HttpInfractions* 
infractions, - HttpEventGen* events, int64_t js_normalization_depth) const +void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, + HttpInfractions* infractions, HttpFlowData* ssn) const { - bool js_present = false; - int index = 0; const char* ptr = (const char*)input.start(); const char* const end = ptr + input.length(); - uint8_t* buffer = new uint8_t[input.length()]; + HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER]; - JSNormState state; - state.norm_depth = js_normalization_depth; - state.alerts = 0; + char* buffer = nullptr; + char* dst = nullptr; + const char* dst_end = nullptr; + + bool script_continue = alive_ctx(ssn); while (ptr < end) { - int bytes_copied = 0; - int mindex; - - // Search for beginning of a javascript - if (javascript_search_mpse->find(ptr, end-ptr, search_js_found, false, &mindex) > 0) + if (!script_continue) { - const char* js_start = ptr + mindex; - const char* const angle_bracket = - (const char*)SnortStrnStr(js_start, end - js_start, ">"); - if (angle_bracket == nullptr || (end - angle_bracket) == 0) + if (!mpse_otag->find(ptr, end - ptr, match_otag, false, &ptr)) + break; + if (ptr >= end) break; - bool type_js = false; - bool external_js = false; - if (angle_bracket > js_start) - { - int mid; - const int script_found = htmltype_search_mpse->find( - js_start, (angle_bracket-js_start), search_html_found, false, &mid); - - external_js = is_external_script(js_start, angle_bracket); + MatchContext sctx = {ptr, true, false}; - js_start = angle_bracket + 1; - if (script_found > 0) - { - switch (mid) - { - case HTML_JS: - js_present = true; - type_js = true; - break; - default: - type_js = false; - break; - } - } - else - { - // if no type or language is found we assume it is a javascript - js_present = true; - type_js = true; - } - } - // Save before the \n" // ptr_offset is here = 33 + "var c = 3 ;\n"; + const int ptr_offset = 33; + const char expected[] = "var a=1;var b=2;"; + char dst[sizeof(expected)]; + 
int act_len; + const char* ptr; + int ret; + + NORMALIZE_L(src, sizeof(src), dst, sizeof(dst), DEPTH, ret, ptr, act_len); + + CHECK(ret == JSTokenizer::SCRIPT_ENDED); + CHECK(act_len == sizeof(expected) - 1); + CHECK((ptr - src) == ptr_offset); + CHECK(!memcmp(dst, expected, act_len)); + } + SECTION("depth reached", "[JSNormalizer]") + { + const char src[] = "var abc = 123;\n\r"; + const char src2[] = "var foo = 321;\n\r"; + const char expected[] = "var abc"; + char dst[sizeof(src)]; + int act_len; + const char* ptr; + int ret; -TEST_CASE("tag script end is specified", "[JSNormalizer]") -{ - const char srcbuf[] = - "var a = 1 ;\n" // 12 bytes - "var b = 2 ;\n" // 12 bytes --> ptr_offset = 24 - "\n" - "var c = 3 ;\n"; - const int ptr_offset = 24; - const char expected[] = "var a=1;var b=2;"; - char dstbuf[sizeof(expected)]; - int bytes_copied; - const char* ptr = srcbuf; - JSNormState state; - state.norm_depth = NORM_DEPTH; - state.alerts = 0; - int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr, - &bytes_copied, state); - - CHECK(ret == 0); - CHECK(bytes_copied == sizeof(expected) - 1); - CHECK((ptr - srcbuf) == ptr_offset); - CHECK(!memcmp(dstbuf, expected, bytes_copied)); -} + JSNormalizer norm; -// Tests for JavaScript parsing errors and anomalies + norm.set_depth(7); + ret = norm.normalize(src, sizeof(src), dst, sizeof(dst)); + ptr = norm.get_src_next(); + act_len = norm.get_dst_next() - dst; -TEST_CASE("parsing errors", "[JSNormalizer]") -{ - SECTION("dstlen is too small") + CHECK(ret == JSTokenizer::EOS); + CHECK(ptr == src + 7); + CHECK(act_len == sizeof(expected) - 1); + CHECK(!memcmp(dst, expected, act_len)); + + ret = norm.normalize(src2, sizeof(src2), dst, sizeof(dst)); + ptr = norm.get_src_next(); + act_len = norm.get_dst_next() - dst; + + CHECK(ret == JSTokenizer::EOS); + CHECK(ptr == src2 + sizeof(src2)); + CHECK(act_len == 0); + } + SECTION("dst size is less then src size") { - const char srcbuf[] = "var abc = 
123;\n\r"; - const char expected[] = "var abc"; - char dstbuf[7]; - int bytes_copied; - const char* ptr = srcbuf; - JSNormState state; - state.norm_depth = NORM_DEPTH; - state.alerts = 0; - int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr, - &bytes_copied, state); - - CHECK(ret == 1); - CHECK(bytes_copied == sizeof(expected) - 1); - CHECK(!memcmp(dstbuf, expected, bytes_copied)); + const char src[] = "var abc = 123;\n\r"; + const char expected[sizeof(src)] = "var abc"; + char dst[7]; + int act_len; + const char* ptr; + int ret; + + NORMALIZE_L(src, sizeof(src), dst, sizeof(dst), DEPTH, ret, ptr, act_len); + + CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); + CHECK(ptr == src + sizeof(src)); + CHECK(act_len == 12); // size of normalized src + CHECK(!memcmp(dst, expected, sizeof(dst))); } } @@ -896,7 +918,7 @@ static const char unexpected_tag_expected0[] = static const char unexpected_tag_buf1[] = "var a = 1;\n" "