PEG_OTHER_METHOD, PEG_REQUEST_BODY, PEG_CHUNKED, PEG_URI_NORM, PEG_URI_PATH, PEG_URI_CODING,
PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION,
PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY,
- PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_COUNT_MAX };
+ PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_COUNT_MAX };
// Result of scanning by splitter
enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE,
INF_MULTIPLE_HOST_HDRS,
INF_HTTP2_SETTINGS,
INF_UPGRADE_HEADER_HTTP2,
+ INF_JS_UNEXPECTED_TAG,
INF__MAX_VALUE
};
EVENT_PDF_UNSUP_COMP_TYPE = 115,
EVENT_PDF_CASC_COMP = 116,
EVENT_PDF_PARSE_FAILURE = 117,
+ EVENT_JS_UNEXPECTED_TAG = 118,
EVENT_LOSS_OF_SYNC = 201,
EVENT_CHUNK_ZEROS = 202,
#include "http_js_norm.h"
+#include "utils/js_norm_state.h"
#include "utils/js_normalizer.h"
#include "utils/safec.h"
#include "utils/util_jsnorm.h"
HttpJsNorm::~HttpJsNorm()
{
delete javascript_search_mpse;
+ delete js_src_attr_search_mpse;
delete htmltype_search_mpse;
}
return;
javascript_search_mpse = new SearchTool;
+ js_src_attr_search_mpse = new SearchTool;
htmltype_search_mpse = new SearchTool;
javascript_search_mpse->add(script_start, script_start_length, JS_JAVASCRIPT);
javascript_search_mpse->prep();
+ js_src_attr_search_mpse->add(script_src_attr, script_src_attr_length, JS_ATTR_SRC);
+ js_src_attr_search_mpse->prep();
+
struct HiSearchToken
{
const char* name;
configure_once = true;
}
-void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
- int64_t js_normalization_depth) const
+void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
+ HttpEventGen* events, int64_t js_normalization_depth) const
{
bool js_present = false;
int index = 0;
uint8_t* buffer = new uint8_t[input.length()];
+ JSNormState state;
+ state.norm_depth = js_normalization_depth;
+ state.alerts = 0;
+
while (ptr < end)
{
int bytes_copied = 0;
break;
bool type_js = false;
+ bool external_js = false;
if (angle_bracket > js_start)
{
int mid;
const int script_found = htmltype_search_mpse->find(
js_start, (angle_bracket-js_start), search_html_found, false, &mid);
+ external_js = is_external_script(js_start, angle_bracket);
+
js_start = angle_bracket + 1;
if (script_found > 0)
{
}
ptr = js_start;
- if (!type_js)
+ if (!type_js or external_js)
continue;
JSNormalizer::normalize(js_start, (uint16_t)(end-js_start), (char*)buffer+index,
- (uint16_t)(input.length() - index), &ptr, &bytes_copied, js_normalization_depth);
+ (uint16_t)(input.length() - index), &ptr, &bytes_copied, state);
+
+ HttpModule::increment_peg_counts(PEG_JS_INLINE);
index += bytes_copied;
}
}
if (js_present)
+ {
+ if (state.alerts & ALERT_UNEXPECTED_TAG)
+ {
+ *infractions += INF_JS_UNEXPECTED_TAG;
+ events->create_event(EVENT_JS_UNEXPECTED_TAG);
+ }
output.set(index, buffer, true);
+ }
else
delete[] buffer;
}
*((int*) index_ptr) = index - script_start_length;
return 1;
}
+// Search callback registered for the "SRC" attribute pattern.
+// Writes (index - script_src_attr_length) into the int that index_ptr points to and
+// returns 1; the remaining arguments are ignored.
+int HttpJsNorm::search_js_src_attr_found(void*, void*, int index, void* index_ptr, void*)
+{
+    int* const match_offset = static_cast<int*>(index_ptr);
+    *match_offset = index - script_src_attr_length;
+    return 1;
+}
int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*) // search callback; only id and id_ptr are used
{
*((int*) id_ptr) = (int)(uintptr_t)id; // store the pointer-encoded id as an int in the slot id_ptr points to
return 1;
}
+// HTML treats TAB, LF, FF, CR and SPACE as whitespace inside a tag.
+static inline bool is_html_space(const char c)
+{
+    return c == ' ' or c == '\t' or c == '\n' or c == '\f' or c == '\r';
+}
+
+// Characters that may directly precede an attribute name inside a start tag:
+// whitespace, a solidus, or the closing quote of the previous attribute value.
+static inline bool precedes_attr_name(const char c)
+{
+    return is_html_space(c) or c == '/' or c == '"' or c == '\'';
+}
+
+// Tells whether the opening script tag spanning [it, script_tag_end) carries a SRC
+// attribute with a value, i.e. "SRC", optional HTML whitespace, then '='.
+//
+// it              - first byte of the tag span to inspect
+// script_tag_end  - one past the last byte to inspect
+// returns         - true if such an attribute is found, false otherwise
+//
+// A "SRC" match that is only the tail of a longer attribute name (data-src, xsrc, ...)
+// is ignored, otherwise an inline script could be misclassified as external.
+// Known limitation: attribute values are not parsed, so " src=" inside a quoted value
+// of another attribute still counts as a match.
+bool HttpJsNorm::is_external_script(const char* it, const char* script_tag_end) const
+{
+    const char* const tag_start = it;
+    int src_pos = 0;
+
+    while (it < script_tag_end and js_src_attr_search_mpse->find(it, (script_tag_end - it),
+        search_js_src_attr_found, false, &src_pos))
+    {
+        const char* const name = it + src_pos;
+
+        // resume any further search right after this occurrence of "SRC"
+        it = name + script_src_attr_length;
+
+        // nothing precedes the name inside the inspected span when name == tag_start;
+        // that case is accepted as before
+        if (name > tag_start and !precedes_attr_name(*(name - 1)))
+            continue;
+
+        const char* after = it;
+        while (after < script_tag_end and is_html_space(*after))
+            ++after;
+
+        if (after < script_tag_end and *after == '=')
+            return true;
+    }
+
+    return false;
+}
+
~HttpJsNorm();
void legacy_normalize(const Field& input, Field& output, HttpInfractions* infractions,
HttpEventGen* events, int max_javascript_whitespaces) const;
- void enhanced_normalize(const Field& input, Field& output,
- int64_t js_normalization_depth) const;
+ void enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
+ HttpEventGen* events, int64_t js_normalization_depth) const;
void configure();
private:
bool configure_once = false;
enum JsSearchId { JS_JAVASCRIPT };
+ enum JsSrcAttrSearchId { JS_ATTR_SRC };
enum HtmlSearchId { HTML_JS, HTML_EMA, HTML_VB };
static constexpr const char* script_start = "<SCRIPT";
static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
+ static constexpr const char* script_src_attr = "SRC";
+ static constexpr int script_src_attr_length = sizeof("SRC") - 1;
const HttpParaList::UriParam& uri_param;
snort::SearchTool* javascript_search_mpse;
+ snort::SearchTool* js_src_attr_search_mpse;
snort::SearchTool* htmltype_search_mpse;
static int search_js_found(void*, void*, int index, void*, void*);
+ static int search_js_src_attr_found(void*, void*, int index, void*, void*);
static int search_html_found(void* id, void*, int, void*, void*);
+
+ bool is_external_script(const char* it, const char* script_tag_end) const;
};
#endif
output.set(input);
params->js_norm_param.js_norm->enhanced_normalize(input, enhanced_js_norm_body,
+ transaction->get_infractions(source_id), session_data->events[source_id],
params->js_norm_param.js_normalization_depth);
const int32_t norm_length =
{ EVENT_PDF_UNSUP_COMP_TYPE, "PDF file unsupported compression type" },
{ EVENT_PDF_CASC_COMP, "PDF file cascaded compression" },
{ EVENT_PDF_PARSE_FAILURE, "PDF file parse failure" },
+ { EVENT_JS_UNEXPECTED_TAG, "unexpected script tag within inline javascript" },
{ EVENT_LOSS_OF_SYNC, "not HTTP traffic" },
{ EVENT_CHUNK_ZEROS, "chunk length has excessive leading zeros" },
{ EVENT_WS_BETWEEN_MSGS, "white space before or between messages" },
{ CountType::SUM, "pipelined_flows", "total HTTP connections containing pipelined requests" },
{ CountType::SUM, "pipelined_requests", "total requests placed in a pipeline" },
{ CountType::SUM, "total_bytes", "total HTTP data bytes inspected" },
+ { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" },
{ CountType::END, nullptr, nullptr }
};
dnet_header.h
dyn_array.cc
dyn_array.h
+ js_norm_state.h
js_normalizer.cc
js_normalizer.h
js_tokenizer.h
--- /dev/null
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation. You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_norm_state.h author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifndef JS_NORM_STATE_H
+#define JS_NORM_STATE_H
+
+#include "main/snort_types.h"
+
+namespace snort
+{
+#define ALERT_UNEXPECTED_TAG 0x1
+
+// State shared between the caller and the JavaScript normalizer for one or more
+// normalize() calls. Members are default-initialized so a freshly declared state
+// never carries indeterminate values.
+struct JSNormState
+{
+    // upper bound on the number of source bytes handed to the lexer
+    int64_t norm_depth = 0;
+    // bitmask of ALERT_* flags raised by the tokenizer
+    uint16_t alerts = 0;
+};
+}
+
+#endif // JS_NORM_STATE_H
+
using namespace snort;
int JSNormalizer::normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
- const char** ptr, int* bytes_copied, int64_t norm_depth)
+ const char** ptr, int* bytes_copied, JSNormState& state)
{
std::stringstream in, out;
+ in.rdbuf()->pubsetbuf(const_cast<char*>(srcbuf),
+ (state.norm_depth >= srclen) ? srclen : state.norm_depth); // input size: srclen when norm_depth >= srclen, otherwise norm_depth
- in.rdbuf()->pubsetbuf(const_cast<char*>(srcbuf), (norm_depth >= srclen) ? srclen : norm_depth);
- JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied);
-
+ JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied, state); // state is held by reference so the tokenizer can raise alert flags in it
return tokenizer.yylex();
}
#include "main/snort_types.h"
+#include "js_norm_state.h"
+
namespace snort
{
class JSNormalizer
{
public:
static int normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
- const char** ptr, int* bytes_copied, int64_t norm_depth);
+ const char** ptr, int* bytes_copied, JSNormState& state);
};
}
#include "log/messages.h"
+#include "js_norm_state.h"
+
class JSTokenizer : public yyFlexLexer
{
private:
PUNCTUATOR,
OPERATOR,
LITERAL,
- DIRECTIVE,
- TAG_SCRIPT_OPEN
+ DIRECTIVE
};
public:
// we need an out stream because yyFlexLexer API strongly requires that
JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
- const uint16_t dstlen, const char** ptr, int* bytes_copied);
+ const uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state);
~JSTokenizer() override;
// so, Flex will treat this class as yyclass
bool eval_string_literal(const char* match_prefix, const char quotes);
bool eval_regex_literal(const char* match_prefix);
bool eval_eof();
- void skip_single_line_comment();
- void skip_multi_line_comment();
+ bool eval_single_line_comment();
+ bool eval_multi_line_comment();
bool parse_literal(const std::string& match_prefix, const char sentinel_ch,
- std::string& result, bool is_regex = false);
+ std::string& result, bool& is_alert, bool is_regex = false);
// main lexeme handler
// all scanned tokens must pass here
bool normalize_punctuator(const JSToken prev_tok, const char* lexeme);
bool normalize_operator(const JSToken prev_tok, const char* lexeme);
bool normalize_directive(const JSToken prev_tok, const char* lexeme);
- bool normalize_tag_script_open(const JSToken prev_tok, const char* lexeme);
bool normalize_undefined(const JSToken prev_tok, const char* lexeme);
bool normalize_lexeme(const JSToken prev_tok, const char* lexeme);
JSToken prev_tok = UNDEFINED;
+ snort::JSNormState& state;
+
};
#endif // JS_TOKENIZER_H
#endif
#include "utils/js_tokenizer.h"
+
+ #include <cassert>
+
+ #include "utils/util_cstring.h"
%}
/* The following grammar was created based on ECMAScript specification */
LITERAL_NAN NaN
LITERAL {LITERAL_NULL}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN}
-HTML_COMMENT_OPEN <!--
-TAG_SCRIPT_OPEN (?i:<script)
-TAG_SCRIPT_CLOSE (?i:<\/script>)
+HTML_COMMENT_OPEN <!--
+HTML_TAG_SCRIPT_OPEN (?i:<script)
+HTML_TAG_SCRIPT_CLOSE (?i:<\/script>)
/* from 0x000 to 0x10FFFD to match undefined tokens */
/* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
<*>{WHITESPACES} { /* skip */ }
<*>{CHAR_ESCAPE_SEQUENCES} { /* skip */ }
<*>{LINE_TERMINATORS} { BEGIN(regex); }
-<*>{TAG_SCRIPT_OPEN} { if ( !eval(TAG_SCRIPT_OPEN, YYText()) ) { update_ptr(); return 1; } }
-<*>{TAG_SCRIPT_CLOSE} { update_ptr(); *ptr -= YYLeng(); return 0; }
-<*>{HTML_COMMENT_OPEN} { skip_single_line_comment(); }
-<*>{SINGLE_LINE_COMMENT} { skip_single_line_comment(); }
-<*>{MULTI_LINE_COMMENT} { skip_multi_line_comment(); }
+<*>{HTML_TAG_SCRIPT_OPEN} { state.alerts |= ALERT_UNEXPECTED_TAG; update_ptr(); return 1; }
+<*>{HTML_TAG_SCRIPT_CLOSE} { update_ptr(); *ptr -= YYLeng(); return 0; }
+<*>{HTML_COMMENT_OPEN} { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
+<*>{SINGLE_LINE_COMMENT} { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
+<*>{MULTI_LINE_COMMENT} { if ( !eval_multi_line_comment() ) { update_ptr(); return 1; } }
<*>{USE_STRICT_DIRECTIVE} { if ( !eval(DIRECTIVE, YYText()) ) { update_ptr(); return 1; } }
<*>{KEYWORD} { if ( !eval(KEYWORD, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
<*>{CLOSING_BRACES} { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
<<EOF>> { if ( eval_eof() ) { update_ptr(); return 0; } }
%%
-#include <cassert>
-
// static helper functions
static std::string unicode_to_utf8(const unsigned int code)
return res;
}
+// Scans str case-insensitively for "script" and reports whether any occurrence is
+// part of a tag: preceded by "</", or preceded by '<' that is not itself preceded
+// by a backslash. An occurrence at offset 0 has no preceding character and never counts.
+static bool contains_script_tags(const std::string& str)
+{
+    static constexpr const char* needle = "SCRIPT";
+    static constexpr const int needle_len = sizeof("SCRIPT") - 1;
+
+    const char* const begin = str.c_str();
+    const char* const limit = begin + str.size();
+
+    for ( const char* hit = snort::SnortStrcasestr(begin, (limit - begin), needle);
+        hit != nullptr;
+        hit = snort::SnortStrcasestr(hit + needle_len, (limit - (hit + needle_len)), needle) )
+    {
+        const int offset = hit - begin;
+
+        // nothing in front of the match, so it cannot be a tag
+        if ( offset < 1 )
+            continue;
+
+        const char prev = *(hit - 1);
+
+        if ( offset == 1 )
+        {
+            // only one character precedes the match: an opening '<' is enough
+            if ( prev == '<' )
+                return true;
+            continue;
+        }
+
+        const char before_prev = *(hit - 2);
+        const bool closing_tag = (prev == '/' and before_prev == '<');
+        const bool opening_tag = (prev == '<' and before_prev != '\\');
+
+        if ( closing_tag or opening_tag )
+            return true;
+    }
+
+    return false;
+}
+
// JSTokenizer members
struct JSTokenizer::ScanBuffers
};
JSTokenizer::JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
- uint16_t dstlen, const char** ptr, int* bytes_copied)
+ uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state)
: yyFlexLexer(in, out),
dstbuf(dstbuf),
dstlen(dstlen),
ptr(ptr),
- bytes_copied(bytes_copied)
+ bytes_copied(bytes_copied),
+ state(state)
{
assert(bytes_copied);
init();
bool JSTokenizer::eval_string_literal(const char* match_prefix, const char quotes)
{
std::string s;
- bool is_ok = parse_literal(match_prefix, quotes, s);
+ bool is_alert = false; // out-flag filled in by parse_literal()
+ bool is_ok = parse_literal(match_prefix, quotes, s, is_alert);
+
+ if ( is_alert )
+ return false; // bail out before eval() is reached
return eval(is_ok ? LITERAL : UNDEFINED, s.c_str());
}
static const std::string regex_flags = "gimsuy";
std::string s;
- bool is_ok = parse_literal(match_prefix, '/', s, true);
+ bool is_alert = false;
+ bool is_ok = parse_literal(match_prefix, '/', s, is_alert, true);
+
+ if ( is_alert )
+ return false;
// append regex flags
char c;
return true;
}
-void JSTokenizer::skip_single_line_comment()
+bool JSTokenizer::eval_single_line_comment() // returns false when the consumed comment text contains a script tag, true otherwise
{
char c;
+ std::string result; // collects every character read from the input by this call
while ( (c = yyinput()) != 0 )
{
+ result += c;
if ( c == '\n' )
break;
}
+
+ if ( contains_script_tags(result) )
+ {
+ state.alerts |= ALERT_UNEXPECTED_TAG; // record the alert in the shared normalizer state
+ return false;
+ }
+ else
+ return true;
}
-void JSTokenizer::skip_multi_line_comment()
+bool JSTokenizer::eval_multi_line_comment()
{
char c;
+ std::string result;
while ( (c = yyinput()) != 0 )
{
+ result += c;
if ( c == '*' )
{
if ( (c = yyinput()) == '/' )
unput(c);
}
}
+
+ if ( contains_script_tags(result) )
+ {
+ state.alerts |= ALERT_UNEXPECTED_TAG;
+ return false;
+ }
+ else
+ return true;
}
// Unicode line terminators
// Call this method when lexer meets those literals
// match_prefix is a lexeme part already matched by the lexer (with sentinel char)
bool JSTokenizer::parse_literal(const std::string& match_prefix, const char sentinel_ch,
- std::string& result, bool is_regex)
+ std::string& result, bool& is_alert, bool is_regex)
{
bool is_ok = true;
char c;
if ( result.find(LS) != std::string::npos or result.find(PS) != std::string::npos )
is_ok = false;
+ if ( contains_script_tags(result) )
+ {
+ is_alert = true;
+ state.alerts |= ALERT_UNEXPECTED_TAG;
+ }
+
return is_ok;
}
ret = normalize_directive(prev_tok, lexeme);
break;
- case TAG_SCRIPT_OPEN:
- ret = normalize_tag_script_open(prev_tok, lexeme);
- break;
-
case UNDEFINED:
ret = normalize_undefined(prev_tok, lexeme);
break;
case PUNCTUATOR:
case LITERAL:
case DIRECTIVE:
- case TAG_SCRIPT_OPEN:
case UNDEFINED:
return write_output(lexeme);
break;
return normalize_lexeme(prev_tok, str.c_str());
}
-bool JSTokenizer::normalize_tag_script_open(const JSToken, const char* lexeme)
-{
- // FIXIT-L add builtin alert here
- return write_output(lexeme);
-}
-
bool JSTokenizer::normalize_undefined(const JSToken, const char* lexeme)
{ return write_output(lexeme); }
case IDENTIFIER:
case KEYWORD:
case LITERAL:
- case TAG_SCRIPT_OPEN:
return write_output(" " + std::string(lexeme));
break;
}
SOURCES
${FLEX_js_tokenizer_OUTPUTS}
../js_normalizer.cc
+ ../util_cstring.cc
)
char dstbuf[sizeof(expected)]; \
int bytes_copied; \
const char* ptr = srcbuf; \
- int norm_depth = NORM_DEPTH; \
+ JSNormState state; \
+ state.norm_depth = NORM_DEPTH; \
+ state.alerts = 0; \
int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), \
- dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, norm_depth);
+ dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, state);
#define VALIDATE(srcbuf, expected) \
CHECK(ret == 0); \
CHECK(bytes_copied == sizeof(expected) - 1); \
CHECK(!memcmp(dstbuf, expected, bytes_copied));
+#define VALIDATE_FAIL(srcbuf, expected, ret_code, ptr_offset) \
+ CHECK(ret == ret_code); \
+ CHECK((ptr - srcbuf) == ptr_offset); \
+ CHECK(bytes_copied == sizeof(expected) - 1); \
+ CHECK(!memcmp(dstbuf, expected, bytes_copied));
+
+#define VALIDATE_ALERT(alert) \
+ CHECK(state.alerts & alert);
+
// ClamAV test cases
static const char clamav_buf0[] =
"function foo(a, b) {\n"
"$2abc _2abc abc $__$ čć XĂA12 \u0041abc \u00FBdef \u1234ghi ab ww "
"ab ww ab ww ab â ww 2 abc";
-static const char all_patterns_buf6[] =
- "var a = 1;\n"
- "<script>\n"
- "<script var>\n"
- "var b = 2 ;\n";
-
-static const char all_patterns_expected6[] =
- "var a=1;<script><script var>var b=2;";
-
TEST_CASE("all patterns", "[JSNormalizer]")
{
SECTION("whitespaces and special characters")
const char* ptr2 = srcbuf2;
const char* ptr3 = srcbuf3;
const char* ptr4 = srcbuf4;
- int norm_depth = NORM_DEPTH;
+ JSNormState state;
+ state.norm_depth = NORM_DEPTH;
+ state.alerts = 0;
int ret0 = JSNormalizer::normalize(srcbuf0, sizeof(srcbuf0), dstbuf0, sizeof(dstbuf0),
- &ptr0, &bytes_copied0, norm_depth);
+ &ptr0, &bytes_copied0, state);
int ret1 = JSNormalizer::normalize(srcbuf1, sizeof(srcbuf1), dstbuf1, sizeof(dstbuf1),
- &ptr1, &bytes_copied1, norm_depth);
+ &ptr1, &bytes_copied1, state);
int ret2 = JSNormalizer::normalize(srcbuf2, sizeof(srcbuf2), dstbuf2, sizeof(dstbuf2),
- &ptr2, &bytes_copied2, norm_depth);
+ &ptr2, &bytes_copied2, state);
int ret3 = JSNormalizer::normalize(srcbuf3, sizeof(srcbuf3), dstbuf3, sizeof(dstbuf3),
- &ptr3, &bytes_copied3, norm_depth);
+ &ptr3, &bytes_copied3, state);
int ret4 = JSNormalizer::normalize(srcbuf4, sizeof(srcbuf4), dstbuf4, sizeof(dstbuf4),
- &ptr4, &bytes_copied4, norm_depth);
+ &ptr4, &bytes_copied4, state);
CHECK(ret0 == 0);
CHECK((ptr0 - srcbuf0) == sizeof(srcbuf0));
NORMALIZE(all_patterns_buf5, all_patterns_expected5);
VALIDATE(all_patterns_buf5, all_patterns_expected5);
}
- SECTION("tag script open")
- {
- NORMALIZE(all_patterns_buf6, all_patterns_expected6);
- VALIDATE(all_patterns_buf6, all_patterns_expected6);
- }
}
// Tests for different syntax cases
char dstbuf[7];
int bytes_copied;
const char* ptr = srcbuf;
- int norm_depth = 7;
+ JSNormState state;
+ state.norm_depth = 7;
+ state.alerts = 0;
int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
- &bytes_copied, norm_depth);
+ &bytes_copied, state);
CHECK(ret == 0);
CHECK(bytes_copied == sizeof(expected) - 1);
char dstbuf[sizeof(expected)];
int bytes_copied;
const char* ptr = srcbuf;
- int norm_depth = NORM_DEPTH;
+ JSNormState state;
+ state.norm_depth = NORM_DEPTH;
+ state.alerts = 0;
int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
- &bytes_copied, norm_depth);
+ &bytes_copied, state);
CHECK(ret == 0);
CHECK(bytes_copied == sizeof(expected) - 1);
char dstbuf[7];
int bytes_copied;
const char* ptr = srcbuf;
- int norm_depth = NORM_DEPTH;
+ JSNormState state;
+ state.norm_depth = NORM_DEPTH;
+ state.alerts = 0;
int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
- &bytes_copied, norm_depth);
+ &bytes_copied, state);
CHECK(ret == 1);
CHECK(bytes_copied == sizeof(expected) - 1);
}
}
+static const char unexpected_tag_buf0[] =
+ "var a = 1;\n"
+ "<script>\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected0[] =
+ "var a=1;";
+
+// input for "explicit open tag - complex": a <script ...> tag with an attribute
+// placed between two JavaScript statements
+static const char unexpected_tag_buf1[] =
+    "var a = 1;\n"
+    "<script type=application/javascript>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected1[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf2[] =
+ "var a = 1;\n"
+ "var str = '<script> something';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected2[] =
+ "var a=1;var str=";
+
+static const char unexpected_tag_buf3[] =
+ "var a = 1;\n"
+ "var str = 'something <script> something';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected3[] =
+ "var a=1;var str=";
+
+static const char unexpected_tag_buf4[] =
+ "var a = 1;\n"
+ "var str = 'something <script>';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected4[] =
+ "var a=1;var str=";
+
+static const char unexpected_tag_buf5[] =
+ "var a = 1;\n"
+ "var str = '</script> something';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected5[] =
+ "var a=1;var str=";
+
+static const char unexpected_tag_buf6[] =
+ "var a = 1;\n"
+ "var str = 'something </script> something';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected6[] =
+ "var a=1;var str=";
+
+static const char unexpected_tag_buf7[] =
+ "var a = 1;\n"
+ "var str = 'something </script>';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected7[] =
+ "var a=1;var str=";
+
+static const char unexpected_tag_buf8[] =
+ "var a = 1;\n"
+ "var str = 'something \\<script\\> something';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected8[] =
+ "var a=1;var str='something \\<script\\> something';var b=2;";
+
+static const char unexpected_tag_buf9[] =
+ "var a = 1;\n"
+ "var str = 'something \\<\\/script\\> something';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected9[] =
+ "var a=1;var str='something \\<\\/script\\> something';var b=2;";
+
+static const char unexpected_tag_buf10[] =
+ "var a = 1;\n"
+ "//<script> something\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected10[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf11[] =
+ "var a = 1;\n"
+ "//something <script> something\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected11[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf12[] =
+ "var a = 1;\n"
+ "//something <script>\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected12[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf13[] =
+ "var a = 1;\n"
+ "/*<script> something*/\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected13[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf14[] =
+ "var a = 1;\n"
+ "/*something <script> something*/\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected14[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf15[] =
+ "var a = 1;\n"
+ "/*something <script>*/\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected15[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf16[] =
+ "var a = 1;\n"
+ "//</script> something\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected16[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf17[] =
+ "var a = 1;\n"
+ "<!--something </script> something//-->\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected17[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf18[] =
+ "var a = 1;\n"
+ "//something </script>\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected18[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf19[] =
+ "var a = 1;\n"
+ "/*</script>\n"
+ "something*/\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected19[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf20[] =
+ "var a = 1;\n"
+ "/*something\n"
+ "</script>\n"
+ "something*/\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected20[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf21[] =
+ "var a = 1;\n"
+ "/*something\n"
+ "</script>*/\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected21[] =
+ "var a=1;";
+
+static const char unexpected_tag_buf22[] =
+ "var a = 1;\n"
+ "var str = 'script somescript /script something';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected22[] =
+ "var a=1;var str='script somescript /script something';var b=2;";
+
+static const char unexpected_tag_buf23[] =
+ "var a = 1;\n"
+ "var str = 'script somescript /script something <script>';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected23[] =
+ "var a=1;var str=";
+
+static const char unexpected_tag_buf24[] =
+ "var a = 1;\n"
+ "var str = 'something <sCrIpT>';\n"
+ "var b = 2;\r\n";
+
+static const char unexpected_tag_expected24[] =
+ "var a=1;var str=";
+
+TEST_CASE("unexpected script tag alert", "[JSNormalizer]") // script tags met as bare tokens, inside literals and inside comments
+{
+ const int ret_code = 1; // return value VALIDATE_FAIL expects from normalize() in every alerting section
+ SECTION("explicit open tag - simple")
+ {
+ NORMALIZE(unexpected_tag_buf0, unexpected_tag_expected0);
+ VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, ret_code, 18); // 18 = 11 bytes of "var a = 1;\n" plus the 7 bytes of "<script"
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("explicit open tag - complex")
+ {
+ NORMALIZE(unexpected_tag_buf1, unexpected_tag_expected1);
+ VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, ret_code, 18);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within literal - start")
+ {
+ NORMALIZE(unexpected_tag_buf2, unexpected_tag_expected2);
+ VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, ret_code, 41);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within literal - mid")
+ {
+ NORMALIZE(unexpected_tag_buf3, unexpected_tag_expected3);
+ VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, ret_code, 51);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within literal - end")
+ {
+ NORMALIZE(unexpected_tag_buf4, unexpected_tag_expected4);
+ VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, ret_code, 41);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within literal - start")
+ {
+ NORMALIZE(unexpected_tag_buf5, unexpected_tag_expected5);
+ VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, ret_code, 42);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within literal - mid")
+ {
+ NORMALIZE(unexpected_tag_buf6, unexpected_tag_expected6);
+ VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, ret_code, 52);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within literal - end")
+ {
+ NORMALIZE(unexpected_tag_buf7, unexpected_tag_expected7);
+ VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, ret_code, 42);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within literal - escaped")
+ {
+ NORMALIZE(unexpected_tag_buf8, unexpected_tag_expected8);
+ VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8); // backslash-escaped tag: normalization is expected to succeed
+ }
+ SECTION("close tag within literal - escaped")
+ {
+ NORMALIZE(unexpected_tag_buf9, unexpected_tag_expected9);
+ VALIDATE(unexpected_tag_buf9, unexpected_tag_expected9);
+ }
+ SECTION("open tag within single-line comment - start")
+ {
+ NORMALIZE(unexpected_tag_buf10, unexpected_tag_expected10);
+ VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, ret_code, 32);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within single-line comment - mid")
+ {
+ NORMALIZE(unexpected_tag_buf11, unexpected_tag_expected11);
+ VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, ret_code, 42);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within single-line comment - end")
+ {
+ NORMALIZE(unexpected_tag_buf12, unexpected_tag_expected12);
+ VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, ret_code, 32);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within multi-line comment - start")
+ {
+ NORMALIZE(unexpected_tag_buf13, unexpected_tag_expected13);
+ VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, ret_code, 33);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within multi-line comment - mid")
+ {
+ NORMALIZE(unexpected_tag_buf14, unexpected_tag_expected14);
+ VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, ret_code, 43);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("open tag within multi-line comment - end")
+ {
+ NORMALIZE(unexpected_tag_buf15, unexpected_tag_expected15);
+ VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, ret_code, 33);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within single-line comment - start")
+ {
+ NORMALIZE(unexpected_tag_buf16, unexpected_tag_expected16);
+ VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, ret_code, 33);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within single-line comment - mid") // NOTE(review): unexpected_tag_buf17 opens with an HTML "<!--" comment, not "//"
+ {
+ NORMALIZE(unexpected_tag_buf17, unexpected_tag_expected17);
+ VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, ret_code, 50);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within single-line comment - end")
+ {
+ NORMALIZE(unexpected_tag_buf18, unexpected_tag_expected18);
+ VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, ret_code, 33);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within multi-line comment - start")
+ {
+ NORMALIZE(unexpected_tag_buf19, unexpected_tag_expected19);
+ VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, ret_code, 34);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within multi-line comment - mid")
+ {
+ NORMALIZE(unexpected_tag_buf20, unexpected_tag_expected20);
+ VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, ret_code, 44);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("close tag within multi-line comment - end")
+ {
+ NORMALIZE(unexpected_tag_buf21, unexpected_tag_expected21);
+ VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, ret_code, 34);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("multiple patterns - not matched")
+ {
+ NORMALIZE(unexpected_tag_buf22, unexpected_tag_expected22);
+ VALIDATE(unexpected_tag_buf22, unexpected_tag_expected22); // "script" without a leading '<' or "</": normalization is expected to succeed
+ }
+ SECTION("multiple patterns - matched")
+ {
+ NORMALIZE(unexpected_tag_buf23, unexpected_tag_expected23);
+ VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, ret_code, 67);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+ SECTION("mixed lower and upper case")
+ {
+ NORMALIZE(unexpected_tag_buf24, unexpected_tag_expected24);
+ VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, ret_code, 41);
+ VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+ }
+}
+