%option c++
%{
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
- #include "utils/js_identifier_ctx.h"
- #include "utils/js_tokenizer.h"
+#include "utils/js_tokenizer.h"
- #include <cassert>
+#include <cassert>
- #include "utils/util_cstring.h"
+#include "utils/js_identifier_ctx.h"
+#include "utils/util_cstring.h"
- #define EXEC(f) { auto r = (f); if (r) { BEGIN(regst); return r; } }
+#define YY_USER_ACTION { states_push(); }
+#define EXEC(f) { auto r = (f); if (r) { BEGIN(regst); return r; } }
+#define EEOF(f) { auto r = (f); if (r) { if (r != SCRIPT_CONTINUE) BEGIN(regst); return r; } }
%}
/* The following grammar is based on the ECMAScript specification */
LITERAL_DQ_STRING_START \"
LITERAL_DQ_STRING_END \"
LITERAL_DQ_STRING_SKIP \\\"
+LITERAL_DQ_STRING_TEXT .
LITERAL_SQ_STRING_START \'
LITERAL_SQ_STRING_END \'
LITERAL_SQ_STRING_SKIP \\\'
+LITERAL_SQ_STRING_TEXT .
LITERAL_TEMPLATE_START \`
LITERAL_TEMPLATE_END \`
LITERAL_TEMPLATE_SUBST_START \$\{
+LITERAL_TEMPLATE_OTHER .
LITERAL_REGEX_START \/[^*\/]
LITERAL_REGEX_END \/[gimsuy]*
LITERAL_REGEX_SKIP \\\/
<lcomm>{LINE_COMMENT_END3} { BEGIN(regst); return OPENING_TAG; }
<lcomm>{LINE_COMMENT_END4} { BEGIN(regst); return CLOSING_TAG; }
<lcomm>{LINE_COMMENT_SKIP} { }
-<lcomm><<EOF>> { return SCRIPT_CONTINUE; }
+<lcomm><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
{BLOCK_COMMENT_START} { BEGIN(bcomm); }
<bcomm>{BLOCK_COMMENT_END1} { BEGIN(regst); }
<bcomm>{BLOCK_COMMENT_END2} { BEGIN(regst); return OPENING_TAG; }
<bcomm>{BLOCK_COMMENT_END3} { BEGIN(regst); return CLOSING_TAG; }
<bcomm>{BLOCK_COMMENT_SKIP} { }
-<bcomm><<EOF>> { return SCRIPT_CONTINUE; }
+<bcomm><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
{LITERAL_DQ_STRING_START} { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); }
<dqstr>{LITERAL_DQ_STRING_END} { ECHO; BEGIN(divop); }
<dqstr>\\{CR} { }
<dqstr>{LINE_TERMINATORS} { BEGIN(regst); return BAD_TOKEN; }
<dqstr>{LITERAL_DQ_STRING_SKIP} { ECHO; }
-<dqstr>. { ECHO; }
-<dqstr><<EOF>> { return SCRIPT_CONTINUE; }
+<dqstr>{LITERAL_DQ_STRING_TEXT} { ECHO; }
+<dqstr><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
{LITERAL_SQ_STRING_START} { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); }
<sqstr>{LITERAL_SQ_STRING_END} { ECHO; BEGIN(divop); }
<sqstr>\\{CR} { }
<sqstr>{LINE_TERMINATORS} { BEGIN(regst); return BAD_TOKEN; }
<sqstr>{LITERAL_SQ_STRING_SKIP} { ECHO; }
-<sqstr>. { ECHO; }
-<sqstr><<EOF>> { return SCRIPT_CONTINUE; }
+<sqstr>{LITERAL_SQ_STRING_TEXT} { ECHO; }
+<sqstr><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
-{OPEN_BRACKET} { if (not bracket_depth.empty()) bracket_depth.top()++; process_punctuator(); }
+{OPEN_BRACKET} { if (!bracket_depth.empty()) bracket_depth.top()++; process_punctuator(); }
{CLOSE_BRACKET} { process_closing_bracket(); }
{LITERAL_TEMPLATE_START} { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); }
<tmpll>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
<tmpll>(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START} | /* escaped template substitution */
<tmpll>(\\\\)*\\{LITERAL_TEMPLATE_END} | /* escaped backtick */
-<tmpll>. { ECHO; }
+<tmpll>{LITERAL_TEMPLATE_OTHER} { ECHO; }
<tmpll><<EOF>> { return SCRIPT_CONTINUE; }
-<regst>{LITERAL_REGEX_START} { EXEC(do_spacing(LITERAL)) yyout << '/'; yyless(1); BEGIN(regex); }
+<regst>{LITERAL_REGEX_START} { EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); }
<regex>{LITERAL_REGEX_END} { ECHO; BEGIN(divop); }
<regex>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
<regex>{LITERAL_REGEX_SKIP} { ECHO; }
<regex>\\{CR} |
<regex>{LINE_TERMINATORS} { BEGIN(regst); return BAD_TOKEN; }
<regex>[^<{LF}{CR}{LS}{PS}\\\/]+ { ECHO; }
-<regex><<EOF>> { return SCRIPT_CONTINUE; }
+<regex><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
<divop>{DIV_OPERATOR} |
<divop>{DIV_ASSIGNMENT_OPERATOR} { ECHO; token = PUNCTUATOR; BEGIN(INITIAL); }
{IDENTIFIER} { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText())) } BEGIN(divop); }
.|{ALL_UNICODE} { ECHO; token = UNDEFINED; BEGIN(INITIAL); }
-<<EOF>> { EXEC(eval_eof()) }
+<<EOF>> { EEOF(eval_eof()) }
%%
{
std::string res;
- if ( code <= 0x7f )
+ if (code <= 0x7f)
res += (char)code;
- else if ( code <= 0x7ff )
+ else if (code <= 0x7ff)
{
- res += ( 0xc0 | (code >> 6) );
- res += ( 0x80 | (code & 0x3f) );
+ res += 0xc0 | (code >> 6);
+ res += 0x80 | (code & 0x3f);
}
- else if ( code <= 0xffff )
+ else if (code <= 0xffff)
{
- res += ( 0xe0 | (code >> 12) );
- res += ( 0x80 | ((code >> 6) & 0x3f) );
- res += ( 0x80 | (code & 0x3f) );
+ res += 0xe0 | (code >> 12);
+ res += 0x80 | ((code >> 6) & 0x3f);
+ res += 0x80 | (code & 0x3f);
}
return res;
short digits_left = 4;
std::string unicode_str;
- for ( const auto& ch : lex )
+ for (const auto& ch : lex)
{
- if ( ch == '\\' )
+ if (ch == '\\')
{
is_unescape = true;
continue;
}
- if ( is_unescape )
+ if (is_unescape)
{
- if ( ch == 'u' )
+ if (ch == 'u')
{
is_unicode = true;
continue;
is_unescape = false;
}
- if ( is_unicode )
+ if (is_unicode)
{
unicode_str += ch;
- if ( !(--digits_left) )
+ if (!(--digits_left))
{
const unsigned int unicode = std::stoi(unicode_str, nullptr, 16);
res += unicode_to_utf8(unicode);
// JSTokenizer members
-JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx,
- uint8_t max_template_nesting)
+JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out,
+ JSIdentifierCtxBase& mapper, uint8_t max_template_nesting,
+ char*& buf, size_t& buf_size, int cap_size)
: yyFlexLexer(in, out),
max_template_nesting(max_template_nesting),
- ident_ctx(ident_ctx)
+ ident_ctx(mapper),
+ tmp_buf(buf),
+ tmp_buf_size(buf_size),
+ tmp_cap_size(cap_size)
{
BEGIN(regst);
}
JSTokenizer::~JSTokenizer()
{
yy_delete_buffer((YY_BUFFER_STATE)tmp_buffer);
+ delete[] tmp_buf;
+ tmp_buf = nullptr;
+ tmp_buf_size = 0;
}
void JSTokenizer::switch_to_temporal(const std::string& data)
{
// If the temporal scan buffer reaches EOF, cleanup and
// continue with the initial one
- if ( tmp_buffer )
+ if (tmp_buffer)
{
switch_to_initial();
return EOS;
}
// Normal termination
+ states_apply();
+
return SCRIPT_CONTINUE;
}
bool JSTokenizer::unescape(const char* lexeme)
{
- if ( strstr(lexeme, "\\u") )
+ if (strstr(lexeme, "\\u"))
{
const std::string unescaped_lex = unescape_unicode(lexeme);
switch_to_temporal(unescaped_lex);
void JSTokenizer::process_closing_bracket()
{
- if ( not bracket_depth.empty() )
+ if (!bracket_depth.empty())
{
- if ( bracket_depth.top() )
+ if (bracket_depth.top())
bracket_depth.top()--;
else
{
JSTokenizer::JSRet JSTokenizer::process_subst_open()
{
- if ( bracket_depth.size() >= max_template_nesting )
+ if (bracket_depth.size() >= max_template_nesting)
return TEMPLATE_NESTING_OVERFLOW;
bracket_depth.push(0);
token = PUNCTUATOR;
ECHO;
- BEGIN(divop);
+ BEGIN(divop);
return EOS;
-}
\ No newline at end of file
+}
+
+void JSTokenizer::states_push()
+{
+ assert(yyleng != 0);
+
+ sp++;
+ sp %= JSTOKENIZER_MAX_STATES;
+ auto& state = states[sp];
+
+ state.token = token;
+ state.length = yyleng;
+ state.sc = yy_start;
+}
+
+void JSTokenizer::states_correct(int take_off)
+{
+ auto& state = states[sp];
+ state.length -= yyleng - take_off;
+}
+
+void JSTokenizer::states_apply()
+{
+ int tail_size = 0;
+
+ for (int i = JSTOKENIZER_MAX_STATES; i > 0 && tail_size < tmp_cap_size; --i)
+ {
+ auto idx = sp + i;
+ idx %= JSTOKENIZER_MAX_STATES;
+ auto& state = states[idx];
+
+ if (state.length == 0)
+ continue;
+
+ token = state.token;
+ yy_start = state.sc;
+ tail_size += state.length;
+ tail_size = tail_size < tmp_cap_size ? tail_size : tmp_cap_size;
+ }
+
+ for (int i = 0; i < JSTOKENIZER_MAX_STATES; ++i)
+ states[i].length = 0;
+
+ char* buf = new char[tail_size];
+
+ yyin.seekg(-tail_size, std::ios_base::end);
+ yyin.clear();
+ yyin.read(buf, tail_size);
+
+ delete[] tmp_buf;
+ tmp_buf = buf;
+ tmp_buf_size = tail_size;
+}
const char* substitute(const char* identifier) override
{ return identifier; }
void reset() override {}
- size_t size() const override {}
+ size_t size() const override { return 0; }
};
using namespace snort;
#define DEPTH 65535
#define MAX_TEMPLATE_NESTNIG 4
+#define DST_SIZE 512
#define NORMALIZE(src, expected) \
char dst[sizeof(expected)]; \
len = norm.get_dst_next() - dst; \
}
+#define DO(src, slen, dst, dlen) \
+ { \
+ auto ret = norm.normalize(src, slen, dst, dlen); \
+ CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
+ auto nsrc = norm.get_src_next(); \
+ auto ndst = norm.get_dst_next(); \
+ REQUIRE(nsrc - src == slen); \
+ REQUIRE(ndst - dst == dlen); \
+ }
+
+#define TRY(src, slen, dst, dlen, rexp) \
+ { \
+ auto ret = norm.normalize(src, slen, dst, dlen); \
+ CHECK(ret == rexp); \
+ auto ndst = norm.get_dst_next(); \
+ REQUIRE(ndst - dst == dlen); \
+ }
+
+#define CLOSE() \
+ { \
+ const char end[] = "</script>"; \
+ char dst[DST_SIZE]; \
+ auto ret = norm.normalize(end, sizeof(end) - 1, dst, sizeof(dst) - 1); \
+ CHECK(ret == JSTokenizer::SCRIPT_ENDED); \
+ }
+
+#define NORMALIZE_2(src1, src2, exp1, exp2) \
+ { \
+ char dst1[sizeof(exp1)]; \
+ char dst2[sizeof(exp2)]; \
+ \
+ JSIdentifierCtxTest ident_ctx; \
+ JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \
+ \
+ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
+ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \
+ \
+ DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
+ CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1)); \
+ \
+ CLOSE(); \
+ }
+
+#define NORMALIZE_3(src1, src2, src3, exp1, exp2, exp3) \
+ { \
+ char dst1[sizeof(exp1)]; \
+ char dst2[sizeof(exp2)]; \
+ char dst3[sizeof(exp3)]; \
+ \
+ JSIdentifierCtxTest ident_ctx; \
+ JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \
+ \
+ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
+ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \
+ \
+ DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
+ CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1)); \
+ \
+ DO(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1); \
+ CHECK(!memcmp(exp3, dst3, sizeof(exp3) - 1)); \
+ \
+ CLOSE(); \
+ }
+
+#define NORM_BAD_2(src1, src2, exp1, exp2, code) \
+ { \
+ char dst1[sizeof(exp1)]; \
+ char dst2[sizeof(exp2)]; \
+ \
+ JSIdentifierCtxTest ident_ctx; \
+ JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \
+ \
+ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
+ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \
+ \
+ TRY(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1, code); \
+ CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1)); \
+ }
+
+#define NORM_BAD_3(src1, src2, src3, exp1, exp2, exp3, code) \
+ { \
+ char dst1[sizeof(exp1)]; \
+ char dst2[sizeof(exp2)]; \
+ char dst3[sizeof(exp3)]; \
+ \
+ JSIdentifierCtxTest ident_ctx; \
+ JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \
+ \
+ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
+ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \
+ \
+ DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
+ CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1)); \
+ \
+ TRY(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1, code); \
+ CHECK(!memcmp(exp3, dst3, sizeof(exp3) - 1)); \
+ }
+
+#define NORM_LIMITED(limit, src1, src2, exp1, exp2) \
+ { \
+ char dst1[sizeof(exp1)]; \
+ char dst2[sizeof(exp2)]; \
+ \
+ JSIdentifierCtxTest ident_ctx; \
+ JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG, limit); \
+ \
+ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
+ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \
+ \
+ DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
+ CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1)); \
+ \
+ CLOSE(); \
+ }
+
// ClamAV test cases
static const char clamav_buf0[] =
"function foo(a, b) {\n"
}
}
+TEST_CASE("split between tokens", "[JSNormalizer]")
+{
+ SECTION("operator string")
+ {
+ const char dat1[] = "var s = ";
+ const char dat2[] = "'string';";
+ const char exp1[] = "var s=";
+ const char exp2[] = "var s='string';";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("operator number")
+ {
+ const char dat1[] = "a = 5 +";
+ const char dat2[] = "b + c;";
+ const char exp1[] = "a=5+";
+ const char exp2[] = "a=5+b+c;";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("comment function")
+ {
+ const char dat1[] = "// no comments\n";
+ const char dat2[] = "foo(bar, baz);";
+ const char exp1[] = "";
+ const char exp2[] = "foo(bar,baz);";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("operator identifier")
+ {
+ const char dat1[] = "var ";
+ const char dat2[] = "a = ";
+ const char dat3[] = "b ;";
+ const char exp1[] = "var";
+ const char exp2[] = "var a=";
+ const char exp3[] = "var a=b;";
+
+ NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
+ }
+}
+
+TEST_CASE("split in comments", "[JSNormalizer]")
+{
+ SECTION("/ /")
+ {
+ const char dat1[] = "/";
+ const char dat2[] = "/comment\n";
+ const char exp1[] = "/";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("/ / msg")
+ {
+ const char dat1[] = "//";
+ const char dat2[] = "comment\n";
+ const char exp1[] = "";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("/ / LF")
+ {
+ const char dat1[] = "//comment";
+ const char dat2[] = "\n";
+ const char exp1[] = "";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+
+ SECTION("/ *")
+ {
+ const char dat1[] = "/";
+ const char dat2[] = "* comment */";
+ const char exp1[] = "/";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("/ * msg")
+ {
+ const char dat1[] = "/* t";
+ const char dat2[] = "ext */";
+ const char exp1[] = "";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("* /")
+ {
+ const char dat1[] = "/* comment *";
+ const char dat2[] = "/";
+ const char exp1[] = "";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("/ * msg * /")
+ {
+ const char dat1[] = "/";
+ const char dat2[] = "* comment *";
+ const char dat3[] = "/";
+ const char exp1[] = "/";
+ const char exp2[] = "";
+ const char exp3[] = "";
+
+ NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
+ }
+
+ SECTION("< !--")
+ {
+ const char dat1[] = "<";
+ const char dat2[] = "!-- comment\n";
+ const char exp1[] = "<";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("<! --")
+ {
+ const char dat1[] = "<!";
+ const char dat2[] = "-- comment\n";
+ const char exp1[] = "<!";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("<!- -")
+ {
+ const char dat1[] = "<!-";
+ const char dat2[] = "- comment\n";
+ const char exp1[] = "<!-";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("<!-- msg")
+ {
+ const char dat1[] = "<!--";
+ const char dat2[] = "comment\n";
+ const char exp1[] = "";
+ const char exp2[] = "";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("<! -- msg")
+ {
+ const char dat1[] = "<";
+ const char dat2[] = "!-";
+ const char dat3[] = "-comment\n";
+ const char exp1[] = "<";
+ const char exp2[] = "<!-";
+ const char exp3[] = "";
+
+ NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
+ }
+}
+
+TEST_CASE("split in opening tag", "[JSNormalizer]")
+{
+ SECTION("< script")
+ {
+ const char dat1[] = "<";
+ const char dat2[] = "script";
+ const char exp1[] = "<";
+ const char exp2[] = "";
+
+ NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
+ }
+ SECTION("str='<s cript'")
+ {
+ const char dat1[] = "var str ='<s";
+ const char dat2[] = "cript';";
+ const char exp1[] = "var str='<s";
+ const char exp2[] = "var str='";
+
+ NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
+ }
+ SECTION("str='<scrip t'")
+ {
+ const char dat1[] = "var str ='<scrip";
+ const char dat2[] = "t';";
+ const char exp1[] = "var str='<scrip";
+ const char exp2[] = "='";
+
+ NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
+ }
+ SECTION("< scr ipt")
+ {
+ const char dat1[] = "<";
+ const char dat2[] = "scr";
+ const char dat3[] = "ipt";
+ const char exp1[] = "<";
+ const char exp2[] = "<scr";
+ const char exp3[] = "";
+
+ NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::OPENING_TAG);
+ }
+ SECTION("str='<sc rip t'")
+ {
+ const char dat1[] = "var str =\"<sc";
+ const char dat2[] = "rip";
+ const char dat3[] = "t\";";
+ const char exp1[] = "var str=\"<sc";
+ const char exp2[] = " str=\"<scrip";
+ const char exp3[] = "=\"";
+
+ NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::OPENING_TAG);
+ }
+}
+
+TEST_CASE("split in closing tag", "[JSNormalizer]")
+{
+ SECTION("< /script>")
+ {
+ const char dat1[] = "<";
+ const char dat2[] = "/script>";
+ const char exp1[] = "<";
+ const char exp2[] = "";
+
+ NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::SCRIPT_ENDED);
+ }
+ SECTION("</script >")
+ {
+ const char dat1[] = "</script";
+ const char dat2[] = ">";
+ const char exp1[] = "</script";
+ const char exp2[] = "";
+
+ NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::SCRIPT_ENDED);
+ }
+ SECTION("str='</ script>'")
+ {
+ const char dat1[] = "var str ='</";
+ const char dat2[] = "script>';";
+ const char exp1[] = "var str='</";
+ const char exp2[] = "var str='";
+
+ NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::CLOSING_TAG);
+ }
+ SECTION("str='</scrip t>'")
+ {
+ const char dat1[] = "var str ='</scrip";
+ const char dat2[] = "t>';";
+ const char exp1[] = "var str='</scrip";
+ const char exp2[] = "'";
+
+ NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::CLOSING_TAG);
+ }
+ SECTION("</ scr ipt>")
+ {
+ const char dat1[] = "</";
+ const char dat2[] = "scr";
+ const char dat3[] = "ipt>";
+ const char exp1[] = "</";
+ const char exp2[] = "</scr";
+ const char exp3[] = "";
+
+ NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::SCRIPT_ENDED);
+ }
+ SECTION("str='</sc rip t>'")
+ {
+ const char dat1[] = "var str =\"</sc";
+ const char dat2[] = "rip";
+ const char dat3[] = "t>\";";
+ const char exp1[] = "var str=\"</sc";
+ const char exp2[] = " str=\"</scrip";
+ const char exp3[] = "\"";
+
+ NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::CLOSING_TAG);
+ }
+}
+
+TEST_CASE("split in string literal", "[JSNormalizer]")
+{
+ SECTION("\\ LF")
+ {
+ const char dat1[] = "var str =\"any\\";
+ const char dat2[] = "\none\";";
+ const char exp1[] = "var str=\"any\\";
+ const char exp2[] = " str=\"anyone\";";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("\\ CR")
+ {
+ const char dat1[] = "var str =\"any\\";
+ const char dat2[] = "\rone\";";
+ const char exp1[] = "var str=\"any\\";
+ const char exp2[] = " str=\"anyone\";";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("\\CR LF")
+ {
+ const char dat1[] = "var str =\"any\\\r";
+ const char dat2[] = "\none\";";
+ const char exp1[] = "var str=\"any";
+ const char exp2[] = " str=\"anyone\";";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("\\ CRLF")
+ {
+ const char dat1[] = "var str =\"any\\";
+ const char dat2[] = "\r\none\";";
+ const char exp1[] = "var str=\"any\\";
+ const char exp2[] = " str=\"anyone\";";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("\\ \"")
+ {
+ const char dat1[] = "var str =\"any\\";
+ const char dat2[] = "\"one\";";
+ const char exp1[] = "var str=\"any\\";
+ const char exp2[] = " str=\"any\\\"one\";";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("\\ \'")
+ {
+ const char dat1[] = "var str =\"any\\";
+ const char dat2[] = "\'one\";";
+ const char exp1[] = "var str=\"any\\";
+ const char exp2[] = " str=\"any\\\'one\";";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("\\ u1234tx")
+ {
+ const char dat1[] = "var str =\"any\\";
+ const char dat2[] = "u1234tx\";";
+ const char exp1[] = "var str=\"any\\";
+ const char exp2[] = " str=\"any\\u1234tx\";";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("\\u 1234tx")
+ {
+ const char dat1[] = "var str =\"any\\u";
+ const char dat2[] = "1234tx\";";
+ const char exp1[] = "var str=\"any\\u";
+ const char exp2[] = "=\"any\\u1234tx\";";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+}
+
+TEST_CASE("split in identifier", "[JSNormalizer]")
+{
+ SECTION("abc def")
+ {
+ const char dat1[] = "var abc";
+ const char dat2[] = "def = 5";
+ const char exp1[] = "var abc";
+ const char exp2[] = "var abcdef=5";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("abc def")
+ {
+ const char dat1[] = "var abc";
+ const char dat2[] = "def = 5";
+ const char exp1[] = "var abc";
+ const char exp2[] = "var abcdef=5";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("long identifier")
+ {
+ const char dat1[] = "var res = something + long_id_starts_here";
+ const char dat2[] = "_long_id_ends_here;";
+ const char exp1[] = "var res=something+long_id_starts_here";
+ const char exp2[] = "=something+long_id_starts_here_long_id_ends_here;";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+}
+
+TEST_CASE("split in keyword", "[JSNormalizer]")
+{
+ SECTION("finally")
+ {
+ const char dat1[] = "\nfin";
+ const char dat2[] = "ally;";
+ const char exp1[] = "fin";
+ const char exp2[] = "finally;";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("in")
+ {
+ const char dat1[] = "i";
+ const char dat2[] = "n";
+ const char exp1[] = "i";
+ const char exp2[] = "in";
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ }
+ SECTION("instanceof")
+ {
+ const char dat1[] = "in";
+ const char dat2[] = "stance";
+ const char dat3[] = "of";
+ const char exp1[] = "in";
+ const char exp2[] = "instance";
+ const char exp3[] = "instanceof";
+
+ NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
+ }
+}
+
+TEST_CASE("memcap", "[JSNormalizer]")
+{
+ SECTION("3 tokens")
+ {
+ const char dat1[] = "var abc=in";
+ const char dat2[] = "put;";
+ const char exp1[] = "var abc=in";
+ const char exp2[] = " abc=input;";
+
+ NORM_LIMITED(6, dat1, dat2, exp1, exp2);
+ }
+ SECTION("2 tokens and a half")
+ {
+ const char dat1[] = "var abc=in";
+ const char dat2[] = "put;";
+ const char exp1[] = "var abc=in";
+ const char exp2[] = " c=input;";
+
+ NORM_LIMITED(4, dat1, dat2, exp1, exp2);
+ }
+ SECTION("1 token")
+ {
+ const char dat1[] = "var abc=in";
+ const char dat2[] = "put;";
+ const char exp1[] = "var abc=in";
+ const char exp2[] = "input;";
+
+ NORM_LIMITED(2, dat1, dat2, exp1, exp2);
+ }
+ SECTION("a half")
+ {
+ const char dat1[] = "var abc=extract";
+ const char dat2[] = "// just a comment\n";
+ const char exp1[] = "var abc=extract";
+ const char exp2[] = "tract";
+
+ NORM_LIMITED(5, dat1, dat2, exp1, exp2);
+ }
+}