%%
-<INITIAL,divop,regst,char_code>{WHITESPACES} { /* skip */ }
-<INITIAL,divop,regst,char_code>{CHAR_ESCAPE_SEQUENCES} { /* skip */ }
+<INITIAL,divop,regst,char_code>{WHITESPACES} { /* skip */ }
+<INITIAL,divop,regst,char_code>{CHAR_ESCAPE_SEQUENCES} { /* skip */ }
-{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; }
-<char_code>{LINE_TERMINATORS} { newline_found = true; }
+{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; }
+<char_code>{LINE_TERMINATORS} { newline_found = true; }
-<INITIAL,regex,dqstr,regst,sqstr,divop,char_code>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
-{HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) }
+<INITIAL,regex,regst,divop,char_code>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
+{HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) }
{HTML_COMMENT_OPEN} { BEGIN(lcomm); }
{LINE_COMMENT_START} { BEGIN(lcomm); }
<lcomm>{LINE_COMMENT_END2} { BEGIN(regst); newline_found = true; }
<char_code_lcomm>{LINE_COMMENT_END1} { BEGIN(char_code); newline_found = true; }
<char_code_lcomm>{LINE_COMMENT_END2} { BEGIN(char_code); newline_found = true; }
-<lcomm,char_code_lcomm>{LINE_COMMENT_END3} { BEGIN(regst); RETURN(OPENING_TAG) }
-<lcomm,char_code_lcomm>{LINE_COMMENT_END4} { BEGIN(regst); RETURN(CLOSING_TAG) }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END3} { /* ext_script set: nothing is done here (no BEGIN, no RETURN), so the start condition is unchanged */ if (!ext_script) { BEGIN(regst); RETURN(OPENING_TAG) } }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END4} { /* ext_script set: nothing is done here (no BEGIN, no RETURN), so the start condition is unchanged */ if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } }
<lcomm,char_code_lcomm>{LINE_COMMENT_SKIP} { /* skip */ }
<lcomm,char_code_lcomm><<EOF>> { RETURN(SCRIPT_CONTINUE) }
- {BLOCK_COMMENT_START} { BEGIN(bcomm); }
-<char_code>{BLOCK_COMMENT_START} { BEGIN(char_code_bcomm); }
-<bcomm>{BLOCK_COMMENT_END1} { BEGIN(regst); }
-<char_code_bcomm>{BLOCK_COMMENT_END1} { BEGIN(char_code); }
-<bcomm,char_code_bcomm>{BLOCK_COMMENT_END2} { BEGIN(regst); RETURN(OPENING_TAG) }
-<bcomm,char_code_bcomm>{BLOCK_COMMENT_END3} { BEGIN(regst); RETURN(CLOSING_TAG) }
-<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE1} |
-<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE2} { newline_found = true; }
-<bcomm,char_code_bcomm>{BLOCK_COMMENT_SKIP} { /* skip */ }
-<bcomm,char_code_bcomm><<EOF>> { RETURN(SCRIPT_CONTINUE) }
+ {BLOCK_COMMENT_START} { BEGIN(bcomm); }
+<char_code>{BLOCK_COMMENT_START} { BEGIN(char_code_bcomm); }
+<bcomm>{BLOCK_COMMENT_END1} { BEGIN(regst); }
+<char_code_bcomm>{BLOCK_COMMENT_END1} { BEGIN(char_code); }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END2} { /* ext_script set: nothing is done here (no BEGIN, no RETURN), so the start condition is unchanged */ if (!ext_script) { BEGIN(regst); RETURN(OPENING_TAG) } }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END3} { /* ext_script set: nothing is done here (no BEGIN, no RETURN), so the start condition is unchanged */ if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE1} |
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE2} { newline_found = true; }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_SKIP} { /* skip */ }
+<bcomm,char_code_bcomm><<EOF>> { RETURN(SCRIPT_CONTINUE) }
{LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) }
<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); }
-<dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
+<dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_OPEN} { /* ext_script set: the matched tag text is ECHOed and the string state is kept; otherwise OPENING_TAG is returned */ if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(OPENING_TAG) } }
+<dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_CLOSE} { /* ext_script set: the matched tag text is ECHOed and the string state is kept; otherwise CLOSING_TAG is returned */ if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } }
<dqstr,unesc_dqstr>\\{CR}{LF} { /* skip */ }
<dqstr,unesc_dqstr>\\{LF} { /* skip */ }
<dqstr,unesc_dqstr>\\{CR} { /* skip */ }
{LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) }
<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); }
-<sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
+<sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_OPEN} { /* ext_script set: the matched tag text is ECHOed and the string state is kept; otherwise OPENING_TAG is returned */ if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(OPENING_TAG) } }
+<sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_CLOSE} { /* ext_script set: the matched tag text is ECHOed and the string state is kept; otherwise CLOSING_TAG is returned */ if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } }
<sqstr,unesc_sqstr>\\{CR}{LF} { /* skip */ }
<sqstr,unesc_sqstr>\\{LF} { /* skip */ }
<sqstr,unesc_sqstr>\\{CR} { /* skip */ }
memset((void*)(states + sp), 0, sizeof(states[0]));
}
-JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in)
+JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in, bool external_script)
{
yy_flush_buffer(YY_CURRENT_BUFFER);
unescape_nest_seen = false;
mixed_encoding_seen = false;
+ ext_script = external_script;
auto r = yylex();
int act_len = norm.script_size(); \
const char* dst = norm.take_script();
+// Test helper: builds a JSNormalizer on top of a stub identifier context and
+// runs normalize() over `src`, passing `true` as the third argument
+// (presumably the external-script flag -- confirm against the normalize()
+// declaration). The locals `ret`, `ptr`, `act_len` and `dst` are left in the
+// enclosing scope for the checks that follow the expansion.
+// `src` must be an array: its length is taken with sizeof(src), which for a
+// char array initialized from a string literal includes the terminating NUL.
+// Because the macro declares variables, it can be expanded once per scope.
+#define NORMALIZE_EXT(src) \
+ JSIdentifierCtxStub ident_ctx; \
+ JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
+ auto ret = norm.normalize(src, sizeof(src), true); \
+ const char* ptr = norm.get_src_next(); \
+ int act_len = norm.script_size(); \
+ const char* dst = norm.take_script();
+
#define VALIDATE(src, expected) \
CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
CHECK((ptr - src) == sizeof(src)); \
static const char unexpected_tag_expected0[] =
"var a=1;";
+static const char unexpected_tag_expected0_ext[] =
+ "var a=1;";
+
static const char unexpected_tag_buf1[] =
"var a = 1;\n"
"<script type=application/javascript>\n"
static const char unexpected_tag_expected1[] =
"var a=1;";
+static const char unexpected_tag_expected1_ext[] =
+ "var a=1;";
+
static const char unexpected_tag_buf2[] =
"var a = 1;\n"
"var str = '<script> something';\n"
static const char unexpected_tag_expected2[] =
"var a=1;var str='";
+static const char unexpected_tag_expected2_ext[] =
+ "var a=1;var str='<script> something';var b=2;";
+
static const char unexpected_tag_buf3[] =
"var a = 1;\n"
"var str = 'something <script> something';\n"
static const char unexpected_tag_expected3[] =
"var a=1;var str='something ";
+static const char unexpected_tag_expected3_ext[] =
+ "var a=1;var str='something <script> something';var b=2;";
+
static const char unexpected_tag_buf4[] =
"var a = 1;\n"
"var str = 'something <script>';\n"
static const char unexpected_tag_expected4[] =
"var a=1;var str='something ";
+static const char unexpected_tag_expected4_ext[] =
+ "var a=1;var str='something <script>';var b=2;";
+
static const char unexpected_tag_buf5[] =
"var a = 1;\n"
"var str = '</script> something';\n"
static const char unexpected_tag_expected5[] =
"var a=1;var str='";
+static const char unexpected_tag_expected5_ext[] =
+ "var a=1;var str='</script> something';var b=2;";
+
static const char unexpected_tag_buf6[] =
"var a = 1;\n"
"var str = 'something </script> something';\n"
static const char unexpected_tag_expected6[] =
"var a=1;var str='something ";
+static const char unexpected_tag_expected6_ext[] =
+ "var a=1;var str='something </script> something';var b=2;";
+
static const char unexpected_tag_buf7[] =
"var a = 1;\n"
"var str = 'something </script>';\n"
static const char unexpected_tag_expected7[] =
"var a=1;var str='something ";
+static const char unexpected_tag_expected7_ext[] =
+ "var a=1;var str='something </script>';var b=2;";
+
static const char unexpected_tag_buf8[] =
"var a = 1;\n"
"var str = 'something \\<script\\> something';\n"
static const char unexpected_tag_expected8[] =
"var a=1;var str='something \\";
+static const char unexpected_tag_expected8_ext[] =
+ "var a=1;var str='something \\<script\\> something';var b=2;";
+
static const char unexpected_tag_buf9[] =
"var a = 1;\n"
"var str = 'something \\<\\/script\\> something';\n"
static const char unexpected_tag_expected9[] =
"var a=1;var str='something \\<\\/script\\> something';var b=2;";
+static const char unexpected_tag_expected9_ext[] =
+ "var a=1;var str='something \\<\\/script\\> something';var b=2;";
+
static const char unexpected_tag_buf10[] =
"var a = 1;\n"
"//<script> something\n"
static const char unexpected_tag_expected10[] =
"var a=1;";
+static const char unexpected_tag_expected10_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf11[] =
"var a = 1;\n"
"//something <script> something\n"
static const char unexpected_tag_expected11[] =
"var a=1;";
+static const char unexpected_tag_expected11_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf12[] =
"var a = 1;\n"
"//something <script>\n"
static const char unexpected_tag_expected12[] =
"var a=1;";
+static const char unexpected_tag_expected12_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf13[] =
"var a = 1;\n"
"/*<script> something*/\n"
static const char unexpected_tag_expected13[] =
"var a=1;";
+static const char unexpected_tag_expected13_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf14[] =
"var a = 1;\n"
"/*something <script> something*/\n"
static const char unexpected_tag_expected14[] =
"var a=1;";
+static const char unexpected_tag_expected14_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf15[] =
"var a = 1;\n"
"/*something <script>*/\n"
static const char unexpected_tag_expected15[] =
"var a=1;";
+static const char unexpected_tag_expected15_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf16[] =
"var a = 1;\n"
"//</script> something\n"
static const char unexpected_tag_expected16[] =
"var a=1;";
+static const char unexpected_tag_expected16_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf17[] =
"var a = 1;\n"
"<!--something </script> something//-->\n"
static const char unexpected_tag_expected17[] =
"var a=1;";
+static const char unexpected_tag_expected17_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf18[] =
"var a = 1;\n"
"//something </script>\n"
static const char unexpected_tag_expected18[] =
"var a=1;";
+static const char unexpected_tag_expected18_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf19[] =
"var a = 1;\n"
"/*</script>\n"
static const char unexpected_tag_expected19[] =
"var a=1;";
+static const char unexpected_tag_expected19_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf20[] =
"var a = 1;\n"
"/*something\n"
static const char unexpected_tag_expected20[] =
"var a=1;";
+static const char unexpected_tag_expected20_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf21[] =
"var a = 1;\n"
"/*something\n"
static const char unexpected_tag_expected21[] =
"var a=1;";
+static const char unexpected_tag_expected21_ext[] =
+ "var a=1;var b=2;";
+
static const char unexpected_tag_buf22[] =
"var a = 1;\n"
"var str = 'script somescript /script something';\n"
static const char unexpected_tag_expected22[] =
"var a=1;var str='script somescript /script something';var b=2;";
+static const char unexpected_tag_expected22_ext[] =
+ "var a=1;var str='script somescript /script something';var b=2;";
+
static const char unexpected_tag_buf23[] =
"var a = 1;\n"
"var str = 'script somescript /script something <script>';\n"
static const char unexpected_tag_expected23[] =
"var a=1;var str='script somescript /script something ";
+static const char unexpected_tag_expected23_ext[] =
+ "var a=1;var str='script somescript /script something <script>';var b=2;";
+
static const char unexpected_tag_buf24[] =
"var a = 1;\n"
"var str = 'something <sCrIpT>';\n"
static const char unexpected_tag_expected24[] =
"var a=1;var str='something ";
+static const char unexpected_tag_expected24_ext[] =
+ "var a=1;var str='something <sCrIpT>';var b=2;";
+
TEST_CASE("nested script tags", "[JSNormalizer]")
{
SECTION("explicit open tag - simple")
}
}
+// Runs the unexpected_tag_buf* inputs through NORMALIZE_EXT and compares the
+// result with the matching *_ext expectation. Each SECTION has its own scope,
+// which is what allows NORMALIZE_EXT (it declares locals) to be used in every
+// one of them.
+TEST_CASE("nested script tags in an external script", "[JSNormalizer]")
+{
+ // Explicit open tags: still expected to end with OPENING_TAG.
+ // NOTE(review): VALIDATE_FAIL is defined outside this view; presumably the
+ // last two arguments are the expected return code and stop offset -- confirm.
+ SECTION("explicit open tag - simple")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf0);
+ VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0_ext, JSTokenizer::OPENING_TAG, 18);
+ }
+ SECTION("explicit open tag - complex")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf1);
+ VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1_ext, JSTokenizer::OPENING_TAG, 18);
+ }
+ // Tags inside string literals: the *_ext expectations keep the tag text and
+ // the rest of the script.
+ SECTION("open tag within literal - start")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf2);
+ VALIDATE(unexpected_tag_buf2, unexpected_tag_expected2_ext);
+ }
+ SECTION("open tag within literal - mid")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf3);
+ VALIDATE(unexpected_tag_buf3, unexpected_tag_expected3_ext);
+ }
+ SECTION("open tag within literal - end")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf4);
+ VALIDATE(unexpected_tag_buf4, unexpected_tag_expected4_ext);
+ }
+ SECTION("close tag within literal - start")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf5);
+ VALIDATE(unexpected_tag_buf5, unexpected_tag_expected5_ext);
+ }
+ SECTION("close tag within literal - mid")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf6);
+ VALIDATE(unexpected_tag_buf6, unexpected_tag_expected6_ext);
+ }
+ SECTION("close tag within literal - end")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf7);
+ VALIDATE(unexpected_tag_buf7, unexpected_tag_expected7_ext);
+ }
+ SECTION("open tag within literal - escaped")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf8);
+ VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8_ext);
+ }
+ SECTION("close tag within literal - escaped")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf9);
+ VALIDATE(unexpected_tag_buf9, unexpected_tag_expected9_ext);
+ }
+ // Tags inside comments: the *_ext expectations contain no comment text and
+ // continue with the statement that follows ("var b=2;").
+ SECTION("open tag within single-line comment - start")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf10);
+ VALIDATE(unexpected_tag_buf10, unexpected_tag_expected10_ext);
+ }
+ SECTION("open tag within single-line comment - mid")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf11);
+ VALIDATE(unexpected_tag_buf11, unexpected_tag_expected11_ext);
+ }
+ SECTION("open tag within single-line comment - end")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf12);
+ VALIDATE(unexpected_tag_buf12, unexpected_tag_expected12_ext);
+ }
+ SECTION("open tag within multi-line comment - start")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf13);
+ VALIDATE(unexpected_tag_buf13, unexpected_tag_expected13_ext);
+ }
+ SECTION("open tag within multi-line comment - mid")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf14);
+ VALIDATE(unexpected_tag_buf14, unexpected_tag_expected14_ext);
+ }
+ SECTION("open tag within multi-line comment - end")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf15);
+ VALIDATE(unexpected_tag_buf15, unexpected_tag_expected15_ext);
+ }
+ SECTION("close tag within single-line comment - start")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf16);
+ VALIDATE(unexpected_tag_buf16, unexpected_tag_expected16_ext);
+ }
+ SECTION("close tag within single-line comment - mid")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf17);
+ VALIDATE(unexpected_tag_buf17, unexpected_tag_expected17_ext);
+ }
+ SECTION("close tag within single-line comment - end")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf18);
+ VALIDATE(unexpected_tag_buf18, unexpected_tag_expected18_ext);
+ }
+ SECTION("close tag within multi-line comment - start")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf19);
+ VALIDATE(unexpected_tag_buf19, unexpected_tag_expected19_ext);
+ }
+ SECTION("close tag within multi-line comment - mid")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf20);
+ VALIDATE(unexpected_tag_buf20, unexpected_tag_expected20_ext);
+ }
+ SECTION("close tag within multi-line comment - end")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf21);
+ VALIDATE(unexpected_tag_buf21, unexpected_tag_expected21_ext);
+ }
+ // Mixed inputs: the word "script" without angle brackets, a real tag after
+ // such words, and a tag written in mixed case.
+ SECTION("multiple patterns - not matched")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf22);
+ VALIDATE(unexpected_tag_buf22, unexpected_tag_expected22_ext);
+ }
+ SECTION("multiple patterns - matched")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf23);
+ VALIDATE(unexpected_tag_buf23, unexpected_tag_expected23_ext);
+ }
+ SECTION("mixed lower and upper case")
+ {
+ NORMALIZE_EXT(unexpected_tag_buf24);
+ VALIDATE(unexpected_tag_buf24, unexpected_tag_expected24_ext);
+ }
+}
+
TEST_CASE("split between tokens", "[JSNormalizer]")
{
SECTION("operator string")