DIRECTIVE
};
+ enum ASIGroup // token classes used to index insert_semicolon as [previous token][current token]
+ {
+ ASI_OTHER = 0, // any token outside the groups below; its table row and column are all false
+ ASI_GROUP_1, // {
+ ASI_GROUP_2, // }
+ ASI_GROUP_3, // [ (
+ ASI_GROUP_4, // ]
+ ASI_GROUP_5, // )
+ ASI_GROUP_6, // + -
+ ASI_GROUP_7, // this true false null identifier literal
+ // (the IDENTIFIER and LITERAL rules, plus the string, template and regex literal starts)
+ ASI_GROUP_8, // ++ --
+ ASI_GROUP_9, // continue break return debugger (same set as KEYWORD_BA)
+ ASI_GROUP_10, // var function new delete void typeof if do while for with
+ // switch throw try ~ ! (KEYWORD_B and PUNCTUATOR_PREFIX)
+ ASI_GROUP_MAX // number of groups; used as both dimensions of insert_semicolon
+ };
+
public:
enum JSRet
{
JSRet eval_eof();
JSRet do_spacing(JSToken cur_token);
JSRet do_operator_spacing(JSToken cur_token);
+ void do_semicolon_insertion(ASIGroup current);
JSRet do_identifier_substitution(const char* lexeme);
bool unescape(const char* lexeme);
void process_punctuator();
uint8_t max_template_nesting;
std::stack<uint16_t, std::vector<uint16_t>> bracket_depth;
JSToken token = UNDEFINED;
+ ASIGroup previous_group = ASI_OTHER;
JSIdentifierCtxBase& ident_ctx;
struct
char*& tmp_buf;
size_t& tmp_buf_size;
const int tmp_cap_size;
+ bool newline_found = false;
+ constexpr static bool insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX] // [previous_group][current]: true = emit ';' when a line break separated the two tokens
+ {
+ {false, false, false, false, false, false, false, false, false, false, false,}, // previous: ASI_OTHER
+ {false, false, false, false, false, false, false, false, false, false, false,}, // previous: ASI_GROUP_1  {
+ {false, false, false, false, false, false, false, false, false, false, false,}, // previous: ASI_GROUP_2  }
+ {false, false, false, false, false, false, false, false, false, false, false,}, // previous: ASI_GROUP_3  [ (
+ {false, true, false, false, false, false, false, true, true, true, true, }, // previous: ASI_GROUP_4  ]
+ {false, false, false, false, false, false, false, true, true, true, true, }, // previous: ASI_GROUP_5  )
+ {false, false, false, false, false, false, false, false, false, false, false,}, // previous: ASI_GROUP_6  + -
+ {false, true, false, false, false, false, false, true, true, true, true, }, // previous: ASI_GROUP_7  identifier / literal
+ {false, true, false, true, false, false, false, true, true, true, true, }, // previous: ASI_GROUP_8  ++ --
+ {false, true, false, true, false, false, true, true, true, true, true, }, // previous: ASI_GROUP_9  continue break return debugger
+ {false, false, false, false, false, false, false, false, false, false, false,} // previous: ASI_GROUP_10 statement-opening keywords, ~ !
+ };
};
#endif // JS_TOKENIZER_H
#define EXEC(f) { auto r = (f); if (r) { BEGIN(regst); return r; } }
#define EEOF(f) { auto r = (f); if (r) { if (r != SCRIPT_CONTINUE) BEGIN(regst); return r; } }
+constexpr bool JSTokenizer::insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX];
%}
/* The following grammar was created based on ECMAScript specification */
/* source https://ecma-international.org/ecma-262/5.1/ */
-/* whitespaces */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.2 */
-TAB \x9
-VT \xB
-FF \xC
-SP \x20
-NBSP \xA0
-BOM \xEF\xBB\xBF
-WHITESPACES {TAB}|{VT}|{FF}|{SP}|{NBSP}|{BOM}
-
-/* single char escape sequences */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 */
-NUL \x0
-BS \x8
-HT \x9
-CHAR_ESCAPE_SEQUENCES {NUL}|{BS}|{HT}
-
-/* line terminators */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.3 */
-LF \xA
-CR \xD
-LS \xE2\x80\xA8
-PS \xE2\x80\xA9
-LINE_TERMINATORS {LF}|{CR}|{LS}|{PS}
-
-/* comments */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.4 */
-LINE_COMMENT_START "//"
-LINE_COMMENT_END1 [^<\xA\xD]*\xA
-LINE_COMMENT_END2 [^<\xA\xD]*\xD
-LINE_COMMENT_END3 [^<\xA\xD]*"<"+(?i:script)
-LINE_COMMENT_END4 [^<\xA\xD]*"<"+(?i:\/script>)
-LINE_COMMENT_SKIP [^<\xA\xD]*"<"?
-BLOCK_COMMENT_START "/*"
-BLOCK_COMMENT_END1 [^<*]*"*"+"/"
-BLOCK_COMMENT_END2 [^<*]*"<"+(?i:script)
-BLOCK_COMMENT_END3 [^<*]*"<"+(?i:\/script>)
-BLOCK_COMMENT_SKIP [^<*]*[<*]?
-
-/* directives */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-14.1 */
-USE_STRICT_DIRECTIVE "\"use strict\""|"\'use strict\'"
-USE_STRICT_DIRECTIVE_SC "\"use strict\"";*|"\'use strict\'";*
-
-/* keywords */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6.1.1 */
-KEYWORD break|case|debugger|in|import|protected|do|else|function|try|implements|static|instanceof|new|this|class|let|typeof|var|with|enum|private|catch|continue|default|extends|public|finally|for|if|super|yield|return|switch|throw|const|interface|void|while|delete|export|package
-
-/* punctuators */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.7 */
-CLOSING_BRACES ")"|"]"
-OPEN_BRACKET "{"
-CLOSE_BRACKET "}"
-PUNCTUATOR "("|"["|">="|"=="|"!="|"==="|"!=="|"."|";"|","|"<"|">"|"<="|"<<"|">>"|">>>"|"&"|"|"|"^"|"!"|"&&"|"||"|"?"|":"|"="|"+="|"-="|"*="|"%="|"<<="|">>="|">>>="|"&="|"|="|"^="|"~"
-OPERATOR "+"|"-"|"*"|"++"|"--"|"%"
-DIV_OPERATOR "/"
-DIV_ASSIGNMENT_OPERATOR "/="
-
/* Unicode letter ranges (categories Lu, Ll, Lt, Lm, Lo and Nl) */
/* generated with unicode_range_generator.l */
/* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 (escape sequence) */
UNICODE_ESCAPE_SEQUENCE \\u[0-9a-fA-F]{4}
+/* whitespaces */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.2 */
+TAB \x9
+VT \xB
+FF \xC
+SP \x20
+NBSP \xA0
+BOM \xEF\xBB\xBF
+WHITESPACES {TAB}|{VT}|{FF}|{SP}|{NBSP}|{BOM}
+
+/* single char escape sequences */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 */
+NUL \x0
+BS \x8
+HT \x9
+CHAR_ESCAPE_SEQUENCES {NUL}|{BS}|{HT}
+
+/* line terminators */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.3 */
+LF \xA
+CR \xD
+LS \xE2\x80\xA8
+PS \xE2\x80\xA9
+LINE_TERMINATORS {LF}|{CR}|{LS}|{PS}
+
+/* comments */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.4 */
+LINE_COMMENT_START "//"
+LINE_COMMENT_END1 [^<\xA\xD]*\xA
+LINE_COMMENT_END2 [^<\xA\xD]*\xD
+LINE_COMMENT_END3 [^<\xA\xD]*"<"+(?i:script)
+LINE_COMMENT_END4 [^<\xA\xD]*"<"+(?i:\/script>)
+LINE_COMMENT_SKIP [^<\xA\xD]*"<"?
+BLOCK_COMMENT_START "/*"
+BLOCK_COMMENT_END1 [^<*\xA\xD]*"*"+"/"
+BLOCK_COMMENT_END2 [^<*\xA\xD]*"<"+(?i:script)
+BLOCK_COMMENT_END3 [^<*\xA\xD]*"<"+(?i:\/script>)
+BLOCK_COMMENT_LINE1 [^<*\xA\xD]*\xA
+BLOCK_COMMENT_LINE2 [^<*\xA\xD]*\xD
+BLOCK_COMMENT_SKIP [^<*\xA\xD]*[<*]?
+
+/* directives */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-14.1 */
+USE_STRICT_DIRECTIVE "\"use strict\""|"\'use strict\'"
+USE_STRICT_DIRECTIVE_SC "\"use strict\"";*|"\'use strict\'";*
+
+/* keywords */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6.1.1 */
+/* keywords that can appear at the beginning or the end of Statement*/
+KEYWORD_BA break|continue|debugger|return
+/* keywords that can appear at the beginning of Statement*/
+KEYWORD_B delete|do|for|function|if|new|switch|throw|try|typeof|var|void|while|with
+/* keywords that can not appear at the beginning or the end of Statement*/
+KEYWORD_OTHER case|catch|class|const|default|else|enum|export|extends|finally|implements|import|in|instanceof|interface|let|package|private|protected|public|static|super|yield
+
+/* punctuators */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.7 */
+CLOSING_PAREN ")"
+CLOSING_BRACE "]"
+OPEN_BRACKET "{"
+CLOSE_BRACKET "}"
+PUNCTUATOR_PREFIX "~"|"!"
+OPEN_PAREN_BRACE "("|"["
+PUNCTUATOR ">="|"=="|"!="|"==="|"!=="|"."|";"|","|"<"|">"|"<="|"<<"|">>"|">>>"|"&"|"|"|"^"|"&&"|"||"|"?"|":"|"="|"+="|"-="|"*="|"%="|"<<="|">>="|">>>="|"&="|"|="|"^="
+OPERATOR_PREFIX "+"|"-"
+OPERATOR_INCR_DECR "--"|"++"
+OPERATOR "*"|"%"
+DIV_OPERATOR "/"
+DIV_ASSIGNMENT_OPERATOR "/="
+
/* identifiers */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6 */
IDENTIFIER_START [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}
/* literals */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8 */
LITERAL_NULL null
+LITERAL_THIS this
LITERAL_BOOLEAN true|false
LITERAL_DECIMAL [.]?[0-9]+[\.]?[0-9]*[eE]?[0-9]*
LITERAL_HEX_INTEGER 0x[0-9a-fA-F]*|0X[0-9a-fA-F]*
LITERAL_UNDEFINED undefined
LITERAL_INFINITY Infinity|\xE2\x88\x9E
LITERAL_NAN NaN
-LITERAL {LITERAL_NULL}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN}
+LITERAL {LITERAL_NULL}|{LITERAL_THIS}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN}
HTML_COMMENT_OPEN "<"+"!--"
HTML_TAG_SCRIPT_OPEN "<"+(?i:script)
%%
{WHITESPACES} { }
{CHAR_ESCAPE_SEQUENCES} { }
-{LINE_TERMINATORS} { BEGIN(regst); }
+{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; }
<INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); return OPENING_TAG; }
{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return SCRIPT_ENDED; }
{HTML_COMMENT_OPEN} { BEGIN(lcomm); }
{LINE_COMMENT_START} { BEGIN(lcomm); }
-<lcomm>{LINE_COMMENT_END1} { BEGIN(regst); }
-<lcomm>{LINE_COMMENT_END2} { BEGIN(regst); }
+<lcomm>{LINE_COMMENT_END1} { BEGIN(regst); newline_found = true; }
+<lcomm>{LINE_COMMENT_END2} { BEGIN(regst); newline_found = true; }
<lcomm>{LINE_COMMENT_END3} { BEGIN(regst); return OPENING_TAG; }
<lcomm>{LINE_COMMENT_END4} { BEGIN(regst); return CLOSING_TAG; }
<lcomm>{LINE_COMMENT_SKIP} { }
<bcomm>{BLOCK_COMMENT_END1} { BEGIN(regst); }
<bcomm>{BLOCK_COMMENT_END2} { BEGIN(regst); return OPENING_TAG; }
<bcomm>{BLOCK_COMMENT_END3} { BEGIN(regst); return CLOSING_TAG; }
+<bcomm>{BLOCK_COMMENT_LINE1} |
+<bcomm>{BLOCK_COMMENT_LINE2} { newline_found = true;}
<bcomm>{BLOCK_COMMENT_SKIP} { }
<bcomm><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
- {LITERAL_DQ_STRING_START} { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); }
+ {LITERAL_DQ_STRING_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); }
<dqstr>{LITERAL_DQ_STRING_END} { ECHO; BEGIN(divop); }
<dqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
<dqstr>\\{CR}{LF} { }
<dqstr>{LITERAL_DQ_STRING_TEXT} { ECHO; }
<dqstr><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
- {LITERAL_SQ_STRING_START} { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); }
+ {LITERAL_SQ_STRING_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); }
<sqstr>{LITERAL_SQ_STRING_END} { ECHO; BEGIN(divop); }
<sqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
<sqstr>\\{CR}{LF} { }
<sqstr>{LITERAL_SQ_STRING_TEXT} { ECHO; }
<sqstr><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
-{OPEN_BRACKET} { if (!bracket_depth.empty()) bracket_depth.top()++; process_punctuator(); }
-{CLOSE_BRACKET} { process_closing_bracket(); }
+{OPEN_BRACKET} { do_semicolon_insertion(ASI_GROUP_1); if (!bracket_depth.empty()) bracket_depth.top()++; process_punctuator(); }
+{CLOSE_BRACKET} { do_semicolon_insertion(ASI_GROUP_2); process_closing_bracket(); }
- {LITERAL_TEMPLATE_START} { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); }
+ {LITERAL_TEMPLATE_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); }
<tmpll>(\\\\)*{LITERAL_TEMPLATE_END} { ECHO; BEGIN(divop); }
<tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) }
<tmpll>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
<tmpll>{LITERAL_TEMPLATE_OTHER} { ECHO; }
<tmpll><<EOF>> { return SCRIPT_CONTINUE; }
-<regst>{LITERAL_REGEX_START} { EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); }
+<regst>{LITERAL_REGEX_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); }
<regex>{LITERAL_REGEX_END} { ECHO; BEGIN(divop); }
<regex>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; }
<regex>{LITERAL_REGEX_SKIP} { ECHO; }
<regex><<EOF>> { states_apply(); return SCRIPT_CONTINUE; }
<divop>{DIV_OPERATOR} |
-<divop>{DIV_ASSIGNMENT_OPERATOR} { ECHO; token = PUNCTUATOR; BEGIN(INITIAL); }
+<divop>{DIV_ASSIGNMENT_OPERATOR} { previous_group = ASI_OTHER; ECHO; token = PUNCTUATOR; BEGIN(INITIAL); }
+
+{CLOSING_PAREN} { do_semicolon_insertion(ASI_GROUP_5); ECHO; token = PUNCTUATOR; BEGIN(divop); }
+{CLOSING_BRACE} { do_semicolon_insertion(ASI_GROUP_4); ECHO; token = PUNCTUATOR; BEGIN(divop); }
+{PUNCTUATOR_PREFIX} { do_semicolon_insertion(ASI_GROUP_10); process_punctuator(); }
+{OPEN_PAREN_BRACE} { do_semicolon_insertion(ASI_GROUP_3); process_punctuator(); }
+{PUNCTUATOR} { previous_group = ASI_OTHER; process_punctuator(); }
-{CLOSING_BRACES} { ECHO; token = PUNCTUATOR; BEGIN(divop); }
-{PUNCTUATOR} { process_punctuator(); }
+{USE_STRICT_DIRECTIVE} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; }
+{USE_STRICT_DIRECTIVE_SC} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); }
+{KEYWORD_B} { do_semicolon_insertion(ASI_GROUP_10); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
+{KEYWORD_BA} { do_semicolon_insertion(ASI_GROUP_9); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
+{KEYWORD_OTHER} { previous_group = ASI_OTHER; EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
-{USE_STRICT_DIRECTIVE} { EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; }
-{USE_STRICT_DIRECTIVE_SC} { EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); }
-{KEYWORD} { EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
-{OPERATOR} { EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
-{LITERAL} { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); }
-{IDENTIFIER} { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText())) } BEGIN(divop); }
+{OPERATOR_PREFIX} { do_semicolon_insertion(ASI_GROUP_6); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
+{OPERATOR_INCR_DECR} { do_semicolon_insertion(ASI_GROUP_8); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
+{OPERATOR} { previous_group = ASI_OTHER; EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
-.|{ALL_UNICODE} { ECHO; token = UNDEFINED; BEGIN(INITIAL); }
+{LITERAL} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); }
+{IDENTIFIER} { do_semicolon_insertion(ASI_GROUP_7); if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText())) } BEGIN(divop); }
+
+.|{ALL_UNICODE} { previous_group = ASI_OTHER; ECHO; token = UNDEFINED; BEGIN(INITIAL); }
<<EOF>> { EEOF(eval_eof()) }
%%
return IDENTIFIER_OVERFLOW;
}
+// Automatic semicolon insertion. The lexer rules call this for a token that
+// belongs to an ASI group before the token itself is written to yyout.
+//
+// current - ASI group of the token that is about to be emitted.
+//
+// If a line terminator was seen since the previous token (newline_found) and
+// insert_semicolon[previous_group][current] is set, a ';' is written first.
+// In every case the pending-newline flag is cleared and previous_group ends
+// up holding the group of the current token, because that token is what the
+// next call has to be compared against.
+void JSTokenizer::do_semicolon_insertion(ASIGroup current)
+{
+    assert(current >= 0 and current < ASI_GROUP_MAX);
+    if (newline_found)
+    {
+        // the pending line break is consumed by this token whether or not
+        // it triggers an insertion
+        newline_found = false;
+        if (insert_semicolon[previous_group][current])
+        {
+            yyout << ';';
+            // record the inserted ';' as the last emitted token, the same
+            // value the punctuator rules assign
+            token = PUNCTUATOR;
+        }
+    }
+    // Do not bail out after an insertion: the current token is still emitted
+    // by the calling rule, so its group must be remembered. Leaving
+    // previous_group at ASI_OTHER here made "a\nb\nc" come out as "a;b c".
+    previous_group = current;
+}
+
bool JSTokenizer::unescape(const char* lexeme)
{
if (strstr(lexeme, "\\u"))
"ab\xE2\x80\xA9ww ab\xEF\xBB\xBFww ab∞ww 2abc";
static const char all_patterns_expected5[] =
- "$2abc _2abc abc $__$ 肖晗 XÆA12 \u0041abc \u00FBdef \u1234ghi ab ww "
- "ab ww ab ww ab ∞ ww 2 abc";
+ "$2abc _2abc abc $__$ 肖晗 XÆA12 \u0041abc \u00FBdef \u1234ghi ab;ww "
+ "ab;ww ab ww ab ∞ ww 2 abc";
static const char all_patterns_buf6[] =
"tag` template\n ${ a + b } template`";
static const char syntax_cases_expected5[] =
"var i=1;while(i<100){i*=2;document.write(i+\", \");}i=1;do{i*=2;"
- "document.write(i+\", \");}while(i<100)for(var i=0;i<10;i++){if(i==5){break;}"
+ "document.write(i+\", \");}while(i<100);for(var i=0;i<10;i++){if(i==5){break;}"
"document.write(i+\", \");}for(var i=0;i<10;i++){if(i==5){continue;}"
"document.write(i+\", \");}";
"var a = 2\n/ab -cd/";
static const char syntax_cases_expected10[] =
- "var a=2 /ab -cd/";
+ "var a=2;/ab -cd/";
static const char syntax_cases_buf11[] =
"var d_str1 = \"\\\\ \" ; var d_str2 = \"abc\\\"def\" ;"
}
}
+static const char asi_cases_buf0[] = // group_4 -> group_1; each bufN is the input, expectedN the normalized output
+ "array[0]\n{}";
+
+static const char asi_cases_expected0[] =
+ "array[0];{}";
+
+static const char asi_cases_buf1[] = // group_4 -> group_7
+ "array[0]\ntrue";
+
+static const char asi_cases_expected1[] =
+ "array[0];true";
+
+static const char asi_cases_buf2[] = // group_4 -> group_8
+ "array[0]\n++";
+
+static const char asi_cases_expected2[] =
+ "array[0];++";
+
+static const char asi_cases_buf3[] = // group_4 -> group_9
+ "array[0]\ncontinue";
+
+static const char asi_cases_expected3[] =
+ "array[0];continue";
+
+static const char asi_cases_buf4[] = // group_4 -> group_10
+ "array[0]\nvar b;";
+
+static const char asi_cases_expected4[] =
+ "array[0];var b;";
+
+static const char asi_cases_buf5[] = // group_5 -> group_7
+ "func()\ntrue";
+
+static const char asi_cases_expected5[] =
+ "func();true";
+
+static const char asi_cases_buf6[] = // group_5 -> group_8
+ "func()\n++";
+
+static const char asi_cases_expected6[] =
+ "func();++";
+
+static const char asi_cases_buf7[] = // group_5 -> group_9
+ "func()\ncontinue";
+
+static const char asi_cases_expected7[] =
+ "func();continue";
+
+static const char asi_cases_buf8[] = // group_5 -> group_10
+ "func()\nvar b;";
+
+static const char asi_cases_expected8[] =
+ "func();var b;";
+
+static const char asi_cases_buf9[] = // group_7 -> group_1
+ "1024\n{}";
+
+static const char asi_cases_expected9[] =
+ "1024;{}";
+
+static const char asi_cases_buf10[] = // group_7 -> group_7
+ "1024\ntrue";
+
+static const char asi_cases_expected10[] =
+ "1024;true";
+
+static const char asi_cases_buf11[] = // group_7 -> group_8
+ "1024\n++";
+
+static const char asi_cases_expected11[] =
+ "1024;++";
+
+static const char asi_cases_buf12[] = // group_7 -> group_9
+ "1024\ncontinue";
+
+static const char asi_cases_expected12[] =
+ "1024;continue";
+
+static const char asi_cases_buf13[] = // group_7 -> group_10
+ "1024\nvar b;";
+
+static const char asi_cases_expected13[] =
+ "1024;var b;";
+
+static const char asi_cases_buf14[] = // group_8 -> group_1
+ "++\n{}";
+
+static const char asi_cases_expected14[] =
+ "++;{}";
+
+static const char asi_cases_buf15[] = // group_8 -> group_3
+ "++\n[1,2,3]";
+
+static const char asi_cases_expected15[] =
+ "++;[1,2,3]";
+
+static const char asi_cases_buf16[] = // group_8 -> group_7
+ "++\ntrue";
+
+static const char asi_cases_expected16[] =
+ "++;true";
+
+static const char asi_cases_buf17[] = // group_8 -> group_8
+ "++\n++";
+
+static const char asi_cases_expected17[] =
+ "++;++";
+
+static const char asi_cases_buf18[] = // group_8 -> group_9
+ "++\ncontinue";
+
+static const char asi_cases_expected18[] =
+ "++;continue";
+
+static const char asi_cases_buf19[] = // group_8 -> group_10
+ "++\nvar b;";
+
+static const char asi_cases_expected19[] =
+ "++;var b;";
+
+static const char asi_cases_buf20[] = // group_9 -> group_1
+ "return\n{}";
+
+static const char asi_cases_expected20[] =
+ "return;{}";
+
+static const char asi_cases_buf21[] = // group_9 -> group_3
+ "return\n[1,2,3]";
+
+static const char asi_cases_expected21[] =
+ "return;[1,2,3]";
+
+static const char asi_cases_buf22[] = // group_9 -> group_6
+ "return\n+a";
+
+static const char asi_cases_expected22[] =
+ "return;+a";
+
+static const char asi_cases_buf23[] = // group_9 -> group_7
+ "return\ntrue";
+
+static const char asi_cases_expected23[] =
+ "return;true";
+
+static const char asi_cases_buf24[] = // group_9 -> group_8
+ "return\n++";
+
+static const char asi_cases_expected24[] =
+ "return;++";
+
+static const char asi_cases_buf25[] = // group_9 -> group_9
+ "return\ncontinue";
+
+static const char asi_cases_expected25[] =
+ "return;continue";
+
+static const char asi_cases_buf26[] = // group_9 -> group_10
+ "return\nvar b;";
+
+static const char asi_cases_expected26[] =
+ "return;var b;";
+
+TEST_CASE("automatic semicolon insertion", "[JSNormalizer]") // one section per (token before newline, token after newline) pair that must produce ';'
+{
+ SECTION("group_4 to group_1") // ']' then '{'
+ {
+ NORMALIZE(asi_cases_buf0);
+ VALIDATE(asi_cases_buf0, asi_cases_expected0);
+ }
+
+ SECTION("group_4 to group_7") // ']' then literal
+ {
+ NORMALIZE(asi_cases_buf1);
+ VALIDATE(asi_cases_buf1, asi_cases_expected1);
+ }
+
+ SECTION("group_4 to group_8") // ']' then '++'
+ {
+ NORMALIZE(asi_cases_buf2);
+ VALIDATE(asi_cases_buf2, asi_cases_expected2);
+ }
+
+ SECTION("group_4 to group_9") // ']' then 'continue'
+ {
+ NORMALIZE(asi_cases_buf3);
+ VALIDATE(asi_cases_buf3, asi_cases_expected3);
+ }
+
+ SECTION("group_4 to group_10") // ']' then 'var'
+ {
+ NORMALIZE(asi_cases_buf4);
+ VALIDATE(asi_cases_buf4, asi_cases_expected4);
+ }
+
+ SECTION("group_5 to group_7") // ')' then literal
+ {
+ NORMALIZE(asi_cases_buf5);
+ VALIDATE(asi_cases_buf5, asi_cases_expected5);
+ }
+
+ SECTION("group_5 to group_8") // ')' then '++'
+ {
+ NORMALIZE(asi_cases_buf6);
+ VALIDATE(asi_cases_buf6, asi_cases_expected6);
+ }
+
+ SECTION("group_5 to group_9") // ')' then 'continue'
+ {
+ NORMALIZE(asi_cases_buf7);
+ VALIDATE(asi_cases_buf7, asi_cases_expected7);
+ }
+
+ SECTION("group_5 to group_10") // ')' then 'var'
+ {
+ NORMALIZE(asi_cases_buf8);
+ VALIDATE(asi_cases_buf8, asi_cases_expected8);
+ }
+
+ SECTION("group_7 to group_1") // literal then '{'
+ {
+ NORMALIZE(asi_cases_buf9);
+ VALIDATE(asi_cases_buf9, asi_cases_expected9);
+ }
+
+ SECTION("group_7 to group_7") // literal then literal
+ {
+ NORMALIZE(asi_cases_buf10);
+ VALIDATE(asi_cases_buf10, asi_cases_expected10);
+ }
+
+ SECTION("group_7 to group_8") // literal then '++'
+ {
+ NORMALIZE(asi_cases_buf11);
+ VALIDATE(asi_cases_buf11, asi_cases_expected11);
+ }
+
+ SECTION("group_7 to group_9") // literal then 'continue'
+ {
+ NORMALIZE(asi_cases_buf12);
+ VALIDATE(asi_cases_buf12, asi_cases_expected12);
+ }
+
+ SECTION("group_7 to group_10") // literal then 'var'
+ {
+ NORMALIZE(asi_cases_buf13);
+ VALIDATE(asi_cases_buf13, asi_cases_expected13);
+ }
+
+ SECTION("group_8 to group_1") // '++' then '{'
+ {
+ NORMALIZE(asi_cases_buf14);
+ VALIDATE(asi_cases_buf14, asi_cases_expected14);
+ }
+
+ SECTION("group_8 to group_3") // '++' then '['
+ {
+ NORMALIZE(asi_cases_buf15);
+ VALIDATE(asi_cases_buf15, asi_cases_expected15);
+ }
+
+ SECTION("group_8 to group_7") // '++' then literal
+ {
+ NORMALIZE(asi_cases_buf16);
+ VALIDATE(asi_cases_buf16, asi_cases_expected16);
+ }
+
+ SECTION("group_8 to group_8") // '++' then '++'
+ {
+ NORMALIZE(asi_cases_buf17);
+ VALIDATE(asi_cases_buf17, asi_cases_expected17);
+ }
+
+ SECTION("group_8 to group_9") // '++' then 'continue'
+ {
+ NORMALIZE(asi_cases_buf18);
+ VALIDATE(asi_cases_buf18, asi_cases_expected18);
+ }
+
+ SECTION("group_8 to group_10") // '++' then 'var'
+ {
+ NORMALIZE(asi_cases_buf19);
+ VALIDATE(asi_cases_buf19, asi_cases_expected19);
+ }
+
+ SECTION("group_9 to group_1") // 'return' then '{'
+ {
+ NORMALIZE(asi_cases_buf20);
+ VALIDATE(asi_cases_buf20, asi_cases_expected20);
+ }
+
+ SECTION("group_9 to group_3") // 'return' then '['
+ {
+ NORMALIZE(asi_cases_buf21);
+ VALIDATE(asi_cases_buf21, asi_cases_expected21);
+ }
+
+ SECTION("group_9 to group_6") // 'return' then unary '+'
+ {
+ NORMALIZE(asi_cases_buf22);
+ VALIDATE(asi_cases_buf22, asi_cases_expected22);
+ }
+
+ SECTION("group_9 to group_7") // 'return' then literal
+ {
+ NORMALIZE(asi_cases_buf23);
+ VALIDATE(asi_cases_buf23, asi_cases_expected23);
+ }
+
+ SECTION("group_9 to group_8") // 'return' then '++'
+ {
+ NORMALIZE(asi_cases_buf24);
+ VALIDATE(asi_cases_buf24, asi_cases_expected24);
+ }
+
+ SECTION("group_9 to group_9") // 'return' then 'continue'
+ {
+ NORMALIZE(asi_cases_buf25);
+ VALIDATE(asi_cases_buf25, asi_cases_expected25);
+ }
+
+ SECTION("group_9 to group_10") // 'return' then 'var'
+ {
+ NORMALIZE(asi_cases_buf26);
+ VALIDATE(asi_cases_buf26, asi_cases_expected26);
+ }
+}
+
TEST_CASE("endings", "[JSNormalizer]")
{
SECTION("script closing tag is present", "[JSNormalizer]")
static constexpr const char* s_closing_tag = "</script>";
-#define MAKE_INPUT(src, src_len, start, mid, end, depth) \
- std::string input_##src(start); \
- input_##src.append(depth - strlen(start) - strlen(end) - strlen(s_closing_tag), mid); \
- input_##src.append(end, strlen(end)); \
- input_##src.append(s_closing_tag, strlen(s_closing_tag)); \
- const char* src = input_##src.c_str(); \
- size_t src_len = input_##src.size()
+// Builds a benchmark input of at most len bytes: begin, then as many whole
+// copies of mid as fit, then end.
+//
+// begin, end - NUL-terminated delimiters, always present in the result.
+// mid        - NUL-terminated filler; repeated only in whole copies.
+// len        - target total size in bytes.
+//
+// When mid is empty, or len leaves no room beyond begin and end, no filler
+// is added and the result is just begin followed by end.
+static const std::string make_input(const char* begin, const char* mid,
+    const char* end, size_t len)
+{
+    const size_t begin_len = strlen(begin);
+    const size_t mid_len = strlen(mid);
+    const size_t end_len = strlen(end);
+
+    std::string s(begin, begin_len);
+
+    // Guard the arithmetic: an empty filler would divide by zero, and a len
+    // smaller than the delimiters would wrap the unsigned subtraction.
+    if (mid_len != 0 and len > begin_len + end_len)
+    {
+        const size_t fill = (len - begin_len - end_len) / mid_len;
+        s.reserve(begin_len + fill * mid_len + end_len);
+        for (size_t i = 0; i < fill; ++i)
+            s.append(mid, mid_len);
+    }
+
+    s.append(end, end_len);
+    return s;
+}
TEST_CASE("benchmarking - ::normalize() - literals", "[JSNormalizer]")
{
JSIdentifierCtxTest ident_ctx;
JSNormalizer normalizer(ident_ctx, UNLIM_DEPTH, MAX_TEMPLATE_NESTNIG);
char dst[DEPTH];
-
- MAKE_INPUT(src_ws, src_ws_len, "", ' ', "", DEPTH);
- MAKE_INPUT(src_bcomm, src_bcomm_len, "/*", ' ', "*/", DEPTH);
- MAKE_INPUT(src_dqstr, src_dqstr_len, "\"", ' ', "\"", DEPTH);
-
+ auto whitespace = make_input("", " ", "", DEPTH); // DEPTH spaces, no delimiters
+ auto block_comment = make_input("/*", " ", "*/", DEPTH); // "/*" + spaces + "*/", DEPTH bytes in total
+ auto double_quote = make_input("\"", " ", "\"", DEPTH); // quoted run of spaces, DEPTH bytes in total
BENCHMARK("memcpy - whitespaces - 65535 bytes")
{
- return memcpy(dst, src_ws, src_ws_len);
+ return memcpy(dst, whitespace.c_str(), whitespace.size());
};
BENCHMARK("whitespaces - 65535 bytes")
{
normalizer.rewind_output();
- return normalizer.normalize(src_ws, src_ws_len);
+ return normalizer.normalize(whitespace.c_str(), whitespace.size());
};
BENCHMARK("block comment - 65535 bytes")
{
normalizer.rewind_output();
- return normalizer.normalize(src_bcomm, src_bcomm_len);
+ return normalizer.normalize(block_comment.c_str(), block_comment.size());
};
BENCHMARK("double quotes string - 65535 bytes")
{
normalizer.rewind_output();
- return normalizer.normalize(src_dqstr, src_dqstr_len);
+ return normalizer.normalize(double_quote.c_str(), double_quote.size());
};
constexpr size_t depth_8k = 8192;
- MAKE_INPUT(src_ws_8k, src_ws_len_8k, "", ' ', "", depth_8k);
- MAKE_INPUT(src_bcomm_8k, src_bcomm_len_8k, "/*", ' ', "*/", depth_8k);
- MAKE_INPUT(src_dqstr_8k, src_dqstr_len_8k, "\"", ' ', "\"", depth_8k);
+ auto whitespace_8k = make_input("", " ", "", depth_8k); // same three inputs, sized to depth_8k bytes
+ auto block_comment_8k = make_input("/*", " ", "*/", depth_8k); // "/*" + spaces + "*/"
+ auto double_quote_8k = make_input("\"", " ", "\"", depth_8k); // quoted run of spaces
BENCHMARK("memcpy - whitespaces - 8192 bytes")
{
- return memcpy(dst, src_ws_8k, src_ws_len_8k);
+ return memcpy(dst, whitespace_8k.c_str(), whitespace_8k.size());
};
BENCHMARK("whitespaces - 8192 bytes")
{
normalizer.rewind_output();
- return normalizer.normalize(src_ws_8k, src_ws_len_8k);
+ return normalizer.normalize(whitespace_8k.c_str(), whitespace_8k.size());
};
BENCHMARK("block comment - 8192 bytes")
{
normalizer.rewind_output();
- return normalizer.normalize(src_bcomm_8k, src_bcomm_len_8k);
+ return normalizer.normalize(block_comment_8k.c_str(), block_comment_8k.size());
};
BENCHMARK("double quotes string - 8192 bytes")
{
normalizer.rewind_output();
- return normalizer.normalize(src_dqstr_8k, src_dqstr_len_8k);
+ return normalizer.normalize(double_quote_8k.c_str(), double_quote_8k.size());
};
}
};
}
+// Compares normalization cost of two inputs with the same layout: one whose
+// statements already end in ';' and one where each ';' is replaced by a space,
+// so the line break is the only separator between statements.
+TEST_CASE("benchmarking - ::normalize() - automatic semicolon insertion")
+{
+    auto w_semicolons = make_input("", "a;\n", s_closing_tag, DEPTH);
+    auto wo_semicolons = make_input("", "a \n", s_closing_tag, DEPTH);
+    const char* src_w_semicolons = w_semicolons.c_str();
+    const char* src_wo_semicolons = wo_semicolons.c_str();
+    // each buffer carries its own length instead of sharing one value
+    const size_t src_w_semicolons_len = w_semicolons.size();
+    const size_t src_wo_semicolons_len = wo_semicolons.size();
+
+    JSIdentifierCtxTest ident_ctx_mock;
+    JSNormalizer normalizer_wo_ident(ident_ctx_mock, UNLIM_DEPTH, MAX_TEMPLATE_NESTNIG);
+
+    BENCHMARK("without semicolon insertion")
+    {
+        // rewind before every run, as the other normalize() benchmarks do
+        normalizer_wo_ident.rewind_output();
+        return normalizer_wo_ident.normalize(src_w_semicolons, src_w_semicolons_len);
+    };
+
+    BENCHMARK("with semicolon insertion")
+    {
+        normalizer_wo_ident.rewind_output();
+        return normalizer_wo_ident.normalize(src_wo_semicolons, src_wo_semicolons_len);
+    };
+}
#endif // BENCHMARK_TEST