LITERAL_REGEX_START \/[^*\/]
LITERAL_REGEX_END \/[gimsuy]*
-LITERAL_REGEX_SKIP \\\/|\\\\|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}
-LITERAL_REGEX_TEXT [^\\<\xA\xD\xE2/\(\)\[\]\{\}]{1,32}|.
-LITERAL_REGEX_G_OPEN \(|\[|\{
-LITERAL_REGEX_G_CLOSE \)|\]|\}
+LITERAL_REGEX_SKIP \\\/|\\\\|\\\(|\\\)|\\\[|\\\]
+LITERAL_REGEX_TEXT [^\\<\xA\xD\xE2/\(\)\[\]]{1,32}|.
+LITERAL_REGEX_G_OPEN \(|\[
+LITERAL_REGEX_G_CLOSE \)|\]
/* extra literals */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-4.3 */
LITERAL_UNDEFINED undefined
JSTokenizer::JSRet JSTokenizer::literal_regex_g_close()
{
- char c_close = yytext[0];
-
- if (regex_stack.empty())
+ switch (yytext[0])
{
- // a raw bracket is allowed in regex w/o unicode flag,
- // but the parser will accept a bracket in regex with unicode flag
- if (c_close == ']')
+ case ')':
+ if (regex_stack.empty())
{
- ECHO;
- return EOS;
+ debug_logf(5, http_trace, TRACE_JS_PROC, nullptr,
+ "no group to close, .. %c\n", yytext[0]);
+ return BAD_TOKEN;
}
+ else if (regex_stack.top() == '(')
+ regex_stack.pop();
+ else
+ assert(regex_stack.top() == '[');
- debug_logf(5, http_trace, TRACE_JS_PROC, nullptr,
- "no group to close, .. %c\n", yytext[0]);
- return BAD_TOKEN;
- }
-
- char c_open = regex_stack.top();
- bool mismatch = false;
-
- switch (c_open)
- {
- case '(':
- mismatch = c_close != ')';
- regex_stack.pop();
break;
- case '[':
- // only the closing bracket has an effect in a character set
- if (c_close == ']')
+ case ']':
+ if (regex_stack.empty())
+ {
+ // a raw closing bracket is only valid in a regex without the unicode flag,
+ // but this parser does not check the flag and accepts the bracket either way
+ ECHO;
+ return EOS;
+ }
+ else if (regex_stack.top() == '[')
regex_stack.pop();
- break;
+ else
+ assert(regex_stack.top() == '(');
- case '{':
- mismatch = c_close != '}';
- regex_stack.pop();
break;
default:
assert(false);
- mismatch = true;
- }
-
- if (mismatch)
- {
- debug_logf(5, http_trace, TRACE_JS_PROC, nullptr,
- "closing symbol mismatch, %c .. %c\n", c_open, c_close);
- return BAD_TOKEN;
}
ECHO;
"|[^:/\\\\%]+\\/|[^:/\\\\%]*[?#]|about:blank#)/i;"
"/[/ a b c / 1]/ a b c/1;";
+static const char all_patterns_buf10[] =
+ "function(a){if(!/^\\s*{/.test(a))return!1;a=_.xf(a);return null!==a&&\"object\"===typeof a&&!!a.g};"
+ "/^\\s*$/.test(Q)?0:/^[\\],:{}]*$/.replace(/(?=:|,|]|}|$)/g,z)";
+
+static const char all_patterns_expected10[] =
+ "function(a){if(!/^\\s*{/.test(a))return!1;a=_.xf(a);return null!==a&&\"object\"===typeof a&&!!a.g};"
+ "/^\\s*$/.test(Q)?0:/^[\\],:{}]*$/.replace(/(?=:|,|]|}|$)/g,z)";
+
TEST_CASE("all patterns", "[JSNormalizer]")
{
SECTION("whitespaces and special characters")
NORMALIZE(all_patterns_buf9);
VALIDATE(all_patterns_buf9, all_patterns_expected9);
}
+ SECTION("regex literal with curly brace")
+ {
+ NORMALIZE(all_patterns_buf10);
+ VALIDATE(all_patterns_buf10, all_patterns_expected10);
+ }
}
// Test vectors for different syntax cases
"`${`${`${`${`";
static const char syntax_cases_buf24[] =
- "var a=/{{{{/}}}}/;"
- "var b=/{{{{{/}}}}}/;";
+ "var a=/((((/))))/;"
+ "var b=/(((((/)))))/;";
static const char syntax_cases_expected24[] =
- "var a=/{{{{/}}}}/;"
- "var b=/{{{{";
+ "var a=/((((/))))/;"
+ "var b=/((((";
static const char syntax_cases_buf25[] =
"return /regex0/.foo + /regex1/.bar ;"
NORMALIZE_1(dat2, exp2);
NORMALIZE_1(dat3, exp3);
}
- SECTION("parentheses - wrong closing symbol")
- {
- const char dat1[] = "/({ (} })/";
- const char dat2[] = "/({ (] })/";
- const char exp1[] = "/({ (";
- const char exp2[] = "/({ (";
-
- NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
- NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
- }
- SECTION("curly braces - wrong closing symbol")
- {
- const char dat1[] = "/({ {) })/";
- const char dat2[] = "/({ {] })/";
- const char exp1[] = "/({ {";
- const char exp2[] = "/({ {";
-
- NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
- NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
- }
SECTION("square brackets - raw bracket")
{
const char dat1[] = "/]/";
{
const char dat1[] = "/)/";
const char dat2[] = "/())/";
- const char dat3[] = "/({{ ()) }})/";
+ const char dat3[] = "/( ()) )/";
const char exp1[] = "/";
const char exp2[] = "/()";
- const char exp3[] = "/({{ ()";
-
- NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
- NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
- NORM_BAD_1(dat3, exp3, JSTokenizer::BAD_TOKEN);
- }
- SECTION("curly braces - mismatch")
- {
- const char dat1[] = "/}/";
- const char dat2[] = "/{}}/";
- const char dat3[] = "/({( {}} )})/";
- const char exp1[] = "/";
- const char exp2[] = "/{}";
- const char exp3[] = "/({( {}";
+ const char exp3[] = "/( ()) ";
NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
NORMALIZE_2(dat1, dat2, exp1, exp2);
NORM_COMBINED_2(dat1, dat2, exp);
}
- SECTION("curly braces - continuation")
- {
- const char dat1[] = "/{{";
- const char dat2[] = "}}/";
- const char exp1[] = "/{{";
- const char exp2[] = "}}/";
- const char exp[] = "/{{}}/";
-
- NORMALIZE_2(dat1, dat2, exp1, exp2);
- NORM_COMBINED_2(dat1, dat2, exp);
- }
SECTION("square brackets - continuation")
{
const char dat1[] = "/[[";
const char exp2[] = ")";
const char exp[] = "/()";
- NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::BAD_TOKEN);
- NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::BAD_TOKEN);
- }
- SECTION("curly braces - mismatch in continuation")
- {
- const char dat1[] = "/{";
- const char dat2[] = "}}/";
- const char exp1[] = "/{";
- const char exp2[] = "}";
- const char exp[] = "/{}";
-
NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::BAD_TOKEN);
NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::BAD_TOKEN);
}