if (regex_stack.size() >= max_template_nesting)
return TEMPLATE_NESTING_OVERFLOW;
- regex_stack.push(yytext[0]);
+ // special meaning only outside of a character class
+ if (regex_stack.empty() or regex_stack.top() != '[')
+ regex_stack.push(yytext[0]);
+
ECHO;
return EOS;
}
JSTokenizer::JSRet JSTokenizer::literal_regex_g_close()
{
+ char c_close = yytext[0];
+
if (regex_stack.empty())
{
+ // a raw closing bracket is valid in a regex without the unicode flag;
+ // the parser also accepts it in a regex with the unicode flag
+ if (c_close == ']')
+ {
+ ECHO;
+ return EOS;
+ }
+
debug_logf(5, http_trace, TRACE_JS_PROC, nullptr,
"no group to close, .. %c\n", yytext[0]);
return BAD_TOKEN;
}
- char c = yytext[0];
- char o = regex_stack.top();
- char d = o == '(' ? 1 : 2;
+ char c_open = regex_stack.top();
+ bool mismatch = false;
- regex_stack.pop();
+ switch (c_open)
+ {
+ case '(':
+ mismatch = c_close != ')';
+ regex_stack.pop();
+ break;
+
+ case '[':
+ // only the closing bracket has an effect in a character set
+ if (c_close == ']')
+ regex_stack.pop();
+ break;
+
+ case '{':
+ mismatch = c_close != '}';
+ regex_stack.pop();
+ break;
+
+ default:
+ assert(false);
+ mismatch = true;
+ }
- if (o + d != c)
+ if (mismatch)
{
debug_logf(5, http_trace, TRACE_JS_PROC, nullptr,
- "closing symbol mismatch, %c .. %c\n", o, c);
+ "closing symbol mismatch, %c .. %c\n", c_open, c_close);
return BAD_TOKEN;
}
// Input text: regex literals that contain '/' inside a character class, a '['
// inside a character class ("[0-9a-z.:[\]-]" once the C escapes are resolved),
// and a final regex "/[/ a b c / 1]/" followed by a division expression.
static const char all_patterns_buf9[] =
"var r = /^(?:(?:https?|mailto|ftp):|[^:/?#]*(?:[/?#]|$))/i;"
- "new Lb(function(a){return /^[^:]*([/?#]|$)/.test(a)})";
+ "new Lb(function(a){return /^[^:]*([/?#]|$)/.test(a)});"
+ "pa=/^((https:)?\\/\\/[0-9a-z.:[\\]-]+\\/|\\/[^/\\\\]|"
+ "[^:/\\\\%]+\\/|[^:/\\\\%]*[?#]|about:blank#)/i;"
+ "/[/ a b c / 1]/ a b c / 1;";
// Expected text: differs from the input only in whitespace outside the regex
// literals ("var r = " becomes "var r=", the trailing " / 1" becomes "/1");
// the text between the regex delimiters is identical to the input.
static const char all_patterns_expected9[] =
"var r=/^(?:(?:https?|mailto|ftp):|[^:/?#]*(?:[/?#]|$))/i;"
- "new Lb(function(a){return /^[^:]*([/?#]|$)/.test(a)})";
+ "new Lb(function(a){return /^[^:]*([/?#]|$)/.test(a)});"
+ "pa=/^((https:)?\\/\\/[0-9a-z.:[\\]-]+\\/|\\/[^/\\\\]"
+ "|[^:/\\\\%]+\\/|[^:/\\\\%]*[?#]|about:blank#)/i;"
+ "/[/ a b c / 1]/ a b c/1;";
TEST_CASE("all patterns", "[JSNormalizer]")
{
"`${`${`${`${`";
// Input text: two statements whose regex literals open four and five '{'
// groups respectively (the removed lines used '[' instead of '{').
static const char syntax_cases_buf24[] =
- "var a=/[[[[/]]]]/;"
- "var b=/[[[[[/]]]]]/;";
+ "var a=/{{{{/}}}}/;"
+ "var b=/{{{{{/}}}}}/;";
// Expected text: the first statement is kept whole; the output for the second
// statement ends after its fourth '{'.
// NOTE(review): presumably the fifth opener trips the nesting limit -- confirm
// against the test that consumes these buffers.
static const char syntax_cases_expected24[] =
- "var a=/[[[[/]]]]/;"
- "var b=/[[[[";
+ "var a=/{{{{/}}}}/;"
+ "var b=/{{{{";
static const char syntax_cases_buf25[] =
"return /regex0/.foo + /regex1/.bar ;"
{
const char dat1[] = "a=/[]/;";
const char dat2[] = "b=/[][][]/;";
- const char dat3[] = "c=/[[[]]]/;";
+ const char dat3[] = "c=/[[[[[]/;";
+ const char dat4[] = "d=/[/]/;";
const char exp1[] = "a=/[]/;";
const char exp2[] = "b=/[][][]/;";
- const char exp3[] = "c=/[[[]]]/;";
+ const char exp3[] = "c=/[[[[[]/;";
+ const char exp4[] = "d=/[/]/;";
NORMALIZE_1(dat1, exp1);
NORMALIZE_1(dat2, exp2);
NORMALIZE_1(dat3, exp3);
+ NORMALIZE_1(dat4, exp4);
}
SECTION("mix of brackets")
{
}
SECTION("parentheses - wrong closing symbol")
{
// An inner '(' group is closed by '}' (dat1) or ']' (dat2). The added inputs
// contain no '[' at all. In both cases the expected output ends at the inner
// '(' and the expected result code is BAD_TOKEN.
- const char dat1[] = "/({[ (} ]})/";
- const char dat2[] = "/({[ (] ]})/";
- const char exp1[] = "/({[ (";
- const char exp2[] = "/({[ (";
+ const char dat1[] = "/({ (} })/";
+ const char dat2[] = "/({ (] })/";
+ const char exp1[] = "/({ (";
+ const char exp2[] = "/({ (";
NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
}
SECTION("curly braces - wrong closing symbol")
{
// An inner '{' group is closed by ')' (dat1) or ']' (dat2). The added inputs
// contain no '[' at all. In both cases the expected output ends at the inner
// '{' and the expected result code is BAD_TOKEN.
- const char dat1[] = "/({[ {) ]})/";
- const char dat2[] = "/({[ {] ]})/";
- const char exp1[] = "/({[ {";
- const char exp2[] = "/({[ {";
+ const char dat1[] = "/({ {) })/";
+ const char dat2[] = "/({ {] })/";
+ const char exp1[] = "/({ {";
+ const char exp2[] = "/({ {";
NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
}
- SECTION("square brackets - wrong closing symbol")
+ SECTION("square brackets - raw bracket")
{
// The added inputs each contain a ']' that has no matching open '['
// ("/]/", the second ']' of "/[]]/", the first and last ']' of "/][]]/g").
// Each expected string equals its input, and the checks use NORMALIZE_1
// where the removed lines used NORM_BAD_1 with BAD_TOKEN.
- const char dat1[] = "/([{ [) }])/";
- const char dat2[] = "/([{ [} }])/";
- const char exp1[] = "/([{ [";
- const char exp2[] = "/([{ [";
+ const char dat1[] = "/]/";
+ const char dat2[] = "/[]]/";
+ const char dat3[] = "/][]]/g";
+ const char exp1[] = "/]/";
+ const char exp2[] = "/[]]/";
+ const char exp3[] = "/][]]/g";
- NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
- NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
+ NORMALIZE_1(dat1, exp1);
+ NORMALIZE_1(dat2, exp2);
+ NORMALIZE_1(dat3, exp3);
}
SECTION("parentheses - mismatch")
{
const char dat1[] = "/)/";
const char dat2[] = "/())/";
- const char dat3[] = "/({[ ()) ]})/";
+ const char dat3[] = "/({{ ()) }})/";
const char exp1[] = "/";
const char exp2[] = "/()";
- const char exp3[] = "/({[ ()";
+ const char exp3[] = "/({{ ()";
NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
{
const char dat1[] = "/}/";
const char dat2[] = "/{}}/";
- const char dat3[] = "/({[ {}} ]})/";
+ const char dat3[] = "/({( {}} )})/";
const char exp1[] = "/";
const char exp2[] = "/{}";
- const char exp3[] = "/({[ {}";
-
- NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
- NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
- NORM_BAD_1(dat3, exp3, JSTokenizer::BAD_TOKEN);
- }
- SECTION("square brackets - mismatch")
- {
- const char dat1[] = "/]/";
- const char dat2[] = "/[]]/";
- const char dat3[] = "/([{ []] }])/";
- const char exp1[] = "/";
- const char exp2[] = "/[]";
- const char exp3[] = "/([{ []";
+ const char exp3[] = "/({( {}";
NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN);
NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN);
SECTION("square brackets - continuation")
{
const char dat1[] = "/[[";
- const char dat2[] = "]]/";
+ const char dat2[] = "[]/";
const char exp1[] = "/[[";
- const char exp2[] = "]]/";
- const char exp[] = "/[[]]/";
+ const char exp2[] = "[]/";
+ const char exp[] = "/[[[]/";
NORMALIZE_2(dat1, dat2, exp1, exp2);
NORM_COMBINED_2(dat1, dat2, exp);
const char exp2[] = "}";
const char exp[] = "/{}";
- NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::BAD_TOKEN);
- NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::BAD_TOKEN);
- }
- SECTION("square brackets - mismatch in continuation")
- {
- const char dat1[] = "/[";
- const char dat2[] = "]]/";
- const char exp1[] = "/[";
- const char exp2[] = "]";
- const char exp[] = "/[]";
-
NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::BAD_TOKEN);
NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::BAD_TOKEN);
}