JSToken token = UNDEFINED; // the token before
int orig_len = 0; // current token original length
int norm_len = 0; // normalized length of previous tokens
- int sc = 0; // current Starting Condition (0 means NOT_SET)
+ int sc = 0; // current Starting Condition (0 means NOT_SET)
+ int correction = 0; // correction length
} states[JSTOKENIZER_MAX_STATES];
int sp = 0; // points to the top of states
int eof_sp = 0; // points to the last state before the EOF
bytes_read -= delta;
state.orig_len -= delta;
+ state.correction = take_off;
+
+ yyless(take_off);
}
void JSTokenizer::states_over()
state.orig_len = yyleng;
state.norm_len = yyout.rdbuf()->pubseekoff(0, std::ios_base::cur, std::ios_base::out);
state.sc = yy_start;
+ state.correction = 0;
return true;
}
// Update parsing state every match
else if (bytes_skip > 0)
{
+ // if the state was corrected, reflect this during the parsing
+ if (auto correction = states[sp].correction)
+ {
+ auto delta = yyleng - correction;
+ bytes_skip += delta;
+ bytes_read -= delta;
+
+ yyless(correction);
+ }
+
do { ++sp; sp %= JSTOKENIZER_MAX_STATES; }
while (states[sp].sc == 0);
state.orig_len = yyleng;
state.norm_len = yyout.rdbuf()->pubseekoff(0, std::ios_base::cur, std::ios_base::out);
state.sc = yy_start;
+ state.correction = 0;
return true;
}
EXEC(do_spacing(LITERAL))
yyout << '/';
states_correct(1);
- yyless(1);
BEGIN(regex);
set_ident_norm(true);
regex_stack = VStack<char>();
// discard match of the script tag and scan again without leading '<'
states_correct(1);
- yyless(1);
// process leading '<' as a comparison operator
operator_comparison();
const char exp2[] = "<script)";
const char exp[] = "(a<script)";
+ NORMALIZE_2(dat1, dat2, exp1, exp2);
+ NORM_COMBINED_2(dat1, dat2, exp);
+ }
+ SECTION("complete regex (1 parsing group) - identifier") // two-chunk case: the regex literal is fully contained in the first chunk
+ {
+ const char dat1[] = "/ss/,"; // chunk 1: regex literal /ss/ followed by a comma
+ const char dat2[] = " a ;"; // chunk 2: an identifier and a semicolon, each preceded by a space
+ const char exp1[] = "/ss/,"; // same bytes as dat1
+ const char exp2[] = "a;"; // dat2 with both spaces removed
+ const char exp[] = "/ss/,a;"; // exp1 immediately followed by exp2
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2); // NOTE(review): presumably feeds the chunks one by one and compares each output with exp1/exp2 - macro is not visible here, confirm
+ NORM_COMBINED_2(dat1, dat2, exp); // NOTE(review): presumably compares the accumulated output of both chunks with exp - macro is not visible here, confirm
+ }
+ SECTION("complete regex (2 parsing groups) - identifier") // two-chunk case: the regex body contains a backslash escape
+ {
+ const char dat1[] = "/\\s/,"; // chunk 1: the bytes /\s/, (the C++ literal doubles the backslash)
+ const char dat2[] = " a ;"; // chunk 2: an identifier and a semicolon, each preceded by a space
+ const char exp1[] = "/\\s/,"; // same bytes as dat1
+ const char exp2[] = "a;"; // dat2 with both spaces removed
+ const char exp[] = "/\\s/,a;"; // exp1 immediately followed by exp2
+
+ NORMALIZE_2(dat1, dat2, exp1, exp2); // NOTE(review): presumably feeds the chunks one by one and compares each output with exp1/exp2 - macro is not visible here, confirm
+ NORM_COMBINED_2(dat1, dat2, exp); // NOTE(review): presumably compares the accumulated output of both chunks with exp - macro is not visible here, confirm
+ }
+ SECTION("complete regex (not the first) - identifier") // two-chunk case: a comma precedes the regex literal in the first chunk
+ {
+ const char dat1[] = ",/\\s/,"; // chunk 1: the bytes ,/\s/, (the C++ literal doubles the backslash)
+ const char dat2[] = " a ;"; // chunk 2: an identifier and a semicolon, each preceded by a space
+ const char exp1[] = ",/\\s/,"; // same bytes as dat1
+ const char exp2[] = "a;"; // dat2 with both spaces removed
+ const char exp[] = ",/\\s/,a;"; // exp1 immediately followed by exp2
+
NORMALIZE_2(dat1, dat2, exp1, exp2);
NORM_COMBINED_2(dat1, dat2, exp);
}
const char dat2[] = "ipt";
const char dat3[] = ">";
const char exp1[] = "::::</scr";
- const char exp2[] = "cript";
+ const char exp2[] = "script";
const char exp3[] = "";
const char exp[] = "::::";