From: drh Date: Tue, 16 Feb 2016 21:19:49 +0000 (+0000) Subject: Experimental changes to Lemon for improved parser performance. X-Git-Tag: version-3.12.0~180^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=45f31be85dc90a3f76dd930ad4ec2a0174105156;p=thirdparty%2Fsqlite.git Experimental changes to Lemon for improved parser performance. FossilOrigin-Name: a65d583ce97b8c08157268bd054479cda3957a94 --- diff --git a/doc/lemon.html b/doc/lemon.html index b16e35960e..2b8c613640 100644 --- a/doc/lemon.html +++ b/doc/lemon.html @@ -161,8 +161,9 @@ such as ``identifier'' or ``number'' and the third argument will be the name of the identifier or the value of the number.

The Parse() function may have either three or four arguments, -depending on the grammar. If the grammar specification file request -it, the Parse() function will have a fourth parameter that can be +depending on the grammar. If the grammar specification file requests +it (via the extra_argument directive), +the Parse() function will have a fourth parameter that can be of any type chosen by the programmer. The parser doesn't do anything with this argument except to pass it through to action routines. This is a convenient mechanism for passing state information down @@ -263,6 +264,12 @@ with prior yacc and bison experience. But after years of experience using Lemon, I firmly believe that the Lemon way of doing things is better.

+

Updated as of 2016-02-16: +The text above was written in the 1990s. +We are told that Bison has lately been enhanced to support the +tokenizer-calls-parser paradigm used by Lemon, and to obviate the +need for global variables.

+

Input File Syntax

The main purpose of the grammar specification file for Lemon is @@ -617,6 +624,7 @@ build a parser using Lemon that can be used within a long-running program, such as a GUI, that will not leak memory or other resources. To do the same using yacc or bison is much more difficult.

+

The %extra_argument directive

The %extra_argument directive instructs Lemon to add a 4th parameter diff --git a/ext/fts5/fts5parse.y b/ext/fts5/fts5parse.y index 43936767b7..2bdf4b09b2 100644 --- a/ext/fts5/fts5parse.y +++ b/ext/fts5/fts5parse.y @@ -34,7 +34,6 @@ ); } %stack_overflow { - UNUSED_PARAM(yypMinor); /* Silence a compiler warning */ sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow"); } diff --git a/manifest b/manifest index 86aa0b58ec..309d908101 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Minor\ssimplification\sto\sthe\stokenizer.\s\sSlightly\ssmaller\sand\sfaster. -D 2016-02-16T13:04:19.100 +C Experimental\schanges\sto\sLemon\sfor\simproved\sparser\sperformance. +D 2016-02-16T21:19:49.726 F Makefile.in 4e90dc1521879022aa9479268a4cd141d1771142 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 30f075dc4f27a07abb76088946b2944178d85347 @@ -33,7 +33,7 @@ F config.sub 9ebe4c3b3dab6431ece34f16828b594fb420da55 F configure 12d96e3798e612e0ffa53a7a8c4d7fb1090df80e x F configure.ac a2224b1162f79848982d3618ac1deffcd94e88ec F contrib/sqlitecon.tcl 210a913ad63f9f991070821e599d600bd913e0ad -F doc/lemon.html 334dbf6621b8fb8790297ec1abf3cfa4621709d1 +F doc/lemon.html c30255bea0fd87a81f082d17a72c9dffbc3f6dd9 F doc/pager-invariants.txt 27fed9a70ddad2088750c4a2b493b63853da2710 F doc/vfs-shm.txt e101f27ea02a8387ce46a05be2b1a902a021d37a F ext/README.txt 913a7bd3f4837ab14d7e063304181787658b14e1 @@ -114,7 +114,7 @@ F ext/fts5/fts5_tokenize.c 2ce7b44183538ec46b7907726262ee43ffdd39a8 F ext/fts5/fts5_unicode2.c b450b209b157d598f7b9df9f837afb75a14c24bf F ext/fts5/fts5_varint.c a5aceacda04dafcbae725413d7a16818ecd65738 F ext/fts5/fts5_vocab.c dba72ca393d71c2588548b51380387f6b44c77a8 -F ext/fts5/fts5parse.y 59432ea369f1aa65511bad465f55d31a22f9f223 +F ext/fts5/fts5parse.y 86fe6ba094a47e02fe8be2571539e6833d197764 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 61ff0d1a29d98a91c4553b20b3f410d858834ee9 F ext/fts5/test/fts5aa.test 7e814df4a0e6c22a6fe2d84f210fdc0b5068a084 @@ -337,7 +337,7 @@ F src/os_win.c f0d7aa603eb6262143d7169a222aea07c4fca91d F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca F src/pager.c 6812f3803951774b56abded396171e1c12b0b003 F src/pager.h f3eb324a3ff2408b28bab7e81c1c55c13720f865 -F src/parse.y d7bff41d460f2df96fb890f36700e85cb0fc5634 +F src/parse.y 36a367a623589fb1123c10d577737aa3a06177e6 F src/pcache.c 647bb53a86b7bbcf55ad88089b3ea5a9170b90df F src/pcache.h 4d0ccaad264d360981ec5e6a2b596d6e85242545 F src/pcache1.c 72f644dc9e1468c72922eff5904048427b817051 @@ -1379,7 +1379,7 @@ F tool/genfkey.README cf68fddd4643bbe3ff8e31b8b6d8b0a1b85e20f4 F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5 F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce F tool/lemon.c 799e73e19a33b8dd7767a7fa34618ed2a9c2397d -F tool/lempar.c ff7763747f93c9d7eac6df1e802a200b3f0e2cc0 +F tool/lempar.c 5478c5298763b47908b6632a7e3f9d656a98748b F tool/loadfts.c c3c64e4d5e90e8ba41159232c2189dba4be7b862 F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6 F tool/mkautoconfamal.sh c78caa3214f25dc28ea157b5a82abb311f209906 @@ -1427,7 +1427,10 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh a98af506df552f3b3c0d904f94e4cdc4e1a6d598 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 9235b0cf6a37712ae9e5deeb1e5ee064dd5511fa -R a61b67e5f930748de130d8b41c0bc53a +P 9570b6b43df3bc5ce314cded20bca8be9e968efe +R bd4538940a7fa7274e5766ff6172546b +T *branch * parser-performance +T *sym-parser-performance * +T -sym-trunk * U drh -Z d1a1b630582c40b1f2edd9cd2e01402a +Z c2a93192e4911cf28788507cc4af2d10 diff --git a/manifest.uuid b/manifest.uuid index ee978cebbb..13e75b598d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9570b6b43df3bc5ce314cded20bca8be9e968efe \ No newline at end of file +a65d583ce97b8c08157268bd054479cda3957a94 \ No newline at end of file diff --git a/src/parse.y b/src/parse.y index 0bfe4e473a..b5755820b3 100644 --- a/src/parse.y +++ b/src/parse.y @@ -35,7 +35,6 @@ sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &TOKEN); } %stack_overflow { - UNUSED_PARAMETER(yypMinor); /* Silence some compiler warnings */ sqlite3ErrorMsg(pParse, "parser stack overflow"); } diff --git a/tool/lempar.c b/tool/lempar.c index 454702a3a7..c5b65d645d 100644 --- a/tool/lempar.c +++ b/tool/lempar.c @@ -518,7 +518,7 @@ static int yy_find_reduce_action( /* ** The following routine is called if the stack overflows. */ -static void yyStackOverflow(yyParser *yypParser, YYMINORTYPE *yypMinor){ +static void yyStackOverflow(yyParser *yypParser){ ParseARG_FETCH; yypParser->yyidx--; #ifndef NDEBUG @@ -562,7 +562,7 @@ static void yy_shift( yyParser *yypParser, /* The parser to be shifted */ int yyNewState, /* The new state to shift in */ int yyMajor, /* The major token to shift in */ - YYMINORTYPE *yypMinor /* Pointer to the minor token to shift in */ + ParseTOKENTYPE yyMinor /* The minor token to shift in */ ){ yyStackEntry *yytos; yypParser->yyidx++; @@ -573,14 +573,14 @@ static void yy_shift( #endif #if YYSTACKDEPTH>0 if( yypParser->yyidx>=YYSTACKDEPTH ){ - yyStackOverflow(yypParser, yypMinor); + yyStackOverflow(yypParser); return; } #else if( yypParser->yyidx>=yypParser->yystksz ){ yyGrowStack(yypParser); if( yypParser->yyidx>=yypParser->yystksz ){ - yyStackOverflow(yypParser, yypMinor); + yyStackOverflow(yypParser); return; } } @@ -588,7 +588,7 @@ static void yy_shift( yytos = &yypParser->yystack[yypParser->yyidx]; yytos->stateno = (YYACTIONTYPE)yyNewState; yytos->major = (YYCODETYPE)yyMajor; - yytos->minor = *yypMinor; + yytos->minor.yy0 = yyMinor; yyTraceShift(yypParser, yyNewState); } @@ -627,7 +627,32 @@ static void yy_reduce( yyRuleName[yyruleno], yymsp[-yysize].stateno); } #endif /* NDEBUG */ - yygotominor = yyzerominor; + /* yygotominor = yyzerominor; */ + + /* Check that the stack is large enough to grow by a single entry + ** if the RHS of the rule is empty. This ensures that there is room + ** enough on the stack to push the LHS value */ + if( yyRuleInfo[yyruleno].nrhs==0 ){ +#ifdef YYTRACKMAXSTACKDEPTH + if( yypParser->yyidx>yypParser->yyidxMax ){ + yypParser->yyidxMax = yypParser->yyidx; + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yyidx>=YYSTACKDEPTH ){ + yyStackOverflow(yypParser); + return; + } +#else + if( yypParser->yyidx>=yypParser->yystksz ){ + yyGrowStack(yypParser); + if( yypParser->yyidx>=yypParser->yystksz ){ + yyStackOverflow(yypParser); + return; + } + } +#endif + } switch( yyruleno ){ /* Beginning here are the reduction cases. A typical example @@ -645,26 +670,18 @@ static void yy_reduce( assert( yyruleno>=0 && yyrulenoyyidx -= yysize; yyact = yy_find_reduce_action(yymsp[-yysize].stateno,(YYCODETYPE)yygoto); if( yyact <= YY_MAX_SHIFTREDUCE ){ if( yyact>YY_MAX_SHIFT ) yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; - /* If the reduce action popped at least - ** one element off the stack, then we can push the new element back - ** onto the stack here, and skip the stack overflow test in yy_shift(). - ** That gives a significant speed improvement. */ - if( yysize ){ - yypParser->yyidx++; - yymsp -= yysize-1; - yymsp->stateno = (YYACTIONTYPE)yyact; - yymsp->major = (YYCODETYPE)yygoto; - yymsp->minor = yygotominor; - yyTraceShift(yypParser, yyact); - }else{ - yy_shift(yypParser,yyact,yygoto,&yygotominor); - } + yypParser->yyidx -= yysize - 1; + yymsp -= yysize-1; + yymsp->stateno = (YYACTIONTYPE)yyact; + yymsp->major = (YYCODETYPE)yygoto; + yymsp->minor = yygotominor; + yyTraceShift(yypParser, yyact); }else{ assert( yyact == YY_ACCEPT_ACTION ); + yypParser->yyidx -= yysize; yy_accept(yypParser); } } @@ -698,10 +715,10 @@ static void yy_parse_failed( static void yy_syntax_error( yyParser *yypParser, /* The parser */ int yymajor, /* The major type of the error token */ - YYMINORTYPE yyminor /* The minor type of the error token */ + ParseTOKENTYPE yyminor /* The minor type of the error token */ ){ ParseARG_FETCH; -#define TOKEN (yyminor.yy0) +#define TOKEN yyminor /************ Begin %syntax_error code ****************************************/ %% /************ End %syntax_error code ******************************************/ @@ -769,9 +786,7 @@ void Parse( if( yypParser->yyidx<0 ){ #if YYSTACKDEPTH<=0 if( yypParser->yystksz <=0 ){ - /*memset(&yyminorunion, 0, sizeof(yyminorunion));*/ - yyminorunion = yyzerominor; - yyStackOverflow(yypParser, &yyminorunion); + yyStackOverflow(yypParser); return; } #endif @@ -788,7 +803,6 @@ void Parse( } #endif } - yyminorunion.yy0 = yyminor; #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) yyendofinput = (yymajor==0); #endif @@ -804,7 +818,7 @@ void Parse( yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); if( yyact <= YY_MAX_SHIFTREDUCE ){ if( yyact > YY_MAX_SHIFT ) yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; - yy_shift(yypParser,yyact,yymajor,&yyminorunion); + yy_shift(yypParser,yyact,yymajor,yyminor); #ifndef YYNOERRORRECOVERY yypParser->yyerrcnt--; #endif @@ -813,6 +827,7 @@ void Parse( yy_reduce(yypParser,yyact-YY_MIN_REDUCE); }else{ assert( yyact == YY_ERROR_ACTION ); + yyminorunion.yy0 = yyminor; #ifdef YYERRORSYMBOL int yymx; #endif @@ -842,7 +857,7 @@ void Parse( ** */ if( yypParser->yyerrcnt<0 ){ - yy_syntax_error(yypParser,yymajor,yyminorunion); + yy_syntax_error(yypParser,yymajor,yyminor); } yymx = yypParser->yystack[yypParser->yyidx].major; if( yymx==YYERRORSYMBOL || yyerrorhit ){ @@ -852,10 +867,10 @@ void Parse( yyTracePrompt,yyTokenName[yymajor]); } #endif - yy_destructor(yypParser, (YYCODETYPE)yymajor,&yyminorunion); + yy_destructor(yypParser, (YYCODETYPE)yymajor, &yyminorunion); yymajor = YYNOCODE; }else{ - while( + while( yypParser->yyidx >= 0 && yymx != YYERRORSYMBOL && (yyact = yy_find_reduce_action( @@ -869,9 +884,7 @@ void Parse( yy_parse_failed(yypParser); yymajor = YYNOCODE; }else if( yymx!=YYERRORSYMBOL ){ - YYMINORTYPE u2; - u2.YYERRSYMDT = 0; - yy_shift(yypParser,yyact,YYERRORSYMBOL,&u2); + yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor); } } yypParser->yyerrcnt = 3; @@ -884,7 +897,7 @@ void Parse( ** Applications can set this macro (for example inside %include) if ** they intend to abandon the parse upon the first syntax error seen. */ - yy_syntax_error(yypParser,yymajor,yyminorunion); + yy_syntax_error(yypParser,yymajor, yyminor); yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); yymajor = YYNOCODE; @@ -899,7 +912,7 @@ void Parse( ** three input tokens have been successfully shifted. */ if( yypParser->yyerrcnt<=0 ){ - yy_syntax_error(yypParser,yymajor,yyminorunion); + yy_syntax_error(yypParser,yymajor, yyminor); } yypParser->yyerrcnt = 3; yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);