From: drh Date: Mon, 25 Dec 2017 04:15:38 +0000 (+0000) Subject: Enhance LEMON so that it generates the action table in such a way that no X-Git-Tag: version-3.22.0~127^2~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3a9d6c7156e33c63f03f12be8e2168c8d0aa7d2b;p=thirdparty%2Fsqlite.git Enhance LEMON so that it generates the action table in such a way that no range check is needed on the lookahead table to verify that the next input token is valid. This makes the lookahead table slightly larger (about 120 bytes) but helps the parser to run faster. FossilOrigin-Name: 7eb0198d0102e97e4b7ad9e359d95985e55e09c510ea4b360265ac8feb9ed814 --- diff --git a/manifest b/manifest index f8d8364cbb..d9f3940ae0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C In\sthe\sLEMON-generated\sparser,\savoid\sunnecessary\stests\sfor\sthe\sacceptance\nstate. -D 2017-12-25T00:10:05.418 +C Enhance\sLEMON\sso\sthat\sit\sgenerates\sthe\saction\stable\sin\ssuch\sa\sway\sthat\sno\nrange\scheck\sis\sneeded\son\sthe\slookahead\stable\sto\sverify\sthat\sthe\snext\sinput\ntoken\sis\svalid.\s\sThis\smakes\sthe\slookahead\stable\sslightly\slarger\s(about\s120\nbytes)\sbut\shelps\sthe\sparser\sto\srun\sfaster. 
+D 2017-12-25T04:15:38.668 F Makefile.in ceb40bfcb30ebba8e1202b34c56ff7e13e112f9809e2381d99be32c2726058f5 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 6480671f7c129e61208d69492b3c71ce4310d49fceac83cfb17f1c081e242b69 @@ -1608,8 +1608,8 @@ F tool/genfkey.README cf68fddd4643bbe3ff8e31b8b6d8b0a1b85e20f4 F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5 F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce F tool/kvtest-speed.sh 4761a9c4b3530907562314d7757995787f7aef8f -F tool/lemon.c 7c6919d98e459c0f8a3673be64b03425553733dba01c12939b2fadc30e4e2804 -F tool/lempar.c 8062f219b4ce349853cb3ab3ebd3ab44466604235347457d703a9f4252e76dd5 +F tool/lemon.c c8d7ce4fe7a90f7fa6a5985452aa926fcf25376cf90095c9d06c432ab0bebdbc +F tool/lempar.c 427ee280f3c3781e82bbee21f428bc8ae18ab245d4f66d65da46b598ded81648 F tool/libvers.c caafc3b689638a1d88d44bc5f526c2278760d9b9 F tool/loadfts.c c3c64e4d5e90e8ba41159232c2189dba4be7b862 F tool/logest.c 11346aa019e2e77a00902aa7d0cabd27bd2e8cca @@ -1687,7 +1687,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 7bfe7a360261ac7227840db49487c2f0fe338a2f1b868fcaada1e04a8d2b8f7a -R 904d9f58d13471f5c0ad3064ead9b812 +P fdbb35c54f2b6cb65d04ac295f207ff3e69360e0558348c77eb5e62691807046 +R f14cbf100834d359550a23c924f7ebaa U drh -Z c00f50853c78fa733a13b33ab92ee708 +Z bb1d7d0db816330a6f1e2b8011c2819b diff --git a/manifest.uuid b/manifest.uuid index e026b0ddc4..b69ed5a1cc 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fdbb35c54f2b6cb65d04ac295f207ff3e69360e0558348c77eb5e62691807046 \ No newline at end of file +7eb0198d0102e97e4b7ad9e359d95985e55e09c510ea4b360265ac8feb9ed814 \ No newline at end of file diff --git a/tool/lemon.c b/tool/lemon.c index 
33ef43d1b1..96dc756d1b 100644 --- a/tool/lemon.c +++ b/tool/lemon.c @@ -413,6 +413,7 @@ struct lemon { char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ + int nlookaheadtab; /* Number of entries in yy_lookahead[] */ int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int has_fallback; /* True if any %fallback is seen in the grammar */ @@ -589,10 +590,12 @@ struct acttab { int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ + int nterminal; /* Number of terminal symbols */ + int nsymbol; /* total number of symbols */ }; /* Return the number of entries in the yy_action table */ -#define acttab_size(X) ((X)->nAction) +#define acttab_lookahead_size(X) ((X)->nAction) /* The value for the N-th entry in yy_action */ #define acttab_yyaction(X,N) ((X)->aAction[N].action) @@ -608,13 +611,15 @@ void acttab_free(acttab *p){ } /* Allocate a new acttab structure */ -acttab *acttab_alloc(void){ +acttab *acttab_alloc(int nsymbol, int nterminal){ acttab *p = (acttab *) calloc( 1, sizeof(*p) ); if( p==0 ){ fprintf(stderr,"Unable to allocate memory for a new acttab."); exit(1); } memset(p, 0, sizeof(*p)); + p->nsymbol = nsymbol; + p->nterminal = nterminal; return p; } @@ -655,16 +660,24 @@ void acttab_action(acttab *p, int lookahead, int action){ ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. +** +** If the makeItSafe parameter is true, then the offset is chosen so that +** it is impossible to overread the yy_lookahead[] table regardless of +** the lookahead token. This is done for the terminal symbols, as they +** come from external inputs and can contain syntax errors. 
When makeItSafe +** is false, there is more flexibility in selecting offsets, resulting in +** a smaller table. For non-terminal symbols, which are never syntax errors, +** makeItSafe can be false. */ -int acttab_insert(acttab *p){ - int i, j, k, n; +int acttab_insert(acttab *p, int makeItSafe){ + int i, j, k, n, end; assert( p->nLookahead>0 ); /* Make sure we have enough space to hold the expanded action table ** in the worst case. The worst case occurs if the transaction set ** must be appended to the current action table */ - n = p->mxLookahead + 1; + n = p->nsymbol + 1; if( p->nAction + n >= p->nActionAlloc ){ int oldAlloc = p->nActionAlloc; p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; @@ -686,7 +699,8 @@ int acttab_insert(acttab *p){ ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ - for(i=p->nAction-1; i>=0; i--){ + end = makeItSafe ? p->mnLookahead : 0; + for(i=p->nAction-1; i>=end; i--){ if( p->aAction[i].lookahead==p->mnLookahead ){ /* All lookaheads and actions in the aLookahead[] transaction ** must match against the candidate aAction[i] entry. */ @@ -716,12 +730,13 @@ int acttab_insert(acttab *p){ ** an empty offset in the aAction[] table in which we can add the ** aLookahead[] transaction. */ - if( i<0 ){ + if( i<end ){ /* Look for holes in the aAction[] table that fit the current ** aLookahead[] transaction. Leave i set to the offset of the hole. ** If no holes are found, i is left at p->nAction, which means the ** transaction will be appended. */ - for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){ + i = makeItSafe ? p->mnLookahead : 0; + for(; i<p->nActionAlloc - p->mxLookahead; i++){ if( p->aAction[i].lookahead<0 ){ for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; @@ -739,11 +754,19 @@ int acttab_insert(acttab *p){ } } /* Insert transaction set at index i. 
*/ +#if 0 + printf("Acttab:"); + for(j=0; j<p->nLookahead; j++){ + printf(" %d", p->aLookahead[j].lookahead); + } + printf(" inserted at %d\n", i); +#endif for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; p->aAction[k] = p->aLookahead[j]; if( k>=p->nAction ) p->nAction = k+1; } + if( makeItSafe && i+p->nterminal>p->nAction ) p->nAction = i+p->nterminal; p->nLookahead = 0; /* Return the offset that is added to the lookahead in order to get the @@ -751,6 +774,16 @@ int acttab_insert(acttab *p){ return i - p->mnLookahead; } +/* +** Return the size of the action table without the trailing syntax error +** entries. +*/ +int acttab_action_size(acttab *p){ + int n = p->nAction; + while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; } + return n; +} + /********************** From the file "build.c" *****************************/ /* ** Routines to construction the finite state machine for the LEMON @@ -1724,6 +1757,7 @@ int main(int argc, char **argv) stats_line("states", lem.nxstate); stats_line("conflicts", lem.nconflict); stats_line("action table entries", lem.nactiontab); + stats_line("lookahead table entries", lem.nlookaheadtab); stats_line("total table size (bytes)", lem.tablesize); } if( lem.nconflict > 0 ){ @@ -4167,7 +4201,7 @@ void ReportTable( ** of placing the largest action sets first */ for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i; qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); - pActtab = acttab_alloc(); + pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); for(i=0; i<lemp->nxstate*2 && ax[i].nAction>0; i++){ stp = ax[i].stp; if( ax[i].isTkn ){ @@ -4178,7 +4212,7 @@ void ReportTable( if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } - stp->iTknOfst = acttab_insert(pActtab); + stp->iTknOfst = acttab_insert(pActtab, 1); if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; }else{ @@ -4190,7 +4224,7 @@ void ReportTable( if( action<0 ) continue; acttab_action(pActtab, 
ap->sp->index, action); } - stp->iNtOfst = acttab_insert(pActtab); + stp->iNtOfst = acttab_insert(pActtab, 0); if( stp->iNtOfstiNtOfst; if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; } @@ -4249,7 +4283,7 @@ void ReportTable( */ /* Output the yy_action table */ - lemp->nactiontab = n = acttab_size(pActtab); + lemp->nactiontab = n = acttab_action_size(pActtab); lemp->tablesize += n*szActionType; fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; @@ -4268,6 +4302,7 @@ void ReportTable( fprintf(out, "};\n"); lineno++; /* Output the yy_lookahead table */ + lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab); lemp->tablesize += n*szCodeType; fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; for(i=j=0; inxstate; while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; - fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", lemp->nactiontab); lineno++; fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; @@ -4312,7 +4346,6 @@ void ReportTable( fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ - fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; @@ -4382,10 +4415,8 @@ void ReportTable( */ for(i=0; insymbol; i++){ lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); - fprintf(out," %-15s",line); - if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } + fprintf(out," /* %4d */ \"%s\",\n",i, lemp->symbols[i]->name); lineno++; } - if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every diff --git a/tool/lempar.c b/tool/lempar.c index 6c6ca77dde..7ceaaa6207 100644 
--- a/tool/lempar.c +++ b/tool/lempar.c @@ -131,19 +131,13 @@ ** (A) N = yy_action[ yy_shift_ofst[S] + X ] ** (B) N = yy_default[S] ** -** The (A) formula is preferred. The B formula is used instead if: -** (1) The yy_shift_ofst[S]+X value is out of range, or -** (2) yy_lookahead[yy_shift_ofst[S]+X] is not equal to X, or -** (3) yy_shift_ofst[S] equal YY_SHIFT_USE_DFLT. -** (Implementation note: YY_SHIFT_USE_DFLT is chosen so that -** YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X. -** Hence only tests (1) and (2) need to be evaluated.) +** The (A) formula is preferred. The B formula is used instead if +** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X. ** ** The formulas above are for computing the action when the lookahead is ** a terminal symbol. If the lookahead is a non-terminal (as occurs after ** a reduce action) then the yy_reduce_ofst[] array is used in place of -** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of -** YY_SHIFT_USE_DFLT. +** the yy_shift_ofst[] array. ** ** The following are the tables generated in this section: ** @@ -478,7 +472,8 @@ static unsigned int yy_find_shift_action( i = yy_shift_ofst[stateno]; assert( iLookAhead!=YYNOCODE ); i += iLookAhead; - if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ + assert( i>=0 && i