From: dan Date: Thu, 20 Apr 2017 09:54:04 +0000 (+0000) Subject: Add an option to generate stat1 data based on a subset of the user database X-Git-Tag: version-3.22.0~147^2~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e53b4f977484247b9b893d191252e6ce5bfd8e87;p=thirdparty%2Fsqlite.git Add an option to generate stat1 data based on a subset of the user database table contents to sqlite3_expert. FossilOrigin-Name: c69c3e21db6e141f7e24226c6432f2ed31fe5f177bd23781915871f8600ee56a --- diff --git a/ext/expert/expert.c b/ext/expert/expert.c index 3aafde1d3f..baa7be940f 100644 --- a/ext/expert/expert.c +++ b/ext/expert/expert.c @@ -36,6 +36,7 @@ static void usage(char **argv){ fprintf(stderr, " -sql SQL (analyze SQL statements passed as argument)\n"); fprintf(stderr, " -file FILE (read SQL statements from file FILE)\n"); fprintf(stderr, " -verbose LEVEL (integer verbosity level. default 1)\n"); + fprintf(stderr, " -sample PERCENT (percent of db to sample. default 100)\n"); exit(-1); } @@ -97,11 +98,18 @@ int main(int argc, char **argv){ rc = readSqlFromFile(p, argv[i], &zErr); } - else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){ + else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){ if( ++i==(argc-1) ) option_requires_argument("-sql"); rc = sqlite3_expert_sql(p, argv[i], &zErr); } + else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sample", nArg) ){ + int iSample; + if( ++i==(argc-1) ) option_requires_argument("-sample"); + iSample = option_integer_arg(argv[i]); + sqlite3_expert_config(p, EXPERT_CONFIG_SAMPLE, iSample); + } + else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-verbose", nArg) ){ if( ++i==(argc-1) ) option_requires_argument("-verbose"); iVerbose = option_integer_arg(argv[i]); diff --git a/ext/expert/sqlite3expert.c b/ext/expert/sqlite3expert.c index bba5190d5b..03963e1f7e 100644 --- a/ext/expert/sqlite3expert.c +++ b/ext/expert/sqlite3expert.c @@ -28,6 +28,9 @@ typedef struct IdxStatement IdxStatement; typedef struct IdxTable IdxTable; typedef struct IdxWrite IdxWrite; +#define UNIQUE_TABLE_NAME "t592690916721053953805701627921227776" + + /* ** A single constraint. Equivalent to either "col = ?" or "col < ?" (or ** any other type of single-ended range constraint on a column). @@ -127,6 +130,7 @@ struct IdxHash { ** sqlite3expert object. */ struct sqlite3expert { + int iSample; /* Percentage of tables to sample for stat1 */ sqlite3 *db; /* User database */ sqlite3 *dbm; /* In-memory db for this analysis */ sqlite3 *dbv; /* Vtab schema for this analysis */ @@ -1080,8 +1084,8 @@ static int idxProcessOneTrigger( IdxWrite *pWrite, char **pzErr ){ - static const char *zInt = "t592690916721053953805701627921227776"; - static const char *zDrop = "DROP TABLE t592690916721053953805701627921227776"; + static const char *zInt = UNIQUE_TABLE_NAME; + static const char *zDrop = "DROP TABLE " UNIQUE_TABLE_NAME; IdxTable *pTab = pWrite->pTab; const char *zTab = pTab->zName; const char *zSql = @@ -1235,6 +1239,38 @@ static int idxCreateVtabSchema(sqlite3expert *p, char **pzErrmsg){ return rc; } +struct IdxSampleCtx { + int iTarget; + double target; /* Target nRet/nRow value */ + double nRow; /* Number of rows seen */ + double nRet; /* Number of rows returned */ +}; + +static void idxSampleFunc( + sqlite3_context *pCtx, + int argc, + sqlite3_value **argv +){ + struct IdxSampleCtx *p = (struct IdxSampleCtx*)sqlite3_user_data(pCtx); + int bRet; + + assert( argc==0 ); + if( p->nRow==0.0 ){ + bRet = 1; + }else{ + bRet = (p->nRet / p->nRow) <= p->target; + if( bRet==0 ){ + unsigned short rnd; + sqlite3_randomness(2, (void*)&rnd); + bRet = ((int)rnd % 100) <= p->iTarget; + } + } + + sqlite3_result_int(pCtx, bRet); + p->nRow += 1.0; + p->nRet += (double)bRet; +} + struct IdxRemCtx { int nSlot; struct IdxRemSlot { @@ -1360,6 +1396,8 @@ static int idxPopulateOneStat1( int *aStat = 0; int rc = SQLITE_OK; + assert( p->iSample>0 ); + /* Formulate the query text */ sqlite3_bind_text(pIndexXInfo, 1, zIdx, -1, SQLITE_STATIC); while( SQLITE_OK==rc && SQLITE_ROW==sqlite3_step(pIndexXInfo) ){ @@ -1372,9 +1410,15 @@ static int idxPopulateOneStat1( zOrder = idxAppendText(&rc, zOrder, "%s%d", zComma, ++nCol); } if( rc==SQLITE_OK ){ - zQuery = sqlite3_mprintf( - "SELECT %s FROM %Q x ORDER BY %s", zCols, zTab, zOrder - ); + if( p->iSample==100 ){ + zQuery = sqlite3_mprintf( + "SELECT %s FROM %Q x ORDER BY %s", zCols, zTab, zOrder + ); + }else{ + zQuery = sqlite3_mprintf( + "SELECT %s FROM temp."UNIQUE_TABLE_NAME" x ORDER BY %s", zCols, zOrder + ); + } } sqlite3_free(zCols); sqlite3_free(zOrder); @@ -1433,6 +1477,25 @@ static int idxPopulateOneStat1( return rc; } +static int idxBuildSampleTable(sqlite3expert *p, const char *zTab){ + int rc; + char *zSql; + + rc = sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp."UNIQUE_TABLE_NAME,0,0,0); + if( rc!=SQLITE_OK ) return rc; + + zSql = sqlite3_mprintf( + "CREATE TABLE temp." UNIQUE_TABLE_NAME + " AS SELECT * FROM %Q WHERE sample()" + , zTab + ); + if( zSql==0 ) return SQLITE_NOMEM; + rc = sqlite3_exec(p->db, zSql, 0, 0, 0); + sqlite3_free(zSql); + + return rc; +} + /* ** This function is called as part of sqlite3_expert_analyze(). Candidate ** indexes have already been created in database sqlite3expert.dbm, this @@ -1444,13 +1507,15 @@ static int idxPopulateStat1(sqlite3expert *p, char **pzErr){ int rc = SQLITE_OK; int nMax =0; struct IdxRemCtx *pCtx = 0; + struct IdxSampleCtx samplectx; int i; + i64 iPrev = -100000; sqlite3_stmt *pAllIndex = 0; sqlite3_stmt *pIndexXInfo = 0; sqlite3_stmt *pWrite = 0; - const char *zAllIndex = - "SELECT s.name, l.name FROM " + const char *zAllIndex = + "SELECT s.rowid, s.name, l.name FROM " " sqlite_master AS s, " " pragma_index_list(s.name) AS l " "WHERE s.type = 'table'"; @@ -1458,6 +1523,9 @@ static int idxPopulateStat1(sqlite3expert *p, char **pzErr){ "SELECT name, coll FROM pragma_index_xinfo(?) WHERE key"; const char *zWrite = "INSERT INTO sqlite_stat1 VALUES(?, ?, ?)"; + /* If iSample==0, no sqlite_stat1 data is required. */ + if( p->iSample==0 ) return SQLITE_OK; + rc = idxLargestIndex(p->dbm, &nMax, pzErr); if( nMax<=0 || rc!=SQLITE_OK ) return rc; @@ -1473,6 +1541,11 @@ static int idxPopulateStat1(sqlite3expert *p, char **pzErr){ p->db, "rem", 2, SQLITE_UTF8, (void*)pCtx, idxRemFunc, 0, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + p->db, "sample", 0, SQLITE_UTF8, (void*)&samplectx, idxSampleFunc, 0, 0 + ); + } if( rc==SQLITE_OK ){ pCtx->nSlot = nMax+1; @@ -1486,9 +1559,24 @@ static int idxPopulateStat1(sqlite3expert *p, char **pzErr){ } while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pAllIndex) ){ - const char *zTab = (const char*)sqlite3_column_text(pAllIndex, 0); - const char *zIdx = (const char*)sqlite3_column_text(pAllIndex, 1); + i64 iRowid = sqlite3_column_int64(pAllIndex, 0); + const char *zTab = (const char*)sqlite3_column_text(pAllIndex, 1); + const char *zIdx = (const char*)sqlite3_column_text(pAllIndex, 2); + if( p->iSample<100 && iPrev!=iRowid ){ + samplectx.target = (double)p->iSample / 100.0; + samplectx.iTarget = p->iSample; + samplectx.nRow = 0.0; + samplectx.nRet = 0.0; + rc = idxBuildSampleTable(p, zTab); + if( rc!=SQLITE_OK ) break; + } rc = idxPopulateOneStat1(p, pIndexXInfo, pWrite, zTab, zIdx, pzErr); + iPrev = iRowid; + } + if( p->iSample<100 ){ + rc = sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp." UNIQUE_TABLE_NAME, + 0,0,0 + ); } idxFinalize(&rc, pAllIndex); @@ -1503,6 +1591,8 @@ static int idxPopulateStat1(sqlite3expert *p, char **pzErr){ if( rc==SQLITE_OK ){ rc = sqlite3_exec(p->dbm, "ANALYZE sqlite_master", 0, 0, 0); } + + sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp."UNIQUE_TABLE_NAME,0,0,0); return rc; } @@ -1523,6 +1613,7 @@ sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErrmsg){ */ if( rc==SQLITE_OK ){ pNew->db = db; + pNew->iSample = 100; rc = sqlite3_open(":memory:", &pNew->dbv); } if( rc==SQLITE_OK ){ @@ -1565,6 +1656,30 @@ sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErrmsg){ return pNew; } +/* +** Configure an sqlite3expert object. +*/ +int sqlite3_expert_config(sqlite3expert *p, int op, ...){ + int rc = SQLITE_OK; + va_list ap; + va_start(ap, op); + switch( op ){ + case EXPERT_CONFIG_SAMPLE: { + int iVal = va_arg(ap, int); + if( iVal<0 ) iVal = 0; + if( iVal>100 ) iVal = 100; + p->iSample = iVal; + break; + } + default: + rc = SQLITE_NOTFOUND; + break; + } + + va_end(ap); + return rc; +} + /* ** Add an SQL statement to the analysis. */ diff --git a/ext/expert/sqlite3expert.h b/ext/expert/sqlite3expert.h index 455f41d991..39135dc274 100644 --- a/ext/expert/sqlite3expert.h +++ b/ext/expert/sqlite3expert.h @@ -27,6 +27,38 @@ typedef struct sqlite3expert sqlite3expert; */ sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErr); +/* +** Configure an sqlite3expert object. +** +** EXPERT_CONFIG_SAMPLE: +** By default, sqlite3_expert_analyze() generates sqlite_stat1 data for +** each candidate index. This involves scanning and sorting the entire +** contents of each user database table once for each candidate index +** associated with the table. For large databases, this can be +** prohibitively slow. This option allows the sqlite3expert object to +** be configured so that sqlite_stat1 data is instead generated based on a +** subset of each table, or so that no sqlite_stat1 data is used at all. +** +** A single integer argument is passed to this option. If the value is less +** than or equal to zero, then no sqlite_stat1 data is generated or used by +** the analysis - indexes are recommended based on the database schema only. +** Or, if the value is 100 or greater, complete sqlite_stat1 data is +** generated for each candidate index (this is the default). Finally, if the +** value falls between 0 and 100, then it represents the percentage of user +** table rows that should be considered when generating sqlite_stat1 data. +** +** Examples: +** +** // Do not generate any sqlite_stat1 data +** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 0); +** +** // Generate sqlite_stat1 data based on 10% of the rows in each table. +** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 10); +*/ +int sqlite3_expert_config(sqlite3expert *p, int op, ...); + +#define EXPERT_CONFIG_SAMPLE 1 /* int */ + /* ** Specify zero or more SQL statements to be included in the analysis. ** @@ -54,6 +86,7 @@ int sqlite3_expert_sql( char **pzErr /* OUT: Error message (if any) */ ); + /* ** This function is called after the sqlite3expert object has been configured ** with all SQL statements using sqlite3_expert_sql() to actually perform diff --git a/manifest b/manifest index 5fa07a1f45..5ae9923e9e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Have\ssqlite3_expert_analyze()\spopulate\sthe\ssqlite_stat1\stable\sbefore\srunning\nqueries\sthrough\sthe\splanner\sfor\sthe\ssecond\stime. -D 2017-04-18T20:10:16.786 +C Add\san\soption\sto\sgenerate\sstat1\sdata\sbased\son\sa\ssubset\sof\sthe\suser\sdatabase\ntable\scontents\sto\ssqlite3_expert. +D 2017-04-20T09:54:04.700 F Makefile.in 1cc758ce3374a32425e4d130c2fe7b026b20de5b8843243de75f087c0a2661fb F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 6a8c838220f7c00820e1fc0ac1bccaaa8e5676067e1dbfa1bafa7a4ffecf8ae6 @@ -41,10 +41,10 @@ F ext/async/README.txt e12275968f6fde133a80e04387d0e839b0c51f91 F ext/async/sqlite3async.c 0f3070cc3f5ede78f2b9361fb3b629ce200d7d74 F ext/async/sqlite3async.h f489b080af7e72aec0e1ee6f1d98ab6cf2e4dcef F ext/expert/README.md 9f15075ec5ad772808eff55ef044c31140fd1146aa0a3c47eafd155e71851b01 -F ext/expert/expert.c 22d2dd096d479049bc332506fc8c0294bf53b7ebfe60af99635d8c87839bb40b +F ext/expert/expert.c 33842ef151d84c5f8000f9c7b938998c6b999eaef7ce1f4eeb0df8ffe6739496 F ext/expert/expert1.test 1033e43071b69dc2f4e88fbf03fc7f18846c9865cac14f28c80f581437f09acb -F ext/expert/sqlite3expert.c 713388c6c440c6759a1e0898c7936a014dc9791237e62780412229e4a79b0035 -F ext/expert/sqlite3expert.h b1c9eedeb647fd734c4206ae6851635284cfbfa5fb688eff74c3265c9f949b4d +F ext/expert/sqlite3expert.c af3b336f83bcd2a586f6119d4040ac36ccf45162c48e3780ed63ab119fd04fe1 +F ext/expert/sqlite3expert.h af6354f8ee5c9e025024e63fec3bd640a802afcc3099a44d804752cf0791d811 F ext/expert/test_expert.c b01a5115f9444a9b416582c985138f5dfdb279848ce8b7452be383530be27f01 F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b @@ -1579,7 +1579,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P ff4976da667872614331d88e68fb67d347874f164a1c7950dd738c7c2320b954 -R d621eb0093377628a3811751fb5daa84 +P a157fcfde5afc27ae38e7cf4669fcc8e60e23d9d301ffe2e541dd69f895b493b +R 2a75643c384b85cff6c5d9d5615ac2a2 U dan -Z 02b917b4563ed84ab0581f0a345c83b3 +Z 7876a034c88841c26664b8a83eb3817f diff --git a/manifest.uuid b/manifest.uuid index 68ccc22188..d816bcc63e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a157fcfde5afc27ae38e7cf4669fcc8e60e23d9d301ffe2e541dd69f895b493b \ No newline at end of file +c69c3e21db6e141f7e24226c6432f2ed31fe5f177bd23781915871f8600ee56a \ No newline at end of file