From: dan Date: Fri, 3 Oct 2014 19:16:53 +0000 (+0000) Subject: Improve the accuracy of the estimates used when searching an index for values not... X-Git-Tag: version-3.8.7~44^2~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=43085d742548e19d3a61c26c547d4dc3cf42bf36;p=thirdparty%2Fsqlite.git Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances. FossilOrigin-Name: e6f7f97dbc677c9f01b23142928c3fa7307c2fba --- diff --git a/manifest b/manifest index 1c0365b036..f180b21817 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\srequirements\smarks\son\sthe\ssqlite3_db_status()\sinterface\simplementation.\nFix\sa\stypo\sin\sthe\sdocumentation.\s\sFix\sthe\snew\ssqlite3_result_text64()\sroutine\nso\sthat\sit\sworks\scorrectly\swith\san\sencoding\sparameter\sof\sSQLITE_UTF16. -D 2014-10-03T16:00:51.115 +C Improve\sthe\saccuracy\sof\sthe\sestimates\sused\swhen\ssearching\san\sindex\sfor\svalues\snot\spresent\sin\sany\sstat4\ssamples\sunder\ssome\scircumstances. +D 2014-10-03T19:16:53.018 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in cf57f673d77606ab0f2d9627ca52a9ba1464146a F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -166,7 +166,7 @@ F sqlite.pc.in 42b7bf0d02e08b9e77734a47798d1a55a9e0716b F sqlite3.1 3d8b83c91651f53472ca17599dae3457b8b89494 F sqlite3.pc.in 48fed132e7cb71ab676105d2a4dc77127d8c1f3a F src/alter.c ba266a779bc7ce10e52e59e7d3dc79fa342e8fdb -F src/analyze.c 6290a109be876daaa242cd7216f97240f5401776 +F src/analyze.c 418c2fc20cd36f1acc82456b5bd9baae77dbda78 F src/attach.c f4e94df2d1826feda65eb0939f7f6f5f923a0ad9 F src/auth.c d8abcde53426275dab6243b441256fcd8ccbebb2 F src/backup.c a31809c65623cc41849b94d368917f8bb66e6a7e @@ -232,7 +232,7 @@ F src/shell.c 38f627b0885191357f55902a3ac199de90d79715 F src/sqlite.h.in a0b09ea5f73f3629c20b9788e0cde2a70f1703f5 F src/sqlite3.rc 992c9f5fb8285ae285d6be28240a7e8d3a7f2bad F src/sqlite3ext.h 17d487c3c91b0b8c584a32fbeb393f6f795eea7d -F src/sqliteInt.h 5a430c5443717d7c5e2c224f9dcc2534348dc3f6 +F src/sqliteInt.h 3e4bd1b2288528b6a7f2d52709618572b422ab7e F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c 961d5926e5a8fda611d385ec22c226b8635cd1cb F src/table.c 2e99ef7ef16187e17033d9398dc962ce22dab5cb @@ -1201,7 +1201,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 440705b98a3429b830ea85e71cc1e414bc6d8058 -R 8a35df4f3d17b4f437ae0dc791626bd3 -U drh -Z 3c9270c816943439d7ab9b669a742153 +P d2fc322728331ae2d147c8496129df5e3c655eb5 +R 57c99795ca217ceed92eda9a33e89730 +T *branch * stat4-avgeq +T *sym-stat4-avgeq * +T -sym-trunk * +U dan +Z 343ac493fd77cb5d6f7bcecf2a2a97e2 diff --git a/manifest.uuid b/manifest.uuid index 4d17ba50dd..ea230c321e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d2fc322728331ae2d147c8496129df5e3c655eb5 \ No newline at end of file +e6f7f97dbc677c9f01b23142928c3fa7307c2fba \ No newline at end of file diff --git a/src/analyze.c b/src/analyze.c index aec1f021ea..154033d067 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -1448,12 +1448,12 @@ static void decodeIntArray( #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 if( aOut ){ aOut[i] = v; - }else + } #else assert( aOut==0 ); UNUSED_PARAMETER(aOut); #endif - { + if( aLog ){ aLog[i] = sqlite3LogEst(v); } if( *z==' ' ) z++; @@ -1516,8 +1516,16 @@ static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){ z = argv[2]; if( pIndex ){ + int nCol = pIndex->nKeyCol+1; +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 + tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3DbMallocZero( + pInfo->db, sizeof(tRowcnt) * nCol + ); +#else + tRowcnt * const aiRowEst = 0; +#endif pIndex->bUnordered = 0; - decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex); + decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex); if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0]; }else{ Index fakeIdx; @@ -1576,25 +1584,38 @@ static void initAvgEq(Index *pIdx){ pIdx->aAvgEq[nCol] = 1; } for(iCol=0; iColnSample; int i; /* Used to iterate through samples */ tRowcnt sumEq = 0; /* Sum of the nEq values */ - tRowcnt nSum = 0; /* Number of terms contributing to sumEq */ tRowcnt avgEq = 0; - tRowcnt nDLt = pFinal->anDLt[iCol]; + tRowcnt nRow; /* Number of rows in index */ + i64 nSum100 = 0; /* Number of terms contributing to sumEq */ + i64 nDist100; /* Number of distinct values in index */ + + if( pIdx->aiRowEst==0 ){ + nRow = pFinal->anLt[iCol]; + nDist100 = (i64)100 * pFinal->anDLt[iCol]; + nSample--; + }else{ + nRow = pIdx->aiRowEst[0]; + nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1]; + } /* Set nSum to the number of distinct (iCol+1) field prefixes that - ** occur in the stat4 table for this index before pFinal. Set - ** sumEq to the sum of the nEq values for column iCol for the same - ** set (adding the value only once where there exist duplicate - ** prefixes). */ - for(i=0; i<(pIdx->nSample-1); i++){ - if( aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] ){ + ** occur in the stat4 table for this index. Set sumEq to the sum of + ** the nEq values for column iCol for the same set (adding the value + ** only once where there exist duplicate prefixes). */ + for(i=0; inSample-1) + || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] + ){ sumEq += aSample[i].anEq[iCol]; - nSum++; + nSum100 += 100; } } - if( nDLt>nSum ){ - avgEq = (pFinal->anLt[iCol] - sumEq)/(nDLt - nSum); + + if( nDist100>nSum100 ){ + avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100); } if( avgEq==0 ) avgEq = 1; pIdx->aAvgEq[iCol] = avgEq; @@ -1846,6 +1867,11 @@ int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ rc = loadStat4(db, sInfo.zDatabase); db->lookaside.bEnabled = lookasideEnabled; } + for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){ + Index *pIdx = sqliteHashData(i); + sqlite3DbFree(db, pIdx->aiRowEst); + pIdx->aiRowEst = 0; + } #endif if( rc==SQLITE_NOMEM ){ diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 695b63d753..74c36c8db5 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1801,6 +1801,7 @@ struct Index { int nSampleCol; /* Size of IndexSample.anEq[] and so on */ tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ IndexSample *aSample; /* Samples of the left-most key */ + tRowcnt *aiRowEst; /* Non-logarithmic stat1 data for this table */ #endif };