From: drh
Date: Thu, 10 Feb 2011 00:08:47 +0000 (+0000)
Subject: Refactor the cost function in the query planner. Give extra cost (thus
X-Git-Tag: version-3.7.6~153
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3772206769060b79e0d69f9363566b06116edeed;p=thirdparty%2Fsqlite.git

Refactor the cost function in the query planner. Give extra cost (thus
reduce likelihood of selection) to full table scans.

FossilOrigin-Name: 878da276ebf643b716ddd650d4d0ca3595fe5bf2
---

diff --git a/configure b/configure
old mode 100644
new mode 100755
diff --git a/install-sh b/install-sh
old mode 100644
new mode 100755
diff --git a/manifest b/manifest
index 48683ea413..410f03eb5d 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,8 @@
-C Make\ssure\scode\s*compiles*\swith\seach\sOMIT\sand\sENABLE\soption.\s\sMostly\schanges\sto\stest\smodules.
-D 2011-02-09T19:55:20
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA1
+
+C Refactor\sthe\scost\sfunction\sin\sthe\squery\splanner.\s\sGive\sextra\scost\s(thus\nreduce\slikelihood\sof\sselection)\sto\sfull\stable\sscans.
+D 2011-02-10T00:08:47.701
 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
 F Makefile.in 27701a1653595a1f2187dc61c8117e00a6c1d50f
 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -22,7 +25,7 @@ F art/src_logo.gif 9341ef09f0e53cd44c0c9b6fc3c16f7f3d6c2ad9
 F config.guess 226d9a188c6196f3033ffc651cbc9dcee1a42977
 F config.h.in 868fdb48c028421a203470e15c69ada15b9ba673
 F config.sub 9ebe4c3b3dab6431ece34f16828b594fb420da55
-F configure c38c1947db7ed4adaed2affcb09cea9d3acd5a9a
+F configure c38c1947db7ed4adaed2affcb09cea9d3acd5a9a x
 F configure.ac 87a3c71bbe9c925381c154413eea7f3cdc397244
 F contrib/sqlitecon.tcl 210a913ad63f9f991070821e599d600bd913e0ad
 F doc/lemon.html f0f682f50210928c07e562621c3b7e8ab912a538
@@ -98,7 +101,7 @@ F ext/rtree/rtree_util.tcl 06aab2ed5b826545bf215fff90ecb9255a8647ea
 F ext/rtree/sqlite3rtree.h 1af0899c63a688e272d69d8e746f24e76f10a3f0
 F ext/rtree/tkt3363.test 142ab96eded44a3615ec79fba98c7bde7d0f96de
 F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024
-F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
+F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
 F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
 F main.mk 54190fab7cdba523e311c274c95ea480f32abfb5
 F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a
@@ -241,7 +244,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30
 F src/wal.c aca10a60655e103fc8630a75345000f43c6d47ca
 F src/wal.h 7a5fbb00114b7f2cd40c7e1003d4c41ce9d26840
 F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
-F src/where.c 0ff78ba4787cfc6895be1faed5b4ea98b7af7cfe
+F src/where.c 83a89fe4822caf45391459224891b66d3e2ba237
 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125
@@ -254,7 +257,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5
 F test/analyze2.test 8f2b1534d43f5547ce9a6b736c021d4192c75be3
 F test/analyze3.test d61f55d8b472fc6e713160b1e577f7a68e63f38b
 F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045
-F test/analyze5.test 18659612dd854330b9f2a0bf4c90658f3739fd67
+F test/analyze5.test 0618d2fe8982a5dae1d4e92152acc8ecbaf52be2
 F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3
 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6
 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e
@@ -906,7 +909,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
 F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P 00c4596f0b270120848ab8d06dcdec7813a9a315
-R 49290511d1319608c18e9f8e70116a97
-U shaneh
-Z 9431f3ee59a677605bc0096e96d777c4
+P 7cc515edc9cade2bc6c74699b3e4153bf2b74ebb
+R 08fe91f055ce4403c87684038df163cb
+U drh
+Z 1ef92ed18aa7502ebbef18dbb05b7e9d
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFNUyyToxKgR168RlERAiF4AKCIVGuKzORossDsmQaAQbZKU4Xd/gCfWOhI
+4aKFZ+7oNbX7dj0deyXznl8=
+=Ts3F
+-----END PGP SIGNATURE-----
diff --git a/manifest.uuid b/manifest.uuid
index 753b7b8912..9097819203 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-7cc515edc9cade2bc6c74699b3e4153bf2b74ebb
\ No newline at end of file
+878da276ebf643b716ddd650d4d0ca3595fe5bf2
\ No newline at end of file
diff --git a/src/where.c b/src/where.c
index 5f892813de..d3356cee78 100644
--- a/src/where.c
+++ b/src/where.c
@@ -19,40 +19,6 @@
 #include "sqliteInt.h"
 
-/*
-** The following parameter define the relative cost of various
-** search operations. These parameters are used to estimate query
-** plan costs and to select the query plan with the lowest estimated
-** cost.
-**
-** Let the cost of moving from one row in a table or index to the next
-** or previous row be SEEK_COST.
-**
-** Let the base-10 logarithm of the number of rows in a table or
-** index be L. The estLog() function below will estimate this
-** numbmer given the number of rows in the table.
-**
-** The cost of doing a lookup of an index will be IDX_LKUP_COST*L.
-**
-** The cost of doing a lookup on a table is TBL_LKUP_COST*L.
-**
-** The cost of sorting a result set of N rows is assumed to be
-** N*log10(N)*SORT_COST.
-*/
-#if defined(SEEK_COST)
-  /* Assume that IDX_LKUP_COST, TBL_LKUP_COST, and SORT_COST are also defined */
-#elif !defined(SQLITE_OMIT_FLOATING_POINT)
-# define SEEK_COST      1.0
-# define IDX_LKUP_COST  1.0
-# define TBL_LKUP_COST  0.1
-# define SORT_COST      3.0
-#else
-# define SEEK_COST     10
-# define IDX_LKUP_COST 10
-# define TBL_LKUP_COST  1
-# define SORT_COST     30
-#endif
-
 /*
 ** Trace output macros
 */
@@ -2771,8 +2737,7 @@ static void bestBtreeIndex(
     const unsigned int * const aiRowEst = pProbe->aiRowEst;
     double cost;                /* Cost of using pProbe */
     double nRow;                /* Estimated number of rows in result set */
-    double nTabSrch;            /* Est number of table searches */
-    double nIdxSrch;            /* Est number of index searches */
+    double log10N;              /* base-10 logarithm of nRow (inexact) */
     int rev;                    /* True to scan in reverse order */
     int wsFlags = 0;
     Bitmask used = 0;
@@ -2967,56 +2932,71 @@ static void bestBtreeIndex(
     }
 #endif /* SQLITE_ENABLE_STAT2 */
 
-    /* Adjust the number of rows and the cost downward to reflect rows
+    /* Adjust the number of output rows and downward to reflect rows
     ** that are excluded by range constraints.
     */
     nRow = (nRow * (double)estBound) / (double)100;
     if( nRow<1 ) nRow = 1;
 
-    /* Assume constant cost to advance from one row to the next and
-    ** logarithmic cost to do a binary search. Hence, the initial cost
-    ** is the number of output rows plus log2(table-size) times the
-    ** number of binary searches.
+    /* Experiments run on real SQLite databases show that the time needed
+    ** to do a binary search to locate a row in a table or index is roughly
+    ** log10(N) times the time to move from one row to the next row within
+    ** a table or index. The actual times can vary, with the size of
+    ** records being an important factor. Both moves and searches are
+    ** slower with larger records, presumably because fewer records fit
+    ** on one page and hence more pages have to be fetched.
     **
-    ** Because fan-out on tables is so much higher than the fan-out on
-    ** indices (because table btrees contain only integer keys in non-leaf
-    ** nodes) we weight the cost of a table binary search as 1/10th the
-    ** cost of an index binary search.
+    ** The ANALYZE command and the sqlite_stat1 and sqlite_stat2 tables do
+    ** not give us data on the relative sizes of table and index records.
+    ** So this computation assumes table records are about twice as big
+    ** as index records
     */
-    if( pIdx ){
-      if( bLookup ){
-        /* For an index lookup followed by a table lookup:
-        **    nInMul index searches to find the start of each index range
-        **  + nRow steps through the index
-        **  + nRow table searches to lookup the table entry using the rowid
-        */
-        nIdxSrch = nInMul;
-        nTabSrch = nRow;
+    if( (wsFlags & WHERE_NOT_FULLSCAN)==0 ){
+      /* The cost of a full table scan is a number of move operations equal
+      ** to the number of rows in the table.
+      **
+      ** We add an additional 4x penalty to full table scans. This causes
+      ** the cost function to err on the side of choosing an index over
+      ** choosing a full scan. This 4x full-scan penalty is an arguable
+      ** decision and one which we expect to revisit in the future. But
+      ** it seems to be working well enough at the moment.
+      */
+      cost = aiRowEst[0]*4;
+    }else{
+      log10N = estLog(aiRowEst[0]);
+      cost = nRow;
+      if( pIdx ){
+        if( bLookup ){
+          /* For an index lookup followed by a table lookup:
+          **    nInMul index searches to find the start of each index range
+          **  + nRow steps through the index
+          **  + nRow table searches to lookup the table entry using the rowid
+          */
+          cost += (nInMul + nRow)*log10N;
+        }else{
+          /* For a covering index:
+          **     nInMul index searches to find the initial entry
+          **   + nRow steps through the index
+          */
+          cost += nInMul*log10N;
+        }
       }else{
-        /* For a covering index:
-        **     nInMul index searches to find the initial entry
-        **   + nRow steps through the index
+        /* For a rowid primary key lookup:
+        **    nInMult table searches to find the initial entry for each range
+        **  + nRow steps through the table
         */
-        nIdxSrch = nInMul;
-        nTabSrch = 0;
+        cost += nInMul*log10N;
       }
-    }else{
-      /* For a rowid primary key lookup:
-      **    nInMult table searches to find the initial entry for each range
-      **  + nRow steps through the table
-      */
-      nIdxSrch = 0;
-      nTabSrch = nInMul;
-    }
-    cost = nRow + (nIdxSrch*IDX_LKUP_COST + nTabSrch*TBL_LKUP_COST)
-                     *estLog(aiRowEst[0])/SEEK_COST;
+    }
 
-    /* Add in the estimated cost of sorting the result. This cost is expanded
-    ** by a fudge factor of 3.0 to account for the fact that a sorting step
-    ** involves a write and is thus more expensive than a lookup step.
+    /* Add in the estimated cost of sorting the result. Actual experimental
+    ** measurements of sorting performance in SQLite show that sorting time
+    ** adds C*N*log10(N) to the cost, where N is the number of rows to be
+    ** sorted and C is a factor between 1.95 and 4.3. We will split the
+    ** difference and select C of 3.0.
     */
     if( bSort ){
-      cost += nRow*estLog(nRow)*SORT_COST/SEEK_COST;
+      cost += nRow*estLog(nRow)*3;
     }
 
     /**** Cost of using this index has now been computed ****/
@@ -3082,11 +3062,10 @@ static void bestBtreeIndex(
 
     WHERETRACE((
       "%s(%s): nEq=%d nInMul=%d estBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n"
-      "         notReady=0x%llx nTSrch=%.1f nISrch=%.1f nRow=%.1f\n"
-      "         estLog=%.1f cost=%.1f used=0x%llx\n",
+      "         notReady=0x%llx log10N=%.1f nRow=%.1f cost=%.1f used=0x%llx\n",
       pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), nEq, nInMul,
       estBound, bSort, bLookup, wsFlags,
-      notReady, nTabSrch, nIdxSrch, nRow, estLog(aiRowEst[0]), cost, used
+      notReady, log10N, nRow, cost, used
     ));
 
     /* If this index is the best we have seen so far, then record this
diff --git a/test/analyze5.test b/test/analyze5.test
index 93b8b2e01f..e08893a180 100644
--- a/test/analyze5.test
+++ b/test/analyze5.test
@@ -102,8 +102,8 @@ foreach {testid where index rows} {
     18  {z>=-100 AND z<0}      t1z  50
     19  {z>=-100 AND z<=1}     t1z  700
     20  {z>=-100 AND z<2}      t1z  700
-    21  {z>=-100 AND z<=2}     {}   111
-    22  {z>=-100 AND z<3}      {}   111
+    21  {z>=-100 AND z<=2}     t1z  900
+    22  {z>=-100 AND z<3}      t1z  900
 
     31  {z>=0.0 AND z<=0.0}    t1z  400
     32  {z>=1.0 AND z<=1.0}    t1z  300
@@ -125,8 +125,8 @@ foreach {testid where index rows} {
     48  {z>=-100 AND z<0.0}    t1z  50
     49  {z>=-100 AND z<=1.0}   t1z  700
     50  {z>=-100 AND z<2.0}    t1z  700
-    51  {z>=-100 AND z<=2.0}   {}   111
-    52  {z>=-100 AND z<3.0}    {}   111
+    51  {z>=-100 AND z<=2.0}   t1z  900
+    52  {z>=-100 AND z<3.0}    t1z  900
 
    101  {z=-1}                 t1z  50
    102  {z=0}                  t1z  400
@@ -151,7 +151,7 @@ foreach {testid where index rows} {
    206  {z IN (4)}             t1z  50
    207  {z IN (0.5)}           t1z  50
    208  {z IN (0,1)}           t1z  700
-   209  {z IN (0,1,2)}         {}   100
+   209  {z IN (0,1,2)}         t1z  900
    210  {z IN (0,1,2,3)}       {}   100
    211  {z IN (0,1,2,3,4,5)}   {}   100
    212  {z IN (1,2)}           t1z  500