-C Make\ssure\scode\s*compiles*\swith\seach\sOMIT\sand\sENABLE\soption.\s\sMostly\schanges\sto\stest\smodules.
-D 2011-02-09T19:55:20
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA1
+
+C Refactor\sthe\scost\sfunction\sin\sthe\squery\splanner.\s\sGive\sextra\scost\s(thus\nreduce\slikelihood\sof\sselection)\sto\sfull\stable\sscans.
+D 2011-02-10T00:08:47.701
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 27701a1653595a1f2187dc61c8117e00a6c1d50f
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F config.guess 226d9a188c6196f3033ffc651cbc9dcee1a42977
F config.h.in 868fdb48c028421a203470e15c69ada15b9ba673
F config.sub 9ebe4c3b3dab6431ece34f16828b594fb420da55
-F configure c38c1947db7ed4adaed2affcb09cea9d3acd5a9a
+F configure c38c1947db7ed4adaed2affcb09cea9d3acd5a9a x
F configure.ac 87a3c71bbe9c925381c154413eea7f3cdc397244
F contrib/sqlitecon.tcl 210a913ad63f9f991070821e599d600bd913e0ad
F doc/lemon.html f0f682f50210928c07e562621c3b7e8ab912a538
F ext/rtree/sqlite3rtree.h 1af0899c63a688e272d69d8e746f24e76f10a3f0
F ext/rtree/tkt3363.test 142ab96eded44a3615ec79fba98c7bde7d0f96de
F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024
-F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
+F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F main.mk 54190fab7cdba523e311c274c95ea480f32abfb5
F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a
F src/wal.c aca10a60655e103fc8630a75345000f43c6d47ca
F src/wal.h 7a5fbb00114b7f2cd40c7e1003d4c41ce9d26840
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
-F src/where.c 0ff78ba4787cfc6895be1faed5b4ea98b7af7cfe
+F src/where.c 83a89fe4822caf45391459224891b66d3e2ba237
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125
F test/analyze2.test 8f2b1534d43f5547ce9a6b736c021d4192c75be3
F test/analyze3.test d61f55d8b472fc6e713160b1e577f7a68e63f38b
F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045
-F test/analyze5.test 18659612dd854330b9f2a0bf4c90658f3739fd67
+F test/analyze5.test 0618d2fe8982a5dae1d4e92152acc8ecbaf52be2
F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3
F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6
F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P 00c4596f0b270120848ab8d06dcdec7813a9a315
-R 49290511d1319608c18e9f8e70116a97
-U shaneh
-Z 9431f3ee59a677605bc0096e96d777c4
+P 7cc515edc9cade2bc6c74699b3e4153bf2b74ebb
+R 08fe91f055ce4403c87684038df163cb
+U drh
+Z 1ef92ed18aa7502ebbef18dbb05b7e9d
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFNUyyToxKgR168RlERAiF4AKCIVGuKzORossDsmQaAQbZKU4Xd/gCfWOhI
+4aKFZ+7oNbX7dj0deyXznl8=
+=Ts3F
+-----END PGP SIGNATURE-----
#include "sqliteInt.h"
-/*
-** The following parameter define the relative cost of various
-** search operations. These parameters are used to estimate query
-** plan costs and to select the query plan with the lowest estimated
-** cost.
-**
-** Let the cost of moving from one row in a table or index to the next
-** or previous row be SEEK_COST.
-**
-** Let the base-10 logarithm of the number of rows in a table or
-** index be L. The estLog() function below will estimate this
-** numbmer given the number of rows in the table.
-**
-** The cost of doing a lookup of an index will be IDX_LKUP_COST*L.
-**
-** The cost of doing a lookup on a table is TBL_LKUP_COST*L.
-**
-** The cost of sorting a result set of N rows is assumed to be
-** N*log10(N)*SORT_COST.
-*/
-#if defined(SEEK_COST)
- /* Assume that IDX_LKUP_COST, TBL_LKUP_COST, and SORT_COST are also defined */
-#elif !defined(SQLITE_OMIT_FLOATING_POINT)
-# define SEEK_COST 1.0
-# define IDX_LKUP_COST 1.0
-# define TBL_LKUP_COST 0.1
-# define SORT_COST 3.0
-#else
-# define SEEK_COST 10
-# define IDX_LKUP_COST 10
-# define TBL_LKUP_COST 1
-# define SORT_COST 30
-#endif
-
/*
** Trace output macros
*/
const unsigned int * const aiRowEst = pProbe->aiRowEst;
double cost; /* Cost of using pProbe */
double nRow; /* Estimated number of rows in result set */
- double nTabSrch; /* Est number of table searches */
- double nIdxSrch; /* Est number of index searches */
+ double log10N; /* base-10 logarithm of nRow (inexact) */
int rev; /* True to scan in reverse order */
int wsFlags = 0;
Bitmask used = 0;
}
#endif /* SQLITE_ENABLE_STAT2 */
- /* Adjust the number of rows and the cost downward to reflect rows
+ /* Adjust the number of output rows and downward to reflect rows
** that are excluded by range constraints.
*/
nRow = (nRow * (double)estBound) / (double)100;
if( nRow<1 ) nRow = 1;
- /* Assume constant cost to advance from one row to the next and
- ** logarithmic cost to do a binary search. Hence, the initial cost
- ** is the number of output rows plus log2(table-size) times the
- ** number of binary searches.
+ /* Experiments run on real SQLite databases show that the time needed
+ ** to do a binary search to locate a row in a table or index is roughly
+ ** log10(N) times the time to move from one row to the next row within
+ ** a table or index. The actual times can vary, with the size of
+ ** records being an important factor. Both moves and searches are
+ ** slower with larger records, presumably because fewer records fit
+ ** on one page and hence more pages have to be fetched.
**
- ** Because fan-out on tables is so much higher than the fan-out on
- ** indices (because table btrees contain only integer keys in non-leaf
- ** nodes) we weight the cost of a table binary search as 1/10th the
- ** cost of an index binary search.
+ ** The ANALYZE command and the sqlite_stat1 and sqlite_stat2 tables do
+ ** not give us data on the relative sizes of table and index records.
+ ** So this computation assumes table records are about twice as big
+ ** as index records
*/
- if( pIdx ){
- if( bLookup ){
- /* For an index lookup followed by a table lookup:
- ** nInMul index searches to find the start of each index range
- ** + nRow steps through the index
- ** + nRow table searches to lookup the table entry using the rowid
- */
- nIdxSrch = nInMul;
- nTabSrch = nRow;
+ if( (wsFlags & WHERE_NOT_FULLSCAN)==0 ){
+ /* The cost of a full table scan is a number of move operations equal
+ ** to the number of rows in the table.
+ **
+ ** We add an additional 4x penalty to full table scans. This causes
+ ** the cost function to err on the side of choosing an index over
+ ** choosing a full scan. This 4x full-scan penalty is an arguable
+ ** decision and one which we expect to revisit in the future. But
+ ** it seems to be working well enough at the moment.
+ */
+ cost = aiRowEst[0]*4;
+ }else{
+ log10N = estLog(aiRowEst[0]);
+ cost = nRow;
+ if( pIdx ){
+ if( bLookup ){
+ /* For an index lookup followed by a table lookup:
+ ** nInMul index searches to find the start of each index range
+ ** + nRow steps through the index
+ ** + nRow table searches to lookup the table entry using the rowid
+ */
+ cost += (nInMul + nRow)*log10N;
+ }else{
+ /* For a covering index:
+ ** nInMul index searches to find the initial entry
+ ** + nRow steps through the index
+ */
+ cost += nInMul*log10N;
+ }
}else{
- /* For a covering index:
- ** nInMul index searches to find the initial entry
- ** + nRow steps through the index
+ /* For a rowid primary key lookup:
+ ** nInMult table searches to find the initial entry for each range
+ ** + nRow steps through the table
*/
- nIdxSrch = nInMul;
- nTabSrch = 0;
+ cost += nInMul*log10N;
}
- }else{
- /* For a rowid primary key lookup:
- ** nInMult table searches to find the initial entry for each range
- ** + nRow steps through the table
- */
- nIdxSrch = 0;
- nTabSrch = nInMul;
}
- cost = nRow + (nIdxSrch*IDX_LKUP_COST + nTabSrch*TBL_LKUP_COST)
- *estLog(aiRowEst[0])/SEEK_COST;
- /* Add in the estimated cost of sorting the result. This cost is expanded
- ** by a fudge factor of 3.0 to account for the fact that a sorting step
- ** involves a write and is thus more expensive than a lookup step.
+ /* Add in the estimated cost of sorting the result. Actual experimental
+ ** measurements of sorting performance in SQLite show that sorting time
+ ** adds C*N*log10(N) to the cost, where N is the number of rows to be
+ ** sorted and C is a factor between 1.95 and 4.3. We will split the
+ ** difference and select C of 3.0.
*/
if( bSort ){
- cost += nRow*estLog(nRow)*SORT_COST/SEEK_COST;
+ cost += nRow*estLog(nRow)*3;
}
/**** Cost of using this index has now been computed ****/
WHERETRACE((
"%s(%s): nEq=%d nInMul=%d estBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n"
- " notReady=0x%llx nTSrch=%.1f nISrch=%.1f nRow=%.1f\n"
- " estLog=%.1f cost=%.1f used=0x%llx\n",
+ " notReady=0x%llx log10N=%.1f nRow=%.1f cost=%.1f used=0x%llx\n",
pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"),
nEq, nInMul, estBound, bSort, bLookup, wsFlags,
- notReady, nTabSrch, nIdxSrch, nRow, estLog(aiRowEst[0]), cost, used
+ notReady, log10N, nRow, cost, used
));
/* If this index is the best we have seen so far, then record this
18 {z>=-100 AND z<0} t1z 50
19 {z>=-100 AND z<=1} t1z 700
20 {z>=-100 AND z<2} t1z 700
- 21 {z>=-100 AND z<=2} {} 111
- 22 {z>=-100 AND z<3} {} 111
+ 21 {z>=-100 AND z<=2} t1z 900
+ 22 {z>=-100 AND z<3} t1z 900
31 {z>=0.0 AND z<=0.0} t1z 400
32 {z>=1.0 AND z<=1.0} t1z 300
48 {z>=-100 AND z<0.0} t1z 50
49 {z>=-100 AND z<=1.0} t1z 700
50 {z>=-100 AND z<2.0} t1z 700
- 51 {z>=-100 AND z<=2.0} {} 111
- 52 {z>=-100 AND z<3.0} {} 111
+ 51 {z>=-100 AND z<=2.0} t1z 900
+ 52 {z>=-100 AND z<3.0} t1z 900
101 {z=-1} t1z 50
102 {z=0} t1z 400
206 {z IN (4)} t1z 50
207 {z IN (0.5)} t1z 50
208 {z IN (0,1)} t1z 700
- 209 {z IN (0,1,2)} {} 100
+ 209 {z IN (0,1,2)} t1z 900
210 {z IN (0,1,2,3)} {} 100
211 {z IN (0,1,2,3,4,5)} {} 100
212 {z IN (1,2)} t1z 500