-C Fix\stest\scases\sto\salign\swith\sthe\simproved\sstats\scomputation.
-D 2016-03-01T12:45:08.294
+C Change\sthe\sestimated\srow\scounts\sin\sstat1\sto\sbe\sone-third\sworst-case\sand\ntwo-threads\saverage\scase.
+D 2016-03-01T14:31:59.247
F Makefile.in 4e90dc1521879022aa9479268a4cd141d1771142
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
F Makefile.msc 4f319afb7c049d40aff7af6e8c4e7cc2ba18e079
F sqlite3.1 fc7ad8990fc8409983309bb80de8c811a7506786
F sqlite3.pc.in 48fed132e7cb71ab676105d2a4dc77127d8c1f3a
F src/alter.c 44e18dfd78e8942d65d3cdaec4de972b5cd9f1f2
-F src/analyze.c 997b0ce0f4efd68b0499f941ebebdd1ec0c71549
+F src/analyze.c dac2f49d4a622091df1964a9ec9de1d2cd0584bb
F src/attach.c a3724c64de1099d85e30751213d285752aed9505
F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240
F src/backup.c f60f0aa55d25d853ffde53d0b0370a7bb7ee41ce
F test/alter4.test c461150723ac957f3b2214aa0b11552cd72023ec
F test/altermalloc.test e81ac9657ed25c6c5bb09bebfa5a047cd8e4acfc
F test/amatch1.test b5ae7065f042b7f4c1c922933f4700add50cdb9f
-F test/analyze.test 6e7bb2c94df69461b83627bdc3d960b0cb2847cd
+F test/analyze.test 3eb35a4af972f98422e5dc0586501b17d103d321
F test/analyze3.test 1dccda46a6c374018af617fba00bfe297a61d442
F test/analyze4.test eff2df19b8dd84529966420f29ea52edc6b56213
F test/analyze5.test 765c4e284aa69ca172772aa940946f55629bc8c4
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 421b5b544af734b97e3da47699fa0f82b21617d3
-R d260dd34a084c646db10204191cf68af
+P 810967bff6ba262f38a2833e2d4efef4ef00f463
+R cd9cc1e55f6145b7b1e4855011dd5873
U drh
-Z 232c83ab04b03227d0856ebcd7ccd2ec
+Z 85196c62dcaae63dbb4c33e57a54153c
-810967bff6ba262f38a2833e2d4efef4ef00f463
\ No newline at end of file
+21bfd47c4245846b12aeeb7cf0212529e300b878
\ No newline at end of file
assert( argc==1 );
#endif
{
- /* Return the value to store in the "stat" column of the sqlite_stat1
+ /* Return a value for the "stat" column of the sqlite_stat1
** table for this index.
**
** The value is a string composed of a list of integers describing
- ** the index. The first integer in the list is the total number of
- ** entries in the index. There is one additional integer in the list
- ** for each indexed column. This additional integer is an estimate of
- ** the number of rows matched by a query on the index using
- ** a key with the corresponding number of fields. In other words,
- ** if the index is on columns (a,b) and the sqlite_stat1 value is
- ** "100 10 2", then SQLite estimates that:
+ ** the index. The first integer is the (estimated) total number
+ ** entries in the index. There is one additional integer for each
+ ** column in the index. The first added integer is an estimate of
+ ** the number of rows that match a single key in the first column of
+ ** the index. The second added integer is an estimate on of the number
+ ** of rows that match a single key consisting of the first two columns
+ ** of the index. And so forth.
**
- ** * the index contains 100 rows,
- ** * "WHERE a=?" matches 10 rows, and
- ** * "WHERE a=? AND b=?" matches 2 rows.
+ ** For example, for an index on columns (a,b), if the sqlite_stat1.stat
+ ** values is "100 10 2", that means there are about 100 rows in the
+ ** index, and that a query against a=$key1 will match about 10 rows
+ ** and a query against "a=$key1 AND b=$key2" will match about 2 rows.
**
- ** A worst-case estimate is used: the maximum number of rows that
- ** could be select for any set of query parameters. The worst case
- ** is the estimate we want for choosing indexes.
+ ** Let V be the average number of rows that match a key, and let M
+ ** be the most number of rows that match the key for any possible value
+ ** of that key. The estimate is computed as:
+ **
+ ** E = (2*V + M)/3
+ **
+ ** Consider two indexes. Index X has with 100 values of exactly 0 and
+ ** 100 singleton values between 1 and 100. Index Y has 200 values
+ ** evenly distributed between 1 and 20. If only the average (V) is
+ ** used in the estimate, X would have "200 2" and Y would have "200 10"
+ ** and so the planner would think X is the more selective index. And
+ ** X often would be more selective. But when searching for 0, index X
+ ** would perform badly. To avoid this problem, the M is added into the
+ ** estimate so that the stat for X is "200 34" and Y is still "200 10".
+ ** In this way, Y is the preferred index (all else being equal) and
+ ** the pathological case is avoided.
**
** For deciding whether or not to do a skip-scan, we want to know the
- ** average number of rows with the same key. We can approximate this
- ** using the (worst case) most number of rows with the same key. But
- ** sometimes that approximation can be badly off. In those cases,
- ** mark the index as "noskipscan" to avoid suboptimal skip-scan plans.
+ ** average number of rows (V) with the same key, not the mixed estimate
+ ** E shown above. Usually E will be close enough. However, if E is
+ ** large but V is small, that could trick the query planner into thinking
+ ** that a skip-scan might work well on this index. To avoid that, the
+ ** "noskipscan" flag is added in cases where the divergence between E
+ ** and V might mislead the query planner.
*/
char *z;
int i;
sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
z = zRet + sqlite3Strlen30(zRet);
for(i=0; i<p->nKeyCol; i++){
- u64 iVal = p->current.amxEq[i];
+ u64 nDistinct = p->current.anDLt[i];
+ u64 iMx = p->current.amxEq[i]; /* M: Most rows per key */
+ u64 iAvg = (p->nRow+nDistinct)/(nDistinct+1); /* V: Average per key */
+ u64 iVal = (iMx+iAvg*2)/3; /* E: The estimate */
sqlite3_snprintf(24, z, " %llu", iVal);
z += sqlite3Strlen30(z);
assert( p->current.anEq[i] );
- if( iVal>=WHERE_SKIPSCAN_ONSET
- && p->current.anDLt[i] > p->nRow/(WHERE_SKIPSCAN_ONSET*2/3)
- ){
+ if( iVal>=WHERE_SKIPSCAN_ONSET && iAvg<(WHERE_SKIPSCAN_ONSET*2/3) ){
noSkipScan = 1;
}
}
ANALYZE main;
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1}}
+} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1}}
do_test analyze-3.4 {
execsql {
CREATE TABLE t2 AS SELECT * FROM t1;
ANALYZE;
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1} t2i1 {5 4} t2i2 {5 2} t2i3 {5 4 1}}
+} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2} t2i3 {5 3 1}}
do_test analyze-3.5 {
execsql {
DROP INDEX t2i3;
ANALYZE t1;
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1} t2i1 {5 4} t2i2 {5 2}}
+} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2}}
do_test analyze-3.6 {
execsql {
ANALYZE t2;
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1} t2i1 {5 4} t2i2 {5 2}}
+} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2}}
do_test analyze-3.7 {
execsql {
DROP INDEX t2i2;
ANALYZE t2;
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1} t2i1 {5 4}}
+} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3}}
do_test analyze-3.8 {
execsql {
CREATE TABLE t3 AS SELECT a, b, rowid AS c, 'hi' AS d FROM t1;
ANALYZE;
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1}}
+} {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}}
do_test analyze-3.10 {
execsql {
ANALYZE;
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {{another foolish ' name} {2 1} {foolish ' name} {2 1 1} t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1}}
+} {{another foolish ' name} {2 1} {foolish ' name} {2 1 1} t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}}
do_test analyze-3.11 {
execsql {
DROP INDEX "foolish ' name";
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {{another foolish ' name} {2 1} t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1}}
+} {{another foolish ' name} {2 1} t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}}
do_test analyze-3.11 {
execsql {
DROP TABLE "silly "" name";
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1}}
+} {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}}
# Try corrupting the sqlite_stat1 table and make sure the
# database is still able to function.
ANALYZE;
SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
}
-} {t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1} t4i1 {5 4} t4i2 {5 2}}
+} {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1} t4i1 {5 3} t4i2 {5 2}}
do_test analyze-4.1 {
execsql {
PRAGMA writable_schema=on;