Change the query planner to do a better job of estimating the number rows

author drh <drh@noemail.net>

Wed, 5 Nov 2014 19:26:12 +0000 (19:26 +0000)

committer drh <drh@noemail.net>

Wed, 5 Nov 2014 19:26:12 +0000 (19:26 +0000)
author drh <drh@noemail.net>
Wed, 5 Nov 2014 19:26:12 +0000 (19:26 +0000)
committer drh <drh@noemail.net>
Wed, 5 Nov 2014 19:26:12 +0000 (19:26 +0000)
diff --git a/manifest b/manifest

index 29f44c63b92bac42204e04130b7e4fb2f23db4f5..d1dcbe649d8049dd1358061a9a1633ebadc07135 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Make\ssure\sthat\sNULL\sresults\sfrom\sOP_Column\sare\sfully\sand\scompletely\sNULL\nand\sdo\snot\shave\sthe\sMEM_Ephem\sbit\sset.\s\sFix\sfor\sticket\s[094d39a4c95ee4].
-D 2014-11-05T15:57:39.984
+C Change\sthe\squery\splanner\sto\sdo\sa\sbetter\sjob\sof\sestimating\sthe\snumber\srows\nselected\sby\sa\sBETWEEN\soperator\susing\sSTAT4\swhen\sboth\supper\sand\slower\sbounds\nare\scontained\swithin\sthe\ssame\ssample.
+D 2014-11-05T19:26:12.741
  F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
  F Makefile.in cf57f673d77606ab0f2d9627ca52a9ba1464146a
  F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -302,7 +302,7 @@ F src/vtab.c 2a30791bbd7926b589401bd09c3abb33de563793
  F src/wal.c 825c948066c7604a07d56e67958cdab210749016
  F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4
  F src/walker.c c253b95b4ee44b21c406e2a1052636c31ea27804
-F src/where.c 240961041f35862ebcafd260587c79a1ab7347f8
+F src/where.c 2c2081c546c90227577c502765611555503ce3f7
  F src/whereInt.h d3633e9b592103241b74b0ec76185f3e5b8b62e0
  F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
  F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
@@ -321,7 +321,7 @@ F test/analyze4.test eff2df19b8dd84529966420f29ea52edc6b56213
  F test/analyze5.test 765c4e284aa69ca172772aa940946f55629bc8c4
  F test/analyze6.test f1c552ce39cca4ec922a7e4e0e5d0203d6b3281f
  F test/analyze7.test bb1409afc9e8629e414387ef048b8e0e3e0bdc4f
-F test/analyze8.test 093d15c1c888eed5034304a98c992f7360130b88
+F test/analyze8.test c05a461d0a6b05991106467d0c47480f2e709c82
  F test/analyze9.test 72795c8113604b5dcd47a1498a61d6d7fb5d041a
  F test/analyzeA.test 3335697f6700c7052295cfd0067fc5b2aacddf9a
  F test/analyzeB.test 8bf35ee0a548aea831bf56762cb8e7fdb1db083d
@@ -1211,7 +1211,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
  F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
  F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
  F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 948d6e5d07bc14b6de32ec2144c716a5532f894c
-R 4628888308aa60120db393287d665a20
+P 42705fd7d892c4fdfb95fbbb468c99569beece25
+R b31ea7cd782cb3cc21bda8e8c039f16c
  U drh
-Z e07070b00a71f6116b6f23e15e08fb5e
+Z 7c6a788d1b7b1ae57295ef90b3d9da88
diff --git a/manifest.uuid b/manifest.uuid

index 8df21c81cd5ac8318f853cbb5c1805742146c89a..64b0a08a4bc0c3d4c8a1f45725e4d98c38d68356 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-42705fd7d892c4fdfb95fbbb468c99569beece25
-\ No newline at end of file
+2d36be5d9a1cdd4fd2d54fc4eeece32a81cbacc1
+\ No newline at end of file
diff --git a/src/where.c b/src/where.c

index 34614e6bb32c1d0a44d1f49298ff5a1220446317..92f783b2e92464aa6e5e2dfa0505fa5c344bb849 100644 (file)
--- a/src/where.c
+++ b/src/where.c
@@ -1897,7 +1897,6 @@ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){
  }
  #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
  
-
  #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  /*
  ** Estimate the location of a particular key among all keys in an
@@ -1906,9 +1905,10 @@ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){
  **    aStat[0]      Est. number of rows less than pVal
  **    aStat[1]      Est. number of rows equal to pVal
  **
-** Return SQLITE_OK on success.
+** Return the index of the sample that is the smallest sample that
+** is greater than or equal to pRec.
  */
-static void whereKeyStats(
+static int whereKeyStats(
    Parse *pParse,              /* Database connection */
    Index *pIdx,                /* Index to consider domain of */
    UnpackedRecord *pRec,       /* Vector of values to consider */
@@ -1990,6 +1990,7 @@ static void whereKeyStats(
      }
      aStat[0] = iLower + iGap;
    }
+  return i;
  }
  #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
  
@@ -2140,7 +2141,7 @@ static int whereRangeSkipScanEst(
  ** If either of the upper or lower bound is not present, then NULL is passed in
  ** place of the corresponding WhereTerm.
  **
-** The value in (pBuilder->pNew->u.btree.nEq) is the index of the index
+** The value in (pBuilder->pNew->u.btree.nEq) is the number of the index
  ** column subject to the range constraint. Or, equivalently, the number of
  ** equality constraints optimized by the proposed index scan. For example,
  ** assuming index p is on t1(a, b), and the SQL query is:
@@ -2156,7 +2157,7 @@ static int whereRangeSkipScanEst(
  **
  ** When this function is called, *pnOut is set to the sqlite3LogEst() of the
  ** number of rows that the index scan is expected to visit without 
-** considering the range constraints. If nEq is 0, this is the number of 
+** considering the range constraints. If nEq is 0, then *pnOut is the number of 
  ** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced)
  ** to account for the range constraints pLower and pUpper.
  ** 
@@ -2180,9 +2181,7 @@ static int whereRangeScanEst(
    Index *p = pLoop->u.btree.pIndex;
    int nEq = pLoop->u.btree.nEq;
  
-  if( p->nSample>0
-   && nEq<p->nSampleCol
-  ){
+  if( p->nSample>0 && nEq<p->nSampleCol ){
      if( nEq==pBuilder->nRecValid ){
        UnpackedRecord *pRec = pBuilder->pRec;
        tRowcnt a[2];
@@ -2198,15 +2197,19 @@ static int whereRangeScanEst(
        ** is not a simple variable or literal value), the lower bound of the
        ** range is $P. Due to a quirk in the way whereKeyStats() works, even
        ** if $L is available, whereKeyStats() is called for both ($P) and 
-      ** ($P:$L) and the larger of the two returned values used.
+      ** ($P:$L) and the larger of the two returned values is used.
        **
        ** Similarly, iUpper is to be set to the estimate of the number of rows
        ** less than the upper bound of the range query. Where the upper bound
        ** is either ($P) or ($P:$U). Again, even if $U is available, both values
        ** of iUpper are requested of whereKeyStats() and the smaller used.
+      **
+      ** The number of rows between the two bounds is then just iUpper-iLower.
        */
-      tRowcnt iLower;
-      tRowcnt iUpper;
+      tRowcnt iLower;     /* Rows less than the lower bound */
+      tRowcnt iUpper;     /* Rows less than the upper bound */
+      int iLwrIdx = -2;   /* aSample[] for the lower bound */
+      int iUprIdx = -1;   /* aSample[] for the upper bound */
  
        if( pRec ){
          testcase( pRec->nField!=pBuilder->nRecValid );
@@ -2244,7 +2247,7 @@ static int whereRangeScanEst(
          rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk);
          if( rc==SQLITE_OK && bOk ){
            tRowcnt iNew;
-          whereKeyStats(pParse, p, pRec, 0, a);
+          iLwrIdx = whereKeyStats(pParse, p, pRec, 0, a);
            iNew = a[0] + ((pLower->eOperator & (WO_GT|WO_LE)) ? a[1] : 0);
            if( iNew>iLower ) iLower = iNew;
            nOut--;
@@ -2259,7 +2262,7 @@ static int whereRangeScanEst(
          rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk);
          if( rc==SQLITE_OK && bOk ){
            tRowcnt iNew;
-          whereKeyStats(pParse, p, pRec, 1, a);
+          iUprIdx = whereKeyStats(pParse, p, pRec, 1, a);
            iNew = a[0] + ((pUpper->eOperator & (WO_GT|WO_LE)) ? a[1] : 0);
            if( iNew<iUpper ) iUpper = iNew;
            nOut--;
@@ -2271,6 +2274,11 @@ static int whereRangeScanEst(
        if( rc==SQLITE_OK ){
          if( iUpper>iLower ){
            nNew = sqlite3LogEst(iUpper - iLower);
+          /* TUNING:  If both iUpper and iLower are derived from the same
+          ** sample, then assume they are 4x more selective.  This brings
+          ** the estimated selectivity more in line with what it would be
+          ** if estimated without the use of STAT3/4 tables. */
+          if( iLwrIdx==iUprIdx ) nNew -= 20;  assert( 20==sqlite3LogEst(4) );
          }else{
            nNew = 10;        assert( 10==sqlite3LogEst(2) );
          }
diff --git a/test/analyze8.test b/test/analyze8.test

index 4384c39676fc683a526626fbdd545b2082818494..1079e68080d1c5d0de8ceafb84ecf5f00f7c269a 100644 (file)
--- a/test/analyze8.test
+++ b/test/analyze8.test
@@ -86,23 +86,23 @@ do_test 2.1 {
  # range.
  # 
  # Test 3.2 is a little unstable. It depends on the planner estimating
-# that (b BETWEEN 50 AND 54) will match more rows than (c BETWEEN
+# that (b BETWEEN 30 AND 34) will match more rows than (c BETWEEN
  # 800000 AND 900000). Which is a pretty close call (50 vs. 32), so
  # the planner could get it wrong with an unlucky set of samples. This
  # case happens to work, but others ("b BETWEEN 40 AND 44" for example) 
  # will fail.
  #
  do_execsql_test 3.0 {
-  SELECT count(*) FROM t1 WHERE b BETWEEN 50 AND 54;
+  SELECT count(*) FROM t1 WHERE b BETWEEN 30 AND 34;
    SELECT count(*) FROM t1 WHERE c BETWEEN 0 AND 100000;
    SELECT count(*) FROM t1 WHERE c BETWEEN 800000 AND 900000;
  } {50 376 32}
  do_test 3.1 {
-  eqp {SELECT * FROM t1 WHERE b BETWEEN 50 AND 54 AND c BETWEEN 0 AND 100000}
+  eqp {SELECT * FROM t1 WHERE b BETWEEN 30 AND 34 AND c BETWEEN 0 AND 100000}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>? AND b<?)}}
  do_test 3.2 {
    eqp {SELECT * FROM t1
-       WHERE b BETWEEN 50 AND 54 AND c BETWEEN 800000 AND 900000}
+       WHERE b BETWEEN 30 AND 34 AND c BETWEEN 800000 AND 900000}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1c (c>? AND c<?)}}
  do_test 3.3 {
    eqp {SELECT * FROM t1 WHERE a=100 AND c BETWEEN 0 AND 100000}
author	drh <drh@noemail.net>
	Wed, 5 Nov 2014 19:26:12 +0000 (19:26 +0000)
committer	drh <drh@noemail.net>
	Wed, 5 Nov 2014 19:26:12 +0000 (19:26 +0000)
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
src/where.c		patch \| blob \| blame \| history
test/analyze8.test		patch \| blob \| blame \| history