Minor cleanup of the code in the query planner that computes the cost estimates for the various plans.

author drh <drh@noemail.net>

Thu, 27 Mar 2014 18:36:34 +0000 (18:36 +0000)

committer drh <drh@noemail.net>

Thu, 27 Mar 2014 18:36:34 +0000 (18:36 +0000)
author drh <drh@noemail.net>
Thu, 27 Mar 2014 18:36:34 +0000 (18:36 +0000)
committer drh <drh@noemail.net>
Thu, 27 Mar 2014 18:36:34 +0000 (18:36 +0000)
diff --git a/manifest b/manifest

index fc133ce7b93938026000ef37ac07d8f8c71d207a..c8233bfa12913bd0a0f34a866247eeb6d35688e5 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Enhance\sthe\slogest.c\sutility\swith\snew\soperators:\s"dup",\s"inv",\s"log",\sand\n"nlogn".\s\sProvide\shelp\son\san\sinvalid\sinput.
-D 2014-03-27T14:05:38.733
+C Minor\scleanup\sof\sthe\scode\sin\sthe\squery\splanner\sthat\scomputes\sthe\scosts\s\nestimates\sfor\sthe\svarious\splans.\s\sThere\sare\sno\schanges\sto\sthe\scosts\sat\sthis\ntime.\s\sBut\sthe\scode\sis\sslightly\smore\sreadable\snow\sand\sthat\smight\sfacilitate\nfuture\senhancements.
+D 2014-03-27T18:36:34.321
  F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
  F Makefile.in 2ef13430cd359f7b361bb863504e227b25cc7f81
  F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -291,7 +291,7 @@ F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd
  F src/wal.c 76e7fc6de229bea8b30bb2539110f03a494dc3a8
  F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4
  F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45
-F src/where.c da8ec216f14af617505799b0b4e52c73dda7a5ca
+F src/where.c 7c53de68bd6762848b746510cf4eb077ffd7d70d
  F src/whereInt.h 2564055b440e44ebec8b47f237bbccae6719b7af
  F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
  F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
@@ -1159,7 +1159,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
  F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
  F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01
  F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff
-P f585f5d7a0f9bf8c590388654a3638231eba8892
-R fc449dde2f7cf3aac00c9e1a5da52aee
+P b4bd2a062c4baf5f622d61b7411f00de5904ef56
+R 96966a646967b8a2388d32a924ac0ae2
  U drh
-Z ebedee01c152a936b8dfbac0e8a85bc4
+Z 308d20d60c0618b1b3ae79dc397c2638
diff --git a/manifest.uuid b/manifest.uuid

index bf7192882eb1e86699233217794eff62739023ed..65bd2a013db6b7a39735306eff3b425bad690623 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-b4bd2a062c4baf5f622d61b7411f00de5904ef56
-\ No newline at end of file
+9b4d7226bcee38be5ac68a54bee03b4179cb69fc
+\ No newline at end of file
diff --git a/src/where.c b/src/where.c

index 15084f099ab50800abd6a335a945370f9f966a7c..16e7e40bc32f5ceb9d648dca6d59c4551a66276e 100644 (file)
--- a/src/where.c
+++ b/src/where.c
@@ -4328,18 +4328,34 @@ static int whereLoopAddBtree(
            )
        ){
          pNew->iSortIdx = b ? iSortIdx : 0;
+        /* TUNING:  The base cost of an index scan is N + log2(N).
+        ** The log2(N) is for the initial seek to the beginning and the N
+        ** is for the scan itself. */
+        pNew->rRun = sqlite3LogEstAdd(rSize, rLogSize);
          if( m==0 ){
            /* TUNING: Cost of a covering index scan is K*(N + log2(N)).
            **  +  The extra factor K of between 1.1 and 3.0 that depends
            **     on the relative sizes of the table and the index.  K
            **     is smaller for smaller indices, thus favoring them.
+          **     The upper bound on K (3.0) matches the penalty factor
+          **     on a full table scan that tries to encourage the use of
+          **     indexed lookups over full scans.
            */
-          pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 1 +
-                        (15*pProbe->szIdxRow)/pTab->szTabRow;
+          pNew->rRun +=  1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
          }else{
-          /* TUNING: Cost of scanning a non-covering index is (N+1)*log2(N)
-          ** which we will simplify to just N*log2(N) */
-          pNew->rRun = rSize + rLogSize;
+          /* TUNING: The cost of scanning a non-covering index is multiplied
+          ** by log2(N) to account for the binary search of the main table
+          ** that must happen for each row of the index.
+          ** TODO: Should there be a multiplier here, analogous to the 3x
+          ** multiplier for a fulltable scan or covering index scan, to
+          ** further discourage the use of an index scan?  Or is the log2(N)
+          ** term sufficient discouragement?
+          ** TODO: What if some or all of the WHERE clause terms can be
+          ** computed without reference to the original table?  Then the
+          ** penalty should reduce to logK where K is the number of output
+          ** rows.
+          */
+          pNew->rRun += rLogSize;
          }
          whereLoopOutputAdjust(pWC, pNew);
          rc = whereLoopInsert(pBuilder, pNew);
@@ -5041,11 +5057,19 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
                         pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags,
                         iLoop, pWLoop, &revMask);
            if( isOrdered>=0 && isOrdered<nOrderBy ){
-            /* TUNING: Estimated cost of sorting cost as roughly N*log(N).
-            ** If some but not all of the columns are in sorted order, then
-            ** scale down the log(N) term. */
-            LogEst rScale = sqlite3LogEst((nOrderBy-isOrdered)*100/nOrderBy);
-            LogEst rSortCost = nRowEst + estLog(nRowEst) + rScale - 66;
+            /* TUNING: Estimated cost of sorting is N*log(N).
+            ** If the order-by clause has X terms but only the last Y terms
+            ** are out of order, then block-sorting will reduce the sorting
+            ** cost to N*log(N)*(Y/X).  The (Y/X) factor is computed, as a
+            ** LogEst value, by rScale.
+            ** TODO: Should the sorting cost get a small multiplier to help
+            ** discourage the use of sorting and encourage the use of index
+            ** scans instead?
+            */
+            LogEst rScale, rSortCost;
+            assert( nOrderBy>0 );
+            rScale = sqlite3LogEst((nOrderBy-isOrdered)*100/nOrderBy) - 66;
+            rSortCost = nRowEst + estLog(nRowEst) + rScale;
              /* TUNING: The cost of implementing DISTINCT using a B-TREE is
              ** also N*log(N) but it has a larger constant of proportionality.
              ** Multiply by 3.0. */
author	drh <drh@noemail.net>
	Thu, 27 Mar 2014 18:36:34 +0000 (18:36 +0000)
committer	drh <drh@noemail.net>
	Thu, 27 Mar 2014 18:36:34 +0000 (18:36 +0000)
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
src/where.c		patch \| blob \| blame \| history