From: drh Date: Thu, 20 Aug 2009 13:45:07 +0000 (+0000) Subject: Incremental code and comment cleanup in where.c. There is more to be done. X-Git-Tag: fts3-refactor~243 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=cdaca55eb15f65ab909589922c052d9d844365a4;p=thirdparty%2Fsqlite.git Incremental code and comment cleanup in where.c. There is more to be done. FossilOrigin-Name: 4a5d9550bdc08633535a7869d7748f56ac3e9a36 --- diff --git a/manifest b/manifest index c92e1d2d90..432e1a01c5 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Set\sthe\s"type"\scorrectly\sof\sbuilt-in\sBINARY\scollating\ssequences\sfor\sUTF16. -D 2009-08-20T02:49:31 +C Incremental\scode\sand\scomment\scleanup\sin\swhere.c.\s\sThere\sis\smore\sto\sbe\sdone. +D 2009-08-20T13:45:08 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in 0f7761c5d1c62ae7a841e3393ffaff1fa0f5c00a F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -166,7 +166,7 @@ F src/select.c 67b0778c9585905c8aa75aaa469e76ef3c1d315a F src/shell.c db2643650b9268df89a4bedca3f1c6d9e786f1bb F src/sqlite.h.in 3ccf717d82101f19548d0b1243f0a6f4854d51ee F src/sqlite3ext.h 1db7d63ab5de4b3e6b83dd03d1a4e64fef6d2a17 -F src/sqliteInt.h 26356ea41d8a6d0c3438d1b483b8633dc5e91923 +F src/sqliteInt.h 20ab1da1a9a652ea673e5bc586382143914381c0 F src/sqliteLimit.h ffe93f5a0c4e7bd13e70cd7bf84cfb5c3465f45d F src/status.c 237b193efae0cf6ac3f0817a208de6c6c6ef6d76 F src/table.c cc86ad3d6ad54df7c63a3e807b5783c90411a08d @@ -217,7 +217,7 @@ F src/vdbeblob.c a3f3e0e877fc64ea50165eec2855f5ada4477611 F src/vdbemem.c c4a5188ff43692f2ca78d3539ad4877e14b70712 F src/vtab.c aedd76e8670d5a5379f93804398d3ba960125547 F src/walker.c 1edca756275f158b80f20eb6f104c8d3fcc96a04 -F src/where.c e43ddc772b4cab29f07f236864ff6d9425dc05df +F src/where.c 02f2bb999fa80df9399b5a906d2ce988b2e85541 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 
4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 14165b3e32715b700b5f0cbf8f6e3833dda0be45 @@ -750,14 +750,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl 672f81d693a03f80f5ae60bfefacd8a349e76746 -P 4ee44322ca3c92ed8d6f5d4a3f89d219bf379595 -R a229bfa6fe4f806b055faae10403d403 +P 167644f33c949b532655c2297aedf13f93876396 +R fb372557659e26b35bbe68ecf447e1ca U drh -Z e854e162390a77ab2c2b41dc6df78930 +Z 24339a8c1ef1b0f0335c1d9d08b33691 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFKjLm+oxKgR168RlERAsLHAJwN2kz7PF+0ZFK6UY6zOKEcdbVNmACfRdSQ -gRzico4HrGZ+RZD/AC3jBKY= -=Uer1 +iD8DBQFKjVNnoxKgR168RlERAlJzAJwNEG76YvhUi3RkLEp7KwaZArgnPQCghw3J +pSL+ypKyYegD2N/rATGSaX4= +=lyU7 -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 581f18bc35..b7291a4b4f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -167644f33c949b532655c2297aedf13f93876396 \ No newline at end of file +4a5d9550bdc08633535a7869d7748f56ac3e9a36 \ No newline at end of file diff --git a/src/sqliteInt.h b/src/sqliteInt.h index bcc4a64301..4eda0119b4 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -306,7 +306,7 @@ # define double sqlite_int64 # define LONGDOUBLE_TYPE sqlite_int64 # ifndef SQLITE_BIG_DBL -# define SQLITE_BIG_DBL (((sqlite3_int64)1)<<60) +# define SQLITE_BIG_DBL (((sqlite3_int64)1)<<50) # endif # define SQLITE_OMIT_DATETIME_FUNCS 1 # define SQLITE_OMIT_TRACE 1 diff --git a/src/where.c b/src/where.c index fb3032ef7e..c9da247744 100644 --- a/src/where.c +++ b/src/where.c @@ -1922,11 +1922,15 @@ static int whereRangeRegion( if( aSample[i].eType==SQLITE_NULL ) continue; if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break; } - }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){ + }else{ sqlite3 *db = pParse->db; CollSeq *pColl; const u8 *z; int n; + + /* 
pVal comes from sqlite3ValueFromExpr() so the type cannot be NULL */ + assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB ); + if( eType==SQLITE_BLOB ){ z = (const u8 *)sqlite3_value_blob(pVal); pColl = db->pDfltColl; @@ -1988,7 +1992,7 @@ static int whereRangeRegion( ** pLower pUpper ** ** If the upper or lower bound is not present, then NULL should be passed in -** place of a WhereTerm. +** place of the corresponding WhereTerm. ** ** The nEq parameter is passed the index of the index column subject to the ** range constraint. Or, equivalently, the number of equality constraints @@ -2012,12 +2016,12 @@ static int whereRangeRegion( ** constraints. */ static int whereRangeScanEst( - Parse *pParse, - Index *p, - int nEq, - WhereTerm *pLower, - WhereTerm *pUpper, - int *piEst /* OUT: Return value */ + Parse *pParse, /* Parsing & code generating context */ + Index *p, /* The index containing the range-compared column; "x" */ + int nEq, /* index into p->aCol[] of the range-compared column */ + WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ + WhereTerm *pUpper, /* Upper bound on the range. 
ex: "x<455" Might be NULL */ + int *piEst /* OUT: Return value */ ){ int rc = SQLITE_OK; @@ -2108,12 +2112,12 @@ static void bestBtreeIndex( Index *pIdx; /* Copy of pProbe, or zero for IPK index */ int eqTermMask; /* Current mask of valid equality operators */ int idxEqTermMask; /* Index mask of valid equality operators */ + Index sPk; /* A fake index object for the primary key */ + unsigned int aiRowEstPk[2]; /* The aiRowEst[] value for the sPk index */ + int aiColumnPk = -1; /* The aColumn[] value for the sPk index */ + int wsFlagMask; /* Allowed flags in pCost->plan.wsFlag */ - Index pk; - unsigned int pkint[2] = {1000000, 1}; - int pkicol = -1; - int wsFlagMask; - + /* Initialize the cost to a worst-case value */ memset(pCost, 0, sizeof(*pCost)); pCost->rCost = SQLITE_BIG_DBL; @@ -2129,24 +2133,36 @@ static void bestBtreeIndex( } if( pSrc->pIndex ){ + /* An INDEXED BY clause specifies a particular index to use */ pIdx = pProbe = pSrc->pIndex; wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE); eqTermMask = idxEqTermMask; }else{ - Index *pFirst = pSrc->pTab->pIndex; - memset(&pk, 0, sizeof(Index)); - pk.nColumn = 1; - pk.aiColumn = &pkicol; - pk.aiRowEst = pkint; - pk.onError = OE_Replace; - pk.pTable = pSrc->pTab; + /* There is no INDEXED BY clause. Create a fake Index object to + ** represent the primary key */ + Index *pFirst; /* Any other index on the table */ + memset(&sPk, 0, sizeof(Index)); + sPk.nColumn = 1; + sPk.aiColumn = &aiColumnPk; + sPk.aiRowEst = aiRowEstPk; + aiRowEstPk[1] = 1; + sPk.onError = OE_Replace; + sPk.pTable = pSrc->pTab; + pFirst = pSrc->pTab->pIndex; if( pSrc->notIndexed==0 ){ - pk.pNext = pFirst; + sPk.pNext = pFirst; } - if( pFirst && pFirst->aiRowEst ){ - pkint[0] = pFirst->aiRowEst[0]; + /* The aiRowEstPk[0] is an estimate of the total number of rows in the + ** table. Get this information from the ANALYZE information if it is + ** available. If not available, assume the table is 1 million rows in size.
+ */ + if( pFirst ){ + assert( pFirst->aiRowEst!=0 ); /* Allocated together with pFirst */ + aiRowEstPk[0] = pFirst->aiRowEst[0]; + }else{ + aiRowEstPk[0] = 1000000; } - pProbe = &pk; + pProbe = &sPk; wsFlagMask = ~( WHERE_COLUMN_IN|WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_RANGE ); @@ -2154,7 +2170,8 @@ static void bestBtreeIndex( pIdx = 0; } - + /* Loop over all indices looking for the best one to use + */ for(; pProbe; pIdx=pProbe=pProbe->pNext){ const unsigned int * const aiRowEst = pProbe->aiRowEst; double cost; /* Cost of using pProbe */ @@ -2194,11 +2211,11 @@ static void bestBtreeIndex( ** Set to true if there was at least one "x IN (SELECT ...)" term used ** in determining the value of nInMul. ** - ** nBound: - ** Set based on whether or not there is a range constraint on the - ** (nEq+1)th column of the index. 1 if there is neither an upper or - ** lower bound, 3 if there is an upper or lower bound, or 9 if there - ** is both an upper and lower bound. + ** nBound: + ** An estimate on the amount of the table that must be searched due + ** to a range constraint. The value is between 1 and 9 and indicates + ** 9ths of the table. 1 means that about 1/9th of the table is searched. + ** 9 indicates that the entire table is searched. ** ** bSort: ** Boolean. True if there is an ORDER BY clause that will require an @@ -2307,36 +2324,43 @@ static void bestBtreeIndex( } } -#if 0 - if( bInEst && (nInMul*aiRowEst[nEq])>(aiRowEst[0]/2) ){ - nInMul = aiRowEst[0] / (2 * aiRowEst[nEq]); - } - nRow = (double)(aiRowEst[nEq] * nInMul) / nBound; - cost = (nEq>0) * nInMul * estLog(aiRowEst[0]) - + nRow - + bSort * nRow * estLog(nRow) - + bLookup * nRow * estLog(aiRowEst[0]); -#else - - /* The following block calculates nRow and cost for the index scan - ** in the same way as SQLite versions 3.6.17 and earlier. Some elements - ** of this calculation are difficult to justify. But using this strategy - ** works well in practice and causes the test suite to pass.
*/ + /**** Begin adding up the cost of using this index (Needs improvements) + ** + ** Estimate the number of rows of output. For an IN operator, + ** do not let the estimate exceed half the rows in the table. + */ nRow = (double)(aiRowEst[nEq] * nInMul); if( bInEst && nRow*2>aiRowEst[0] ){ nRow = aiRowEst[0]/2; nInMul = nRow / aiRowEst[nEq]; } + + /* Assume constant cost to access a row and logarithmic cost to + ** do a binary search. Hence, the initial cost is the number of output + ** rows plus log2(table-size) times the number of binary searches. + */ cost = nRow + nInMul*estLog(aiRowEst[0]); - nRow = nRow * (double)nBound / 9.0; - cost = cost * (double)nBound / 9.0; + + /* Adjust the number of rows and the cost downward to reflect rows + ** that are excluded by range constraints. + */ + nRow = nRow * (double)nBound / (double)9; + cost = cost * (double)nBound / (double)9; + + /* Add in the estimated cost of sorting the result + */ if( bSort ){ cost += cost*estLog(cost); } + + /* If all information can be taken directly from the index, we avoid + ** doing table lookups. This reduces the cost by half. (Not really - + ** this needs to be fixed.) + */ if( pIdx && bLookup==0 ){ - cost /= 2; + cost /= (double)2; } -#endif + /**** Cost of using this index has now been computed ****/ WHERETRACE(( "tbl=%s idx=%s nEq=%d nInMul=%d nBound=%d bSort=%d bLookup=%d" @@ -2345,6 +2369,9 @@ static void bestBtreeIndex( nEq, nInMul, nBound, bSort, bLookup, wsFlags, nRow, cost )); + /* If this index is the best we have seen so far, then record this + ** index and its cost in the pCost structure. + */ if( (!pIdx || wsFlags) && cost<pCost->rCost ){ pCost->rCost = cost; pCost->nRow = nRow; @@ -2354,7 +2381,11 @@ static void bestBtreeIndex( pCost->plan.u.pIdx = pIdx; } + /* If there was an INDEXED BY clause, then only that one index is + ** considered.
*/ if( pSrc->pIndex ) break; + + /* Reset masks for the next index in the loop */ wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE); eqTermMask = idxEqTermMask; }