From: dan Date: Thu, 26 Jun 2014 20:21:46 +0000 (+0000) Subject: Attempt to use sqlite_stat4 data to estimate the number of rows visited by a range... X-Git-Tag: version-3.8.6~92^2~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b0b8290ecb57a9b535a3ccc7ad1b6d499f5d63dc;p=thirdparty%2Fsqlite.git Attempt to use sqlite_stat4 data to estimate the number of rows visited by a range query that uses a skip-scan. This code is largely untested. FossilOrigin-Name: 01dc8102592427b71a18c2cb82301d2266dd59c2 --- diff --git a/manifest b/manifest index fea0a6e2cc..5a780681cb 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sshowstat4.c\sso\sthat\sit\sdecodes\stypecodes\s8\sand\s9\scorrectly. -D 2014-06-24T20:19:21.030 +C Attempt\sto\suse\ssqlite_stat4\sdata\sto\sestimate\sthe\snumber\sof\srows\svisited\sby\sa\srange\squery\sthat\suses\sa\sskip-scan.\sThis\scode\sis\slargely\suntested. +D 2014-06-26T20:21:46.005 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -227,7 +227,7 @@ F src/shell.c 56de2dfa3f25def4bf03098f7e2256fbb42f6e3c F src/sqlite.h.in a98eb3e8c86c934ea6f5bcfc6b69653dde2f4ed4 F src/sqlite3.rc 11094cc6a157a028b301a9f06b3d03089ea37c3e F src/sqlite3ext.h 886f5a34de171002ad46fae8c36a7d8051c190fc -F src/sqliteInt.h fccdc735c27b3dc12322fec7cdad8bc76be8d00b +F src/sqliteInt.h e88614d7371b80ff69dbbb5e4b9813ee93dfd890 F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c 7ac05a5c7017d0b9f0b4bcd701228b784f987158 F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e @@ -289,14 +289,14 @@ F src/vdbeInt.h e6d83e5bfd62fc6685ba1ed6153f7099f82de9f7 F src/vdbeapi.c 0ed6053f947edd0b30f64ce5aeb811872a3450a4 F src/vdbeaux.c e493f38758c4b8f4ca2007cf6a700bd405d192f3 F src/vdbeblob.c 9205ce9d3b064d9600f8418a897fc88b5687d9ac -F src/vdbemem.c 6fc77594c60f6155404f3f8d71bf36d1fdeb4447 +F src/vdbemem.c 
8f28cb5bdd5b8748dba67aab5a07a47386fe40dc F src/vdbesort.c 44441d73b08b3a638dcdb725afffb87c6574ad27 F src/vdbetrace.c 6f52bc0c51e144b7efdcfb2a8f771167a8816767 F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd F src/wal.c 264df50a1b33124130b23180ded2e2c5663c652a F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4 F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45 -F src/where.c 7b9e13cff91a2f14ac61e6a1bc3a83b5113e6298 +F src/where.c 643abd2dce6650d537c240c1ecdfc4090271091f F src/whereInt.h 929c1349b5355fd44f22cee5c14d72b3329c58a6 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 @@ -1180,7 +1180,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b4d9f6053d1d95fdc1eab8ce610b51e7df8d896d -R 508dd98757238bd14305734cabeeae37 -U drh -Z 96b01d8567714808bc64d5917b4d4853 +P 9ca737c0b41f87998d842e7772c3e483bb291c50 +R 48ab9caf5628261ea5cdb35094645ef7 +T *branch * stat4-skipscan +T *sym-stat4-skipscan * +T -sym-trunk * +U dan +Z a77d7df51663ffd17fb07ab0f5d5231a diff --git a/manifest.uuid b/manifest.uuid index 5313f68aef..4422476212 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9ca737c0b41f87998d842e7772c3e483bb291c50 \ No newline at end of file +01dc8102592427b71a18c2cb82301d2266dd59c2 \ No newline at end of file diff --git a/src/sqliteInt.h b/src/sqliteInt.h index cc3a030510..7e347c4ee2 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -3431,7 +3431,9 @@ void sqlite3BackupUpdate(sqlite3_backup *, Pgno, const u8 *); #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 void sqlite3AnalyzeFunctions(void); int sqlite3Stat4ProbeSetValue(Parse*,Index*,UnpackedRecord**,Expr*,u8,int,int*); +int sqlite3Stat4ValueFromExpr(Parse*, Expr*, u8, sqlite3_value**); void 
sqlite3Stat4ProbeFree(UnpackedRecord*); +int sqlite3Stat4Column(sqlite3*, const void*, int, int, sqlite3_value**); #endif /* diff --git a/src/vdbemem.c b/src/vdbemem.c index 2c4aa4ad7f..67dea9c282 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -1152,6 +1152,50 @@ void sqlite3AnalyzeFunctions(void){ } } +static int stat4ValueFromExpr( + Parse *pParse, /* Parse context */ + Expr *pExpr, /* The expression to extract a value from */ + u8 affinity, /* Affinity to use */ + struct ValueNewStat4Ctx *pAlloc,/* How to allocate space */ + sqlite3_value **ppVal /* OUT: New value object (or NULL) */ +){ + int rc = SQLITE_OK; + sqlite3_value *pVal = 0; + sqlite3 *db = pParse->db; + + /* Skip over any TK_COLLATE nodes */ + pExpr = sqlite3ExprSkipCollate(pExpr); + + if( !pExpr ){ + pVal = valueNew(db, pAlloc); + if( pVal ){ + sqlite3VdbeMemSetNull((Mem*)pVal); + } + }else if( pExpr->op==TK_VARIABLE + || NEVER(pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE) + ){ + Vdbe *v; + int iBindVar = pExpr->iColumn; + sqlite3VdbeSetVarmask(pParse->pVdbe, iBindVar); + if( (v = pParse->pReprepare)!=0 ){ + pVal = valueNew(db, pAlloc); + if( pVal ){ + rc = sqlite3VdbeMemCopy((Mem*)pVal, &v->aVar[iBindVar-1]); + if( rc==SQLITE_OK ){ + sqlite3ValueApplyAffinity(pVal, affinity, ENC(db)); + } + pVal->db = pParse->db; + } + } + }else{ + rc = valueFromExpr(db, pExpr, ENC(db), affinity, &pVal, pAlloc); + } + + assert( pVal==0 || pVal->db==db ); + *ppVal = pVal; + return rc; +} + /* ** This function is used to allocate and populate UnpackedRecord ** structures intended to be compared against sample index keys stored @@ -1191,47 +1235,76 @@ int sqlite3Stat4ProbeSetValue( int iVal, /* Array element to populate */ int *pbOk /* OUT: True if value was extracted */ ){ - int rc = SQLITE_OK; + int rc; sqlite3_value *pVal = 0; - sqlite3 *db = pParse->db; - - struct ValueNewStat4Ctx alloc; + alloc.pParse = pParse; alloc.pIdx = pIdx; alloc.ppRec = ppRec; alloc.iVal = iVal; - /* Skip over any TK_COLLATE nodes 
*/ - pExpr = sqlite3ExprSkipCollate(pExpr); + rc = stat4ValueFromExpr(pParse, pExpr, affinity, &alloc, &pVal); + assert( pVal==0 || pVal->db==pParse->db ); + *pbOk = (pVal!=0); + return rc; +} - if( !pExpr ){ - pVal = valueNew(db, &alloc); - if( pVal ){ - sqlite3VdbeMemSetNull((Mem*)pVal); +/* +** Attempt to extract a value from expression pExpr using the methods +** as described for sqlite3Stat4ProbeSetValue() above. +** +** If successful, set *ppVal to point to a new value object and return +** SQLITE_OK. If no value can be extracted, but no other error occurs +** (e.g. OOM), return SQLITE_OK and set *ppVal to NULL. Or, if an error +** does occur, return an SQLite error code. The final value of *ppVal +** is undefined in this case. +*/ +int sqlite3Stat4ValueFromExpr( + Parse *pParse, /* Parse context */ + Expr *pExpr, /* The expression to extract a value from */ + u8 affinity, /* Affinity to use */ + sqlite3_value **ppVal /* OUT: New value object (or NULL) */ +){ + return stat4ValueFromExpr(pParse, pExpr, affinity, 0, ppVal); +} + +int sqlite3Stat4Column( + sqlite3 *db, /* Database handle */ + const void *pRec, /* Pointer to buffer containing record */ + int nRec, /* Size of buffer pRec in bytes */ + int iCol, /* Column to extract */ + sqlite3_value **ppVal /* OUT: Extracted value */ +){ + int rc = SQLITE_OK; + Mem *pMem = *ppVal; + if( pMem==0 ){ + pMem = (Mem*)sqlite3ValueNew(db); + if( pMem==0 ){ + rc = SQLITE_NOMEM; } - }else if( pExpr->op==TK_VARIABLE - || NEVER(pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE) - ){ - Vdbe *v; - int iBindVar = pExpr->iColumn; - sqlite3VdbeSetVarmask(pParse->pVdbe, iBindVar); - if( (v = pParse->pReprepare)!=0 ){ - pVal = valueNew(db, &alloc); - if( pVal ){ - rc = sqlite3VdbeMemCopy((Mem*)pVal, &v->aVar[iBindVar-1]); - if( rc==SQLITE_OK ){ - sqlite3ValueApplyAffinity(pVal, affinity, ENC(db)); - } - pVal->db = pParse->db; - } + } + + if( rc==SQLITE_OK ){ + u32 t; + int nHdr; + int iHdr; + int iField; + int i; + u8 *a = 
(u8*)pRec; + + iHdr = getVarint32(a, nHdr); + iField = nHdr; + for(i=0; idb==db ); + *ppVal = pMem; return rc; } diff --git a/src/where.c b/src/where.c index fd5831872b..a500b5a024 100644 --- a/src/where.c +++ b/src/where.c @@ -18,6 +18,7 @@ */ #include "sqliteInt.h" #include "whereInt.h" +#include "vdbeInt.h" /* ** Return the estimated number of output rows from a WHERE clause @@ -1998,6 +1999,99 @@ static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){ return nRet; } +/* +** This function is called to estimate the number of rows visited by a +** range-scan on a skip-scan index. For example: +** +** CREATE INDEX i1 ON t1(a, b, c); +** SELECT * FROM t1 WHERE a=? AND c BETWEEN ? AND ?; +** +** Value pLoop->nOut is currently set to the estimated number of rows +** visited for scanning (a=? AND b=?). This function reduces that estimate +** by some factor to account for the (c BETWEEN ? AND ?) expression based +** on the stat4 data for the index. The fact that this scan will be performed +** multiple times (once for each (a,b) combination that matches a=?) is dealt +** with by the caller. +** +** It does this by scanning through all stat4 samples, comparing values +** extracted from pLower and pUpper with the corresponding column in each +** sample. If L and U are the number of samples found to be less than or +** equal to the values extracted from pLower and pUpper respectively, and +** N is the total number of samples, the pLoop->nOut value is adjusted +** as follows: +** +** nOut = nOut * ( max(U - L, 1) / N ) +** +** If pLower is NULL, or a value cannot be extracted from the term, L is +** set to zero. If pUpper is NULL, or a value cannot be extracted from it, +** U is set to N. +** +** Normally, this function sets *pbDone to 1 before returning. However, +** if no value can be extracted from either pLower or pUpper (and so the +** estimate of the number of rows delivered remains unchanged), *pbDone +** is left as is. 
+** +** If an error occurs, an SQLite error code is returned. Otherwise, +** SQLITE_OK. +*/ +static int whereRangeSkipScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ + WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ + WhereLoop *pLoop, /* Update the .nOut value of this loop */ + int *pbDone /* Set to true if at least one expr. value extracted */ +){ + Index *p = pLoop->u.btree.pIndex; + int nEq = pLoop->u.btree.nEq; + sqlite3 *db = pParse->db; + int nLower = 0; + int nUpper = 0; + int rc = SQLITE_OK; + u8 aff = p->pTable->aCol[ p->aiColumn[nEq] ].affinity; + CollSeq *pColl; + + sqlite3_value *p1 = 0; /* Value extracted from pLower */ + sqlite3_value *p2 = 0; /* Value extracted from pUpper */ + sqlite3_value *pVal = 0; /* Value extracted from record */ + + pColl = sqlite3LocateCollSeq(pParse, p->azColl[nEq]); + if( pLower ){ + rc = sqlite3Stat4ValueFromExpr(pParse, pLower->pExpr->pRight, aff, &p1); + } + if( pUpper && rc==SQLITE_OK ){ + rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2); + } + + if( p1 || p2 ){ + int i; + int nDiff; + for(i=0; rc==SQLITE_OK && i<p->nSample; i++){ + rc = sqlite3Stat4Column(db, p->aSample[i].p, p->aSample[i].n, nEq, &pVal); + if( rc==SQLITE_OK && p1 ){ + int res = sqlite3MemCompare(p1, pVal, pColl); + if( res<=0 ) nLower++; + } + if( rc==SQLITE_OK && p2 ){ + int res = sqlite3MemCompare(p2, pVal, pColl); + if( res<=0 ) nUpper++; + } + } + if( p2==0 ) nUpper = p->nSample; + nDiff = (nUpper - nLower); + if( nDiff<=0 ) nDiff = 1; + pLoop->nOut -= (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); + *pbDone = 1; + }else{ + assert( *pbDone==0 ); + } + + sqlite3ValueFree(p1); + sqlite3ValueFree(p2); + sqlite3ValueFree(pVal); + + return rc; +} + /* ** This function is used to estimate the number of rows that will be visited ** by scanning an index for a range of values. 
The range may have an upper @@ -2054,95 +2148,100 @@ static int whereRangeScanEst( int nEq = pLoop->u.btree.nEq; if( p->nSample>0 - && nEq==pBuilder->nRecValid && nEq<p->nSampleCol && OptimizationEnabled(pParse->db, SQLITE_Stat3) ){ - UnpackedRecord *pRec = pBuilder->pRec; - tRowcnt a[2]; - u8 aff; - - /* Variable iLower will be set to the estimate of the number of rows in - ** the index that are less than the lower bound of the range query. The - ** lower bound being the concatenation of $P and $L, where $P is the - ** key-prefix formed by the nEq values matched against the nEq left-most - ** columns of the index, and $L is the value in pLower. - ** - ** Or, if pLower is NULL or $L cannot be extracted from it (because it - ** is not a simple variable or literal value), the lower bound of the - ** range is $P. Due to a quirk in the way whereKeyStats() works, even - ** if $L is available, whereKeyStats() is called for both ($P) and - ** ($P:$L) and the larger of the two returned values used. - ** - ** Similarly, iUpper is to be set to the estimate of the number of rows - ** less than the upper bound of the range query. Where the upper bound - ** is either ($P) or ($P:$U). Again, even if $U is available, both values - ** of iUpper are requested of whereKeyStats() and the smaller used. - */ - tRowcnt iLower; - tRowcnt iUpper; + if( nEq==pBuilder->nRecValid ){ + UnpackedRecord *pRec = pBuilder->pRec; + tRowcnt a[2]; + u8 aff; + + /* Variable iLower will be set to the estimate of the number of rows in + ** the index that are less than the lower bound of the range query. The + ** lower bound being the concatenation of $P and $L, where $P is the + ** key-prefix formed by the nEq values matched against the nEq left-most + ** columns of the index, and $L is the value in pLower. + ** + ** Or, if pLower is NULL or $L cannot be extracted from it (because it + ** is not a simple variable or literal value), the lower bound of the + ** range is $P. 
Due to a quirk in the way whereKeyStats() works, even + ** if $L is available, whereKeyStats() is called for both ($P) and + ** ($P:$L) and the larger of the two returned values used. + ** + ** Similarly, iUpper is to be set to the estimate of the number of rows + ** less than the upper bound of the range query. Where the upper bound + ** is either ($P) or ($P:$U). Again, even if $U is available, both values + ** of iUpper are requested of whereKeyStats() and the smaller used. + */ + tRowcnt iLower; + tRowcnt iUpper; - if( nEq==p->nKeyCol ){ - aff = SQLITE_AFF_INTEGER; - }else{ - aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; - } - /* Determine iLower and iUpper using ($P) only. */ - if( nEq==0 ){ - iLower = 0; - iUpper = sqlite3LogEstToInt(p->aiRowLogEst[0]); - }else{ - /* Note: this call could be optimized away - since the same values must - ** have been requested when testing key $P in whereEqualScanEst(). */ - whereKeyStats(pParse, p, pRec, 0, a); - iLower = a[0]; - iUpper = a[0] + a[1]; - } - - /* If possible, improve on the iLower estimate using ($P:$L). */ - if( pLower ){ - int bOk; /* True if value is extracted from pExpr */ - Expr *pExpr = pLower->pExpr->pRight; - assert( (pLower->eOperator & (WO_GT|WO_GE))!=0 ); - rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); - if( rc==SQLITE_OK && bOk ){ - tRowcnt iNew; + if( nEq==p->nKeyCol ){ + aff = SQLITE_AFF_INTEGER; + }else{ + aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; + } + /* Determine iLower and iUpper using ($P) only. */ + if( nEq==0 ){ + iLower = 0; + iUpper = sqlite3LogEstToInt(p->aiRowLogEst[0]); + }else{ + /* Note: this call could be optimized away - since the same values must + ** have been requested when testing key $P in whereEqualScanEst(). */ whereKeyStats(pParse, p, pRec, 0, a); - iNew = a[0] + ((pLower->eOperator & WO_GT) ? 
a[1] : 0); - if( iNew>iLower ) iLower = iNew; - nOut--; + iLower = a[0]; + iUpper = a[0] + a[1]; } - } - /* If possible, improve on the iUpper estimate using ($P:$U). */ - if( pUpper ){ - int bOk; /* True if value is extracted from pExpr */ - Expr *pExpr = pUpper->pExpr->pRight; - assert( (pUpper->eOperator & (WO_LT|WO_LE))!=0 ); - rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); - if( rc==SQLITE_OK && bOk ){ - tRowcnt iNew; - whereKeyStats(pParse, p, pRec, 1, a); - iNew = a[0] + ((pUpper->eOperator & WO_LE) ? a[1] : 0); - if( iNewpExpr->pRight; + assert( (pLower->eOperator & (WO_GT|WO_GE))!=0 ); + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); + if( rc==SQLITE_OK && bOk ){ + tRowcnt iNew; + whereKeyStats(pParse, p, pRec, 0, a); + iNew = a[0] + ((pLower->eOperator & WO_GT) ? a[1] : 0); + if( iNew>iLower ) iLower = iNew; + nOut--; + } } - } - pBuilder->pRec = pRec; - if( rc==SQLITE_OK ){ - if( iUpper>iLower ){ - nNew = sqlite3LogEst(iUpper - iLower); - }else{ - nNew = 10; assert( 10==sqlite3LogEst(2) ); + /* If possible, improve on the iUpper estimate using ($P:$U). */ + if( pUpper ){ + int bOk; /* True if value is extracted from pExpr */ + Expr *pExpr = pUpper->pExpr->pRight; + assert( (pUpper->eOperator & (WO_LT|WO_LE))!=0 ); + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); + if( rc==SQLITE_OK && bOk ){ + tRowcnt iNew; + whereKeyStats(pParse, p, pRec, 1, a); + iNew = a[0] + ((pUpper->eOperator & WO_LE) ? 
a[1] : 0); + if( iNewpRec = pRec; + if( rc==SQLITE_OK ){ + if( iUpper>iLower ){ + nNew = sqlite3LogEst(iUpper - iLower); + }else{ + nNew = 10; assert( 10==sqlite3LogEst(2) ); + } + if( nNewnOut = (LogEst)nOut; + WHERETRACE(0x10, ("range scan regions: %u..%u est=%d\n", + (u32)iLower, (u32)iUpper, nOut)); + return SQLITE_OK; } - pLoop->nOut = (LogEst)nOut; - WHERETRACE(0x10, ("range scan regions: %u..%u est=%d\n", - (u32)iLower, (u32)iUpper, nOut)); - return SQLITE_OK; + }else{ + int bDone = 0; + rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone); + if( bDone ) return rc; } } #else