From 0c50fa0f61724035b6be561c17e50dc0407d4305 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 21 Jan 2011 16:27:18 +0000 Subject: [PATCH] Make use of histogram data to make better estimates for the number of rows that will be returned from "x IN (v1,v2,v3,...)" constraints. FossilOrigin-Name: fd3977a27ae68e694df12a4713e55515c1e87c5d --- manifest | 18 ++++----- manifest.uuid | 2 +- src/where.c | 103 ++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 97 insertions(+), 26 deletions(-) diff --git a/manifest b/manifest index bb09b610c7..e202477c08 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Add\sthe\sability\sto\suse\sindices\swhen\sa\srange\scontraint\sis\sbounded\son\nthe\slower\send\sby\sNULL. -D 2011-01-21T14:37:04.663 +C Make\suse\sof\shistogram\sdata\sto\smake\sbetter\sestimates\sfor\sthe\snumber\sof\srows\nthat\swill\sbe\sreturned\sfrom\s"x\sIN\s(v1,v2,v3,...)"\sconstraints. +D 2011-01-21T16:27:18.621 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c cf219a4275cf430d0e7df9d2db04e9ba29702f8e +F src/where.c 7f2844afffd9e09373e874a74de81d3502b2a35c F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P c7b59afaf0c0bf85dbaf0a122cc8d65fca93680f -R 07d2f363ee58370384294fd182ee30ff +P f73a167b434fadcbbd15e3891c4b7f4f87f6363c +R 46f7d508c889f9891a64638b5f1737ae U drh -Z 32ed47abc5f3838cb9feba0ba8b99416 +Z 14ba122ed035896c7c1b08aa324c4833 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNOZoToxKgR168RlERAkh6AJ4kec2vRcpVEtGIXoGz4TjpsHbYigCeMtj1 -RYqs8Oaohb8CL1KIU7eSI9Q= -=lXJE +iD8DBQFNObPpoxKgR168RlERAmgEAJ97hcV3wI5jmVOjUrAeDzSnM45gLACghPy2 +7kt0j2FfeGbbS4tWO9hsJaU= +=BYPr -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index a9f1839b77..ad6a13da6f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f73a167b434fadcbbd15e3891c4b7f4f87f6363c \ No newline at end of file +fd3977a27ae68e694df12a4713e55515c1e87c5d \ No newline at end of file diff --git a/src/where.c b/src/where.c index 57552a4cba..8eaef351d7 100644 --- a/src/where.c +++ b/src/where.c @@ -2475,18 +2475,19 @@ range_est_fallback: ** column of an index and sqlite_stat2 histogram data is available ** for that index. ** -** Write the estimated row count into *pnRow. If unable to make -** an estimate, leave *pnRow unchanged. +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. ** ** This routine can fail if it is unable to load a collating sequence ** required for string comparison, or if unable to allocate memory ** for a UTF conversion required for comparison. The error is stored ** in the pParse structure. */ -void whereEqScanEst( +int whereEqualScanEst( Parse *pParse, /* Parsing & code generating context */ Index *p, /* The index whose left-most column is pTerm */ - WhereTerm *pTerm, /* The x=VALUE constraint */ + Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ double *pnRow /* Write the revised row estimate here */ ){ sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ @@ -2496,14 +2497,14 @@ void whereEqScanEst( double nRowEst; /* New estimate of the number of rows */ assert( p->aSample!=0 ); - assert( pTerm->eOperator==WO_EQ ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; - rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs); - if( rc ) goto whereEqScanEst_cancel; + rc = valueFromExpr(pParse, pExpr, aff, &pRhs); + if( rc ) goto whereEqualScanEst_cancel; + if( pRhs==0 ) return SQLITE_NOTFOUND; rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); - if( rc ) goto whereEqScanEst_cancel; + if( rc ) goto whereEqualScanEst_cancel; rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); - if( rc ) goto whereEqScanEst_cancel; + if( rc ) goto whereEqualScanEst_cancel; WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); if( iLower>=iUpper ){ nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); @@ -2513,8 +2514,76 @@ void whereEqScanEst( *pnRow = nRowEst; } -whereEqScanEst_cancel: +whereEqualScanEst_cancel: sqlite3ValueFree(pRhs); + return rc; +} +#endif /* defined(SQLITE_ENABLE_STAT2) */ + +#ifdef SQLITE_ENABLE_STAT2 +/* +** Estimate the number of rows that will be returned based on +** an IN constraint "x IN (V1,V2,V3,...)" where the right-hand side +** of the IN operator is a list of values. +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. +*/ +int whereInScanEst( + Parse *pParse, /* Parsing & code generating context */ + Index *p, /* The index whose left-most column is pTerm */ + ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ + double *pnRow /* Write the revised row estimate here */ +){ + sqlite3_value *pVal = 0; /* One value from list */ + int iLower, iUpper; /* Range of histogram regions containing pRhs */ + u8 aff; /* Column affinity */ + int rc; /* Subfunction return code */ + double nRowEst; /* New estimate of the number of rows */ + int nRegion = 0; /* Number of histogram regions spanned */ + int nSingle = 0; /* Count of values contained within one region */ + int nNotFound = 0; /* Count of values that are not constants */ + int i; /* Loop counter */ + u8 aHit[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ + + assert( p->aSample!=0 ); + aff = p->pTable->aCol[p->aiColumn[0]].affinity; + memset(aHit, 0, sizeof(aHit)); + for(i=0; inExpr; i++){ + sqlite3ValueFree(pVal); + rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); + if( rc ) break; + if( pVal==0 ){ + nNotFound++; + continue; + } + rc = whereRangeRegion(pParse, p, pVal, 0, &iLower); + if( rc ) break; + rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); + if( rc ) break; + if( iLower>=iUpper ){ + nSingle++; + } + assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); + while( iLower<=iUpper ) aHit[iLower++] = 1; + } + if( rc==SQLITE_OK ){ + for(i=nRegion=0; iaiRowEst[0]/(SQLITE_INDEX_SAMPLES+1) + + nNotFound*p->aiRowEst[1]; + if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; + *pnRow = nRowEst; + WHERETRACE(("IN row estimate: nRegion=%d, nSingle=%d, nNotFound=%d\n", + nRegion, nSingle, nNotFound)); + } + sqlite3ValueFree(pVal); + return rc; } #endif /* defined(SQLITE_ENABLE_STAT2) */ @@ -2686,7 +2755,7 @@ static void bestBtreeIndex( int bLookup = 0; WhereTerm *pTerm; /* A single term of the WHERE clause */ #ifdef SQLITE_ENABLE_STAT2 - WhereTerm *pFirstEqTerm = 0; /* First WO_EQ term */ + WhereTerm *pFirstTerm = 0; /* First term matching the index */ #endif /* Determine the values of nEq and nInMul */ @@ -2710,9 +2779,7 @@ static void bestBtreeIndex( wsFlags |= WHERE_COLUMN_NULL; } #ifdef SQLITE_ENABLE_STAT2 - else if( nEq==0 && pProbe->aSample ){ - pFirstEqTerm = pTerm; - } + if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm; #endif used |= pTerm->prereqRight; } @@ -2796,8 +2863,12 @@ static void bestBtreeIndex( ** to get a better estimate on the number of rows based on ** VALUE and how common that value is according to the histogram. */ - if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){ - whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow); + if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){ + if( pFirstTerm->eOperator==WO_EQ ){ + whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow); + }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){ + whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow); + } } #endif /* SQLITE_ENABLE_STAT2 */ -- 2.47.3