From: drh Date: Tue, 11 Jun 2013 18:59:38 +0000 (+0000) Subject: Improved processing of DISTINCT. X-Git-Tag: version-3.8.0~130^2~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4f402f26b15902d22f13df5e77cfdc4d8a5622cb;p=thirdparty%2Fsqlite.git Improved processing of DISTINCT. FossilOrigin-Name: ba897100fed291d2025f68d09334f9985312298b --- diff --git a/ext/misc/fuzzer.c b/ext/misc/fuzzer.c index 023bdb1d09..fe41cda8c2 100644 --- a/ext/misc/fuzzer.c +++ b/ext/misc/fuzzer.c @@ -1079,7 +1079,7 @@ static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ int i; int seenMatch = 0; const struct sqlite3_index_constraint *pConstraint; - double rCost = 100000; + double rCost = 1e12; pConstraint = pIdxInfo->aConstraint; for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ @@ -1095,7 +1095,7 @@ static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ iPlan |= 1; pIdxInfo->aConstraintUsage[i].argvIndex = 1; pIdxInfo->aConstraintUsage[i].omit = 1; - rCost /= 1000000.0; + rCost /= 1e6; } if( (iPlan & 2)==0 && pConstraint->iColumn==1 @@ -1132,7 +1132,7 @@ static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ ){ pIdxInfo->orderByConsumed = 1; } - if( seenMatch && (iPlan&1)==0 ) rCost *= 1e30; + if( seenMatch && (iPlan&1)==0 ) rCost = 1e99; pIdxInfo->estimatedCost = rCost; return SQLITE_OK; diff --git a/manifest b/manifest index 9e3a53b7f0..bdcdfbf749 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\sParse.nQueryLoop\sstate\svariable\sto\swork\swith\sNGQP. -D 2013-06-11T13:30:04.431 +C Improved\sprocessing\sof\sDISTINCT. 
+D 2013-06-11T18:59:38.240 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -108,7 +108,7 @@ F ext/icu/icu.c 7538f98eab2854cf17fa5f7797bffa6c76e3863b F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 F ext/misc/amatch.c eae8454cd9dcb287b2a3ec2e65a865a4ac5f0d06 F ext/misc/closure.c 997c20ddf35f85ab399f4a02a557a9baa822ec32 -F ext/misc/fuzzer.c 79ac20b68794dfe03626f3a31681e6c240583052 +F ext/misc/fuzzer.c 136533c53cfce0957f0b48fa11dba27e21c5c01d F ext/misc/ieee754.c 2565ce373d842977efe0922dc50b8a41b3289556 F ext/misc/nextchar.c 1131e2b36116ffc6fe6b2e3464bfdace27978b1e F ext/misc/percentile.c 4fb5e46c4312b0be74e8e497ac18f805f0e3e6c5 @@ -220,7 +220,7 @@ F src/shell.c ab6eea968c8745be3aa74e45fedb37d057b4cd0d F src/sqlite.h.in 5b390ca5d94e09e56e7fee6a51ddde4721b89f8e F src/sqlite3.rc fea433eb0a59f4c9393c8e6d76a6e2596b1fe0c0 F src/sqlite3ext.h d936f797812c28b81b26ed18345baf8db28a21a5 -F src/sqliteInt.h f2e51444e15f7dd35c1ee139f95f43b0bccf3fb5 +F src/sqliteInt.h 026a52f3e24c15603fddb78025e786b96c4c0d2c F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c bedc37ec1a6bb9399944024d63f4c769971955a9 F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e @@ -289,7 +289,7 @@ F src/vtab.c b05e5f1f4902461ba9f5fc49bb7eb7c3a0741a83 F src/wal.c 436bfceb141b9423c45119e68e444358ee0ed35d F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4 F src/walker.c 4fa43583d0a84b48f93b1e88f11adf2065be4e73 -F src/where.c 572357246bee5389751427c670cb0222fe36be78 +F src/where.c 2afa8f1a71dd088b137a7772f5c5dfee70c8fbe8 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/aggnested.test 45c0201e28045ad38a530b5a144b73cd4aa2cfd6 @@ -1095,7 +1095,7 @@ F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 
fbc018d67fd7395f440c28f33ef0f94420226381 F tool/wherecosttest.c 4d0393bdbe7230adb712e925863744dd2b7ffc5b F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac -P b777b1097dcf9dfeb1b86c71e1b5f6918d68c975 -R 1aa8d10751da8bc78a75655bc8318da4 +P f1cac24f06b9c71cfa472fdcf2da4cd8689a7cc3 +R f41c25fe4af487f857bb0cfd93c46d44 U drh -Z 0a42be2e93f4ddc55a00a5d0e3551610 +Z 15fc6bc86393280f1819e7074b4ff66c diff --git a/manifest.uuid b/manifest.uuid index 3db045bf33..64bfde3e8b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f1cac24f06b9c71cfa472fdcf2da4cd8689a7cc3 \ No newline at end of file +ba897100fed291d2025f68d09334f9985312298b \ No newline at end of file diff --git a/src/sqliteInt.h b/src/sqliteInt.h index d3d6fcf70a..b22689462c 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1967,8 +1967,10 @@ struct SrcList { #define WHERE_ONETABLE_ONLY 0x0040 /* Only code the 1st table in pTabList */ #define WHERE_AND_ONLY 0x0080 /* Don't use indices for OR terms */ #define WHERE_GROUPBY 0x0100 /* pOrderBy is really a GROUP BY */ +#define WHERE_DISTINCTBY 0x0200 /* pOrderby is really a DISTINCT clause */ -/* Allowed values for WhereInfo.eDistinct and DistinctCtx.eTnctType */ +/* Allowed return values from sqlite3WhereIsDistinct() +*/ #define WHERE_DISTINCT_NOOP 0 /* DISTINCT keyword not used */ #define WHERE_DISTINCT_UNIQUE 1 /* No duplicates */ #define WHERE_DISTINCT_ORDERED 2 /* All duplicates are adjacent */ @@ -2226,7 +2228,7 @@ struct Parse { /* Information used while coding trigger programs. 
*/ Parse *pToplevel; /* Parse structure for main program (or NULL) */ Table *pTriggerTab; /* Table triggers are being coded for */ - u32 grep nQueryLoop; /* Est number of iterations of a query (10*log2(N)) */ + u32 nQueryLoop; /* Est number of iterations of a query (10*log2(N)) */ u32 oldmask; /* Mask of old.* columns referenced */ u32 newmask; /* Mask of new.* columns referenced */ u8 eTriggerOp; /* TK_UPDATE, TK_INSERT or TK_DELETE */ diff --git a/src/where.c b/src/where.c index d493dba46f..c21f3fc494 100644 --- a/src/where.c +++ b/src/where.c @@ -345,9 +345,11 @@ struct WhereInfo { SrcList *pTabList; /* List of tables in the join */ ExprList *pOrderBy; /* The ORDER BY clause or NULL */ ExprList *pDistinct; /* DISTINCT ON values, or NULL */ + WhereLoop *pLoops; /* List of all WhereLoop objects */ Bitmask revMask; /* Mask of ORDER BY terms that need reversing */ - u16 nOBSat; /* Number of ORDER BY terms satisfied by indices */ + WhereCost nRowOut; /* Estimated number of output rows */ u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */ + u8 bOBSat; /* ORDER BY satisfied by indices */ u8 okOnePass; /* Ok to use one-pass algorithm for UPDATE/DELETE */ u8 untestedTerms; /* Not all WHERE terms resolved by outer loop */ u8 eDistinct; /* One of the WHERE_DISTINCT_* values below */ @@ -355,11 +357,9 @@ struct WhereInfo { int iContinue; /* Jump here to continue with next record */ int iBreak; /* Jump here to break out of the loop */ int nLevel; /* Number of nested loop */ + int savedNQueryLoop; /* pParse->nQueryLoop outside the WHERE loop */ WhereMaskSet sMaskSet; /* Map cursor numbers to bitmasks */ WhereClause sWC; /* Decomposition of the WHERE clause */ - WhereLoop *pLoops; /* List of all WhereLoop objects */ - int savedNQueryLoop; /* pParse->nQueryLoop outside the WHERE loop */ - WhereCost nRowOut; /* Estimated number of output rows */ WhereLevel a[1]; /* Information about each nest loop in WHERE */ }; @@ -441,7 +441,7 @@ int 
sqlite3WhereIsDistinct(WhereInfo *pWInfo){ ** Return FALSE if the output needs to be sorted. */ int sqlite3WhereIsOrdered(WhereInfo *pWInfo){ - return pWInfo->nOBSat>0; + return pWInfo->bOBSat!=0; } /* @@ -1776,15 +1776,16 @@ static int findIndexCol( /* ** Return true if the DISTINCT expression-list passed as the third argument -** is redundant. A DISTINCT list is redundant if the database contains a -** UNIQUE index that guarantees that the result of the query will be distinct -** anyway. +** is redundant. +** +** A DISTINCT list is redundant if the database contains some set of +** columns that are unique and non-null. */ static int isDistinctRedundant( - Parse *pParse, - SrcList *pTabList, - WhereClause *pWC, - ExprList *pDistinct + Parse *pParse, /* Parsing context */ + SrcList *pTabList, /* The FROM clause */ + WhereClause *pWC, /* The WHERE clause */ + ExprList *pDistinct /* The result set that needs to be DISTINCT */ ){ Table *pTab; Index *pIdx; @@ -3418,7 +3419,7 @@ static Bitmask codeOneLoopStart( ** this requires some special handling. */ if( (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)!=0 - && (pWInfo->nOBSat>0) + && (pWInfo->bOBSat!=0) && (pIdx->nColumn>nEq) ){ /* assert( pOrderBy->nExpr==1 ); */ @@ -4493,7 +4494,9 @@ static int whereLoopAddBtree( ){ pNew->iSortIdx = b ? iSortIdx : 0; pNew->nOut = rSize; - pNew->rRun = whereCostAdd(rSize,rLogSize) + ((m==0 && b) ? 
10 : 0); + pNew->rRun = whereCostAdd(rSize,rLogSize); + if( m!=0 ) pNew->rRun += rLogSize; + if( b ) pNew->rRun--; rc = whereLoopInsert(pBuilder, pNew); if( rc ) break; } @@ -4804,11 +4807,13 @@ static int whereLoopAddAll(WhereLoopBuilder *pBuilder){ */ static int wherePathSatisfiesOrderBy( WhereInfo *pWInfo, /* The WHERE clause */ + ExprList *pOrderBy, /* ORDER BY or GROUP BY or DISTINCT clause to check */ WherePath *pPath, /* The WherePath to check */ - int nLoop, /* Number of entries in pPath->aLoop[] */ - int isLastLoop, /* True if pLast is the inner-most loop */ + u16 wctrlFlags, /* Might contain WHERE_GROUPBY or WHERE_DISTINCTBY */ + u16 nLoop, /* Number of entries in pPath->aLoop[] */ + u8 isLastLoop, /* True if pLast is the inner-most loop */ WhereLoop *pLast, /* Add this WhereLoop to the end of pPath->aLoop[] */ - Bitmask *pRevMask /* Mask of WhereLoops to run in reverse order */ + Bitmask *pRevMask /* OUT: Mask of WhereLoops to run in reverse order */ ){ u8 revSet; /* True if rev is known */ u8 rev; /* Composite sort order */ @@ -4823,7 +4828,6 @@ static int wherePathSatisfiesOrderBy( int iCur; /* Cursor number for current WhereLoop */ int iColumn; /* A column number within table iCur */ WhereLoop *pLoop; /* Current WhereLoop being processed. 
*/ - ExprList *pOrderBy = pWInfo->pOrderBy; /* the ORDER BY clause */ WhereTerm *pTerm; /* A single term of the WHERE clause */ Expr *pOBExpr; /* An expression from the ORDER BY clause */ CollSeq *pColl; /* COLLATE function from an ORDER BY clause term */ @@ -4960,7 +4964,7 @@ static int wherePathSatisfiesOrderBy( for(i=0; bOnce && i<nOrderBy; i++){ if( MASKBIT(i) & obSat ) continue; pOBExpr = sqlite3ExprSkipCollate(pOrderBy->a[i].pExpr); - if( (pWInfo->wctrlFlags & WHERE_GROUPBY)==0 ) bOnce = 0; + if( (wctrlFlags & (WHERE_GROUPBY|WHERE_DISTINCTBY))==0 ) bOnce = 0; if( pOBExpr->op!=TK_COLUMN ) continue; if( pOBExpr->iTable!=iCur ) continue; if( pOBExpr->iColumn!=iColumn ) continue; @@ -5111,8 +5115,9 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ rCost = whereCostAdd(rCost, pFrom->rCost); maskNew = pFrom->maskLoop | pWLoop->maskSelf; if( !isOrderedValid ){ - switch( wherePathSatisfiesOrderBy(pWInfo, pFrom, iLoop, iLoop==nLoop-1, - pWLoop, &revMask) ){ + switch( wherePathSatisfiesOrderBy(pWInfo, + pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags, + iLoop, iLoop==nLoop-1, pWLoop, &revMask) ){ case 1: /* Yes. 
pFrom+pWLoop does satisfy the ORDER BY clause */ isOrdered = 1; isOrderedValid = 1; @@ -5249,9 +5254,22 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ pLevel->iFrom = pWLoop->iTab; /* FIXME: Omit the iFrom field */ pLevel->iTabCur = pWInfo->pTabList->a[pLevel->iFrom].iCursor; } + if( (pWInfo->wctrlFlags & WHERE_DISTINCTBY)==0 + && pWInfo->pDistinct + && nRowEst + ){ + Bitmask notUsed; + int rc = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pDistinct, pFrom, + WHERE_DISTINCTBY, nLoop-1, 1, pFrom->aLoop[nLoop-1], &notUsed); + if( rc==1 ) pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; + } if( pFrom->isOrdered ){ - pWInfo->nOBSat = pWInfo->pOrderBy->nExpr; - pWInfo->revMask = pFrom->revLoop; + if( pWInfo->wctrlFlags & WHERE_DISTINCTBY ){ + pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; + }else{ + pWInfo->bOBSat = 1; + pWInfo->revMask = pFrom->revLoop; + } } pWInfo->nRowOut = pFrom->nRow; @@ -5327,7 +5345,8 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pLoop->maskSelf = getMask(&pWInfo->sMaskSet, iCur); pWInfo->a[0].iTabCur = iCur; pWInfo->nRowOut = 1; - pWInfo->nOBSat = pWInfo->pOrderBy ? pWInfo->pOrderBy->nExpr : 0; + if( pWInfo->pOrderBy ) pWInfo->bOBSat = 1; + if( pWInfo->pDistinct ) pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; #ifdef SQLITE_DEBUG pLoop->cId = '0'; #endif @@ -5414,15 +5433,6 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ ** pOrderBy is a pointer to the ORDER BY clause of a SELECT statement, ** if there is one. If there is no ORDER BY clause or if this routine ** is called from an UPDATE or DELETE statement, then pOrderBy is NULL. -** -** If an index can be used so that the natural output order of the table -** scan is correct for the ORDER BY clause, then that index is used and -** the returned WhereInfo.nOBSat field is set to pOrderBy->nExpr. This -** is an optimization that prevents an unnecessary sort of the result set -** if an index appropriate for the ORDER BY clause already exists. 
-** -** If the where clause loops cannot be arranged to provide the correct -** output order, then WhereInfo.nOBSat is 0. */ WhereInfo *sqlite3WhereBegin( Parse *pParse, /* The parser context */ @@ -5558,13 +5568,32 @@ WhereInfo *sqlite3WhereBegin( goto whereBeginError; } + /* If the ORDER BY (or GROUP BY) clause contains references to general + ** expressions, then we won't be able to satisfy it using indices, so + ** go ahead and disable it now. + */ + if( pOrderBy ){ + for(ii=0; ii<pOrderBy->nExpr; ii++){ + Expr *pExpr = sqlite3ExprSkipCollate(pOrderBy->a[ii].pExpr); + if( pExpr->op!=TK_COLUMN ){ + pWInfo->pOrderBy = pOrderBy = 0; + break; + } + } + } + /* Check if the DISTINCT qualifier, if there is one, is redundant. ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT. */ - if( pDistinct && isDistinctRedundant(pParse,pTabList,&pWInfo->sWC,pDistinct) ){ - pDistinct = 0; - pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; + if( pDistinct ){ + if( isDistinctRedundant(pParse,pTabList,&pWInfo->sWC,pDistinct) ){ + pDistinct = 0; + pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; + }else if( pOrderBy==0 ){ + pWInfo->wctrlFlags |= WHERE_DISTINCTBY; + pWInfo->pOrderBy = pDistinct; + } } /* Construct the WhereLoop objects */ @@ -5587,7 +5616,7 @@ WhereInfo *sqlite3WhereBegin( } #endif - wherePathSolver(pWInfo, -1); + wherePathSolver(pWInfo, 0); if( db->mallocFailed ) goto whereBeginError; if( pWInfo->pOrderBy ){ wherePathSolver(pWInfo, pWInfo->nRowOut); @@ -5603,12 +5632,25 @@ WhereInfo *sqlite3WhereBegin( #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace ){ int ii; - sqlite3DebugPrintf("---- Solution"); - if( pWInfo->nOBSat ){ - sqlite3DebugPrintf(" ORDER BY omitted rev=0x%llx\n", pWInfo->revMask); - }else{ - sqlite3DebugPrintf("\n"); + sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut); + if( pWInfo->bOBSat ){ + sqlite3DebugPrintf(" ORDERBY=0x%llx", pWInfo->revMask); + } + switch( pWInfo->eDistinct ){ + 
case WHERE_DISTINCT_UNIQUE: { + sqlite3DebugPrintf(" DISTINCT=unique"); + break; + } + case WHERE_DISTINCT_ORDERED: { + sqlite3DebugPrintf(" DISTINCT=ordered"); + break; + } + case WHERE_DISTINCT_UNORDERED: { + sqlite3DebugPrintf(" DISTINCT=unordered"); + break; + } } + sqlite3DebugPrintf("\n"); for(ii=0; ii<nTabList; ii++){ whereLoopPrint(pWInfo->a[ii].pWLoop, pTabList); }