From: dan Date: Sat, 2 Jul 2011 13:34:15 +0000 (+0000) Subject: Cherrypick [45e581bff7] into the 3.7.2 branch. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0ac2aa4ed006167a7eecf3299c00a233bda7b1ea;p=thirdparty%2Fsqlite.git Cherrypick [45e581bff7] into the 3.7.2 branch. FossilOrigin-Name: c593792ce00d0a6379f31cab1bca22f5e159ce18 --- diff --git a/manifest b/manifest index a6e5543e6a..7e46f3a08a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Backport\scheck-ins\s[0900e35348f4b9bf3]\sand\s[4fead8e714c7e50]\sto\sthe\s3.7.2\nbranch.\s\sThese\scheck-ins\sprovide\shints\sto\sthe\sbtree\slayer\sfor\swhen\sit\sis\npossible\sto\suse\sa\shash\stable\srather\sthan\sa\sbtree\sto\simplement\san\sindex.\nThe\sSQLite\sBTree\slayer\sdoes\snot\suse\sthese\shints,\sbut\salternative\sbtree\slayers\nmight. -D 2011-05-01T22:57:41.307 +C Cherrypick\s[45e581bff7]\sinto\sthe\s3.7.2\sbranch. +D 2011-07-02T13:34:15.849 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 543f91f24cd7fee774ecc0a61c19704c0c3e78fd F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -121,10 +121,10 @@ F src/callback.c da3c38d0ef5d7f04fae371e519bda61aa9cb1704 F src/complete.c dc1d136c0feee03c2f7550bafc0d29075e36deac F src/ctime.c 2e39d3374e785a63117e077bcba9d4a6656df363 F src/date.c 5dd8448a0bfea8d31fb14cff487d0c06ff8c8b20 -F src/delete.c 7ed8a8c8b5f748ece92df173d7e0f7810c899ebd +F src/delete.c daff6cef77fe8ed57b8acfc5ecebce28244af2fa F src/expr.c 21ad2494bd7c0826072216ea0f20db86a0a4ac82 F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb -F src/fkey.c 58bbf52c6ddd3f64ca40a3230f9e548a83a5cb16 +F src/fkey.c d56da9f698067e52a45736e97b17ee01cd849b78 F src/func.c 464b0dc70618b896c402c574eb04bc5eacf35341 F src/global.c 02335177cf6946fe5525c6f0755cf181140debf3 F src/hash.c 458488dcc159c301b8e7686280ab209f1fb915af @@ -168,11 +168,11 @@ F src/printf.c 8ae5082dd38a1b5456030c3755ec3a392cd51506 F src/random.c cd4a67b3953b88019f8cd4ccd81394a8ddfaba50 F src/resolve.c 1c0f32b64f8e3f555fe1f732f9d6f501a7f05706 F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697 -F src/select.c 567d1aa0fdf7b253d06c5391ed725669773d5b87 +F src/select.c 27ceaf3ae2c493d299adec578bbc9e397ebf2806 F src/shell.c 8517fc1f9c59ae4007e6cc8b9af91ab231ea2056 F src/sqlite.h.in 2d72a6242df41c517e38eec8791abcf5484a36f1 F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89 -F src/sqliteInt.h 81894df40eb6023fc786bb9e4aeef7dcbf54b873 +F src/sqliteInt.h efeea5b4666749c99eceae31194e085c059cdb3e F src/sqliteLimit.h a17dcd3fb775d63b64a43a55c54cb282f9726f44 F src/status.c 496913d4e8441195f6f2a75b1c95993a45b9b30b F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e @@ -214,7 +214,7 @@ F src/test_vfs.c 702e52636113f6b9721da90ef1bf26e07fff414d F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/tokenize.c 604607d6813e9551cf5189d899e0a25c12681080 F src/trigger.c b8bedb9c0084ceb51a40f54fcca2ce048c8de852 -F src/update.c 227e6cd512108b84f69421fc6c7aa1b83d60d6e0 +F src/update.c c6be6a5af1198aeac9b25d842d97e52695ffc9e6 F src/utf.c 1baeeac91707a4df97ccc6141ec0f808278af685 F src/util.c 32aebf04c10e51ad3977a928b7416bed671b620b F src/vacuum.c 241a8386727c1497eba4955933356dfba6ff8c9f @@ -230,7 +230,7 @@ F src/vtab.c 0e8e0cb30dffb078367e843e84e37ef99236c7e4 F src/wal.c 5ac2119e23ee4424599d4275b66dc88d612a0543 F src/wal.h 96669b645e27cd5a111ba59f0cae7743a207bc3c F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c c232055196c3847206add5852102f2637d50e8ed +F src/where.c ecdf9bfa72cc2b6dade7770abebd0f58520dc4c3 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 6745008c144bd2956d58864d21f7b304689c1cce @@ -296,7 +296,7 @@ F test/collate1.test e3eaa48c21e150814be1a7b852d2a8af24458d04 F test/collate2.test 04cebe4a033be319d6ddbb3bbc69464e01700b49 F test/collate3.test d28d2cfab2c3a3d4628ae4b2b7afc9965daa3b4c F test/collate4.test 3d3f123f83fd8ccda6f48d617e44e661b9870c7d -F test/collate5.test fe0f43c4740d7b71b959cac668d19e42f2e06e4d +F test/collate5.test 67f1d3e848e230ff4802815a79acb0a8b5e69bd7 F test/collate6.test 8be65a182abaac8011a622131486dafb8076e907 F test/collate7.test fac8db7aac3978466c04ae892cc74dcf2bc031aa F test/collate8.test df26649cfcbddf109c04122b340301616d3a88f6 @@ -342,6 +342,7 @@ F test/descidx1.test b1353c1a15cfbee97b13a1dcedaf0fe78163ba6a F test/descidx2.test 9f1a0c83fd57f8667c82310ca21b30a350888b5d F test/descidx3.test fe720e8b37d59f4cef808b0bf4e1b391c2e56b6f F test/diskfull.test 0cede7ef9d8f415d9d3944005c76be7589bb5ebb +F test/distinct.test b3c4ebe6dbddb31d55b168fdaec08456ef323dc4 F test/distinctagg.test 1a6ef9c87a58669438fc771450d7a72577417376 F test/e_expr.test 8a35ce2718c61e871970bda09f4f3e549067c1ba F test/e_fkey.test 35a4ec281da928fa8ebc3591a9c957258ecbb8be @@ -454,7 +455,7 @@ F test/init.test 15c823093fdabbf7b531fe22cf037134d09587a7 F test/insert.test aef273dd1cee84cc92407469e6bd1b3cdcb76908 F test/insert2.test 4f3a04d168c728ed5ec2c88842e772606c7ce435 F test/insert3.test 1b7db95a03ad9c5013fdf7d6722b6cd66ee55e30 -F test/insert4.test c1469999a58e86a85b74df645a820f4cc7a8273b +F test/insert4.test 50e7a074a555663ad8de7a998ccb78ecf1c771c9 F test/insert5.test 1f93cbe9742110119133d7e8e3ccfe6d7c249766 F test/intarray.test 066b7d7ac38d25bf96f87f1b017bfc687551cdd4 F test/interrupt.test 42e7cf98646fd9cb4a3b131a93ed3c50b9e149f1 @@ -529,7 +530,7 @@ F test/misc1.test e56baf44656dd68d6475a4b44521045a60241e9b F test/misc2.test a628db7b03e18973e5d446c67696b03de718c9fd F test/misc3.test 72c5dc87a78e7865c5ec7a969fc572913dbe96b6 F test/misc4.test 9c078510fbfff05a9869a0b6d8b86a623ad2c4f6 -F test/misc5.test 45b2e3ed5f79af2b4f38ae362eaf4c49674575bd +F test/misc5.test 9f9338f8211c7f5d1cbe16331fa65d019501aa50 F test/misc6.test 953cc693924d88e6117aeba16f46f0bf5abede91 F test/misc7.test c5f4e6a82e04e71820c0f9f64f6733f04c8ae0ae F test/misuse.test 30b3a458e5a70c31e74c291937b6c82204c59f33 @@ -587,7 +588,7 @@ F test/select7.test dad6f00f0d49728a879d6eb6451d4752db0b0abe F test/select8.test 391de11bdd52339c30580dabbbbe97e3e9a3c79d F test/select9.test 74c0fb2c6eecb0219cbed0cbe3df136f8fbf9343 F test/selectA.test 06d1032fa9009314c95394f2ca2e60d9f7ae8532 -F test/selectB.test f305cc6660804cb239aab4e2f26b0e288b59958b +F test/selectB.test 0d072c5846071b569766e6cd7f923f646a8b2bfa F test/selectC.test f9bf1bc4581b5b8158caa6e4e4f682acb379fb25 F test/server1.test f5b790d4c0498179151ca8a7715a65a7802c859c F test/shared.test b9114eaea7e748a3a4c8ff7b9ca806c8f95cef3e @@ -624,7 +625,7 @@ F test/tclsqlite.test 8c154101e704170c2be10f137a5499ac2c6da8d3 F test/tempdb.test 800c36623d67a2ad1f58784b9c5644e0405af6e6 F test/temptable.test f42121a0d29a62f00f93274464164177ab1cc24a F test/temptrigger.test b0273db072ce5f37cf19140ceb1f0d524bbe9f05 -F test/tester.tcl c491b9f9533dacdf2523238940b8e8033f2f287b +F test/tester.tcl 8a7f88189e7001979cde27877ff4b19781f097a3 F test/thread001.test a3e6a7254d1cb057836cb3145b60c10bf5b7e60f F test/thread002.test afd20095e6e845b405df4f2c920cb93301ca69db F test/thread003.test b824d4f52b870ae39fc5bae4d8070eca73085dca @@ -849,7 +850,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 8d924e160703e02958db9c9bc1fd2c600e01e606 -R 7803554a59050b798bb3c36bfc235df9 -U drh -Z 02cc4f019cee77499bb297aefe853ba9 +P 7155e6f3282671487f262fd51a7cb139dffdbde6 +R 6e0436030204a21a4eb3844cebd2fc86 +U dan +Z 721893e46221c159c6e716d0c7bd616b diff --git a/manifest.uuid b/manifest.uuid index 1b6a94285a..e5f7c63763 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7155e6f3282671487f262fd51a7cb139dffdbde6 \ No newline at end of file +c593792ce00d0a6379f31cab1bca22f5e159ce18 \ No newline at end of file diff --git a/src/delete.c b/src/delete.c index bd7ac3d1f3..ca516fbf11 100644 --- a/src/delete.c +++ b/src/delete.c @@ -362,7 +362,9 @@ void sqlite3DeleteFrom( /* Collect rowids of every row to be deleted. */ sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,WHERE_DUPLICATES_OK); + pWInfo = sqlite3WhereBegin( + pParse, pTabList, pWhere, 0, 0, WHERE_DUPLICATES_OK + ); if( pWInfo==0 ) goto delete_from_cleanup; regRowid = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, iRowid); sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, regRowid); diff --git a/src/fkey.c b/src/fkey.c index f0ad40d837..e19e082a4c 100644 --- a/src/fkey.c +++ b/src/fkey.c @@ -548,7 +548,7 @@ static void fkScanChildren( ** clause. If the constraint is not deferred, throw an exception for ** each row found. Otherwise, for deferred constraints, increment the ** deferred constraint counter by nIncr for each row selected. */ - pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0); + pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0); if( nIncr>0 && pFKey->isDeferred==0 ){ sqlite3ParseToplevel(pParse)->mayAbort = 1; } diff --git a/src/select.c b/src/select.c index 0f24c6838a..75e4caa22e 100644 --- a/src/select.c +++ b/src/select.c @@ -3633,6 +3633,7 @@ int sqlite3Select( int distinct; /* Table to use for the distinct set */ int rc = 1; /* Value to return from this function */ int addrSortIndex; /* Address of an OP_OpenEphemeral instruction */ + int addrDistinctIndex; /* Address of an OP_OpenEphemeral instruction */ AggInfo sAggInfo; /* Information used by aggregate queries */ int iEnd; /* Address of the end of the query */ sqlite3 *db; /* The database connection */ @@ -3751,17 +3752,6 @@ int sqlite3Select( } #endif - /* If possible, rewrite the query to use GROUP BY instead of DISTINCT. - ** GROUP BY might use an index, DISTINCT never does. - */ - assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 ); - if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){ - p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); - pGroupBy = p->pGroupBy; - p->selFlags &= ~SF_Distinct; - isDistinct = 0; - } - /* If there is both a GROUP BY and an ORDER BY clause and they are ** identical, then disable the ORDER BY clause since the GROUP BY ** will cause elements to come out in the correct order. This is @@ -3774,6 +3764,30 @@ int sqlite3Select( pOrderBy = 0; } + /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and + ** if the select-list is the same as the ORDER BY list, then this query + ** can be rewritten as a GROUP BY. In other words, this: + ** + ** SELECT DISTINCT xyz FROM ... ORDER BY xyz + ** + ** is transformed to: + ** + ** SELECT xyz FROM ... GROUP BY xyz + ** + ** The second form is preferred as a single index (or temp-table) may be + ** used for both the ORDER BY and DISTINCT processing. As originally + ** written the query must use a temp-table for at least one of the ORDER + ** BY and DISTINCT, and an index or separate temp-table for the other. + */ + if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct + && sqlite3ExprListCompare(pOrderBy, p->pEList)==0 + ){ + p->selFlags &= ~SF_Distinct; + p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); + pGroupBy = p->pGroupBy; + pOrderBy = 0; + } + /* If there is an ORDER BY clause, then this sorting ** index might end up being unused if the data can be ** extracted in pre-sorted order. If that is the case, then the @@ -3807,13 +3821,12 @@ int sqlite3Select( /* Open a virtual index to use for the distinct set. */ - if( isDistinct ){ + if( p->selFlags & SF_Distinct ){ KeyInfo *pKeyInfo; - assert( isAgg || pGroupBy ); distinct = pParse->nTab++; pKeyInfo = keyInfoFromExprList(pParse, p->pEList); - sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0, - (char*)pKeyInfo, P4_KEYINFO_HANDOFF); + addrDistinctIndex = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0, + (char*)pKeyInfo, P4_KEYINFO_HANDOFF); sqlite3VdbeChangeP5(v, BTREE_UNORDERED); }else{ distinct = -1; @@ -3821,10 +3834,10 @@ int sqlite3Select( /* Aggregate and non-aggregate queries are handled differently */ if( !isAgg && pGroupBy==0 ){ - /* This case is for non-aggregate queries - ** Begin the database scan - */ - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, 0); + ExprList *pDist = (isDistinct ? p->pEList : 0); + + /* Begin the database scan. */ + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, pDist, 0); if( pWInfo==0 ) goto select_end; if( pWInfo->nRowOut < p->nSelectRow ) p->nSelectRow = pWInfo->nRowOut; @@ -3837,10 +3850,51 @@ int sqlite3Select( p->addrOpenEphm[2] = -1; } - /* Use the standard inner loop - */ - assert(!isDistinct); - selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, -1, pDest, + if( pWInfo->eDistinct ){ + VdbeOp *pOp; /* No longer required OpenEphemeral instr. */ + + pOp = sqlite3VdbeGetOp(v, addrDistinctIndex); + + assert( isDistinct ); + assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED + || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE + ); + distinct = -1; + if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){ + int iJump; + int iExpr; + int iFlag = ++pParse->nMem; + int iBase = pParse->nMem+1; + int iBase2 = iBase + pEList->nExpr; + pParse->nMem += (pEList->nExpr*2); + + /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The + ** OP_Integer initializes the "first row" flag. */ + pOp->opcode = OP_Integer; + pOp->p1 = 1; + pOp->p2 = iFlag; + + sqlite3ExprCodeExprList(pParse, pEList, iBase, 1); + iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1; + sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1); + for(iExpr=0; iExprnExpr; iExpr++){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[iExpr].pExpr); + sqlite3VdbeAddOp3(v, OP_Ne, iBase+iExpr, iJump, iBase2+iExpr); + sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ); + sqlite3VdbeChangeP5(v, SQLITE_NULLEQ); + } + sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue); + + sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag); + assert( sqlite3VdbeCurrentAddr(v)==iJump ); + sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr); + }else{ + pOp->opcode = OP_Noop; + } + } + + /* Use the standard inner loop. */ + selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest, pWInfo->iContinue, pWInfo->iBreak); /* End the database scan loop. @@ -3950,7 +4004,7 @@ int sqlite3Select( ** in the right order to begin with. */ sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0, 0); if( pWInfo==0 ) goto select_end; if( pGroupBy==0 ){ /* The optimizer is able to deliver rows in group by order so @@ -4209,7 +4263,7 @@ int sqlite3Select( ** of output. */ resetAccumulator(pParse, &sAggInfo); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, flag); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, 0, flag); if( pWInfo==0 ){ sqlite3ExprListDelete(db, pDel); goto select_end; diff --git a/src/sqliteInt.h b/src/sqliteInt.h index fece2ba33f..8fd2d709d3 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1913,6 +1913,7 @@ struct WhereInfo { u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */ u8 okOnePass; /* Ok to use one-pass algorithm for UPDATE or DELETE */ u8 untestedTerms; /* Not all WHERE terms resolved by outer loop */ + u8 eDistinct; SrcList *pTabList; /* List of tables in the join */ int iTop; /* The very beginning of the WHERE loop */ int iContinue; /* Jump here to continue with next record */ @@ -1924,6 +1925,9 @@ struct WhereInfo { WhereLevel a[1]; /* Information about each nest loop in WHERE */ }; +#define WHERE_DISTINCT_UNIQUE 1 +#define WHERE_DISTINCT_ORDERED 2 + /* ** A NameContext defines a context in which to resolve table and column ** names. The context consists of a list of tables (the pSrcList) field and @@ -2673,7 +2677,7 @@ Expr *sqlite3LimitWhere(Parse *, SrcList *, Expr *, ExprList *, Expr *, Expr *, #endif void sqlite3DeleteFrom(Parse*, SrcList*, Expr*); void sqlite3Update(Parse*, SrcList*, ExprList*, Expr*, int); -WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**, u16); +WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**,ExprList*,u16); void sqlite3WhereEnd(WhereInfo*); int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int); void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int); diff --git a/src/update.c b/src/update.c index 8bf58d7666..a2fd7b9c0c 100644 --- a/src/update.c +++ b/src/update.c @@ -312,7 +312,9 @@ void sqlite3Update( /* Begin the database scan */ sqlite3VdbeAddOp2(v, OP_Null, 0, regOldRowid); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0, WHERE_ONEPASS_DESIRED); + pWInfo = sqlite3WhereBegin( + pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED + ); if( pWInfo==0 ) goto update_cleanup; okOnePass = pWInfo->okOnePass; diff --git a/src/where.c b/src/where.c index c1885856c0..ad6b6dd779 100644 --- a/src/where.c +++ b/src/where.c @@ -254,6 +254,7 @@ struct WhereCost { #define WHERE_MULTI_OR 0x10000000 /* OR using multiple indices */ #define WHERE_TEMP_INDEX 0x20000000 /* Uses an ephemeral index */ #define WHERE_UNQ_WANTED 0x40000000 /* True if UNIQUE would be helpful */ +#define WHERE_DISTINCT 0x80000000 /* Correct order for DISTINCT */ /* ** Initialize a preallocated WhereClause structure. @@ -1398,6 +1399,158 @@ static int referencesOtherTables( return 0; } +/* +** This function searches the expression list passed as the second argument +** for an expression of type TK_COLUMN that refers to the same column and +** uses the same collation sequence as the iCol'th column of index pIdx. +** Argument iBase is the cursor number used for the table that pIdx refers +** to. +** +** If such an expression is found, its index in pList->a[] is returned. If +** no expression is found, -1 is returned. +*/ +static int findIndexCol( + Parse *pParse, /* Parse context */ + ExprList *pList, /* Expression list to search */ + int iBase, /* Cursor for table associated with pIdx */ + Index *pIdx, /* Index to match column of */ + int iCol /* Column of index to match */ +){ + int i; + const char *zColl = pIdx->azColl[iCol]; + + for(i=0; inExpr; i++){ + Expr *p = pList->a[i].pExpr; + if( pIdx->aiColumn[iCol]==p->iColumn && iBase==p->iTable ){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, p); + if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){ + return i; + } + } + } + + return -1; +} + +/* +** This routine determines if pIdx can be used to assist in processing a +** DISTINCT qualifier. In other words, it tests whether or not using this +** index for the outer loop guarantees that rows with equal values for +** all expressions in the pDistinct list are delivered grouped together. +** +** For example, the query +** +** SELECT DISTINCT a, b, c FROM tbl WHERE a = ? +** +** can benefit from any index on columns "b" and "c". +*/ +static int isDistinctIndex( + Parse *pParse, /* Parsing context */ + WhereClause *pWC, /* The WHERE clause */ + Index *pIdx, /* The index being considered */ + int base, /* Cursor number for the table pIdx is on */ + ExprList *pDistinct, /* The DISTINCT expressions */ + int nEqCol /* Number of index columns with == */ +){ + Bitmask mask = 0; /* Mask of unaccounted for pDistinct exprs */ + int i; /* Iterator variable */ + + if( pIdx->zName==0 || pDistinct==0 || pDistinct->nExpr>=BMS ) return 0; + + /* Loop through all the expressions in the distinct list. If any of them + ** are not simple column references, return early. Otherwise, test if the + ** WHERE clause contains a "col=X" clause. If it does, the expression + ** can be ignored. If it does not, and the column does not belong to the + ** same table as index pIdx, return early. Finally, if there is no + ** matching "col=X" expression and the column is on the same table as pIdx, + ** set the corresponding bit in variable mask. + */ + for(i=0; inExpr; i++){ + WhereTerm *pTerm; + Expr *p = pDistinct->a[i].pExpr; + if( p->op!=TK_COLUMN ) return 0; + pTerm = findTerm(pWC, p->iTable, p->iColumn, ~(Bitmask)0, WO_EQ, 0); + if( pTerm ){ + Expr *pX = pTerm->pExpr; + CollSeq *p1 = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight); + CollSeq *p2 = sqlite3ExprCollSeq(pParse, p); + if( p1==p2 ) continue; + } + if( p->iTable!=base ) return 0; + mask |= (((Bitmask)1) << i); + } + + for(i=nEqCol; mask && inColumn; i++){ + int iExpr = findIndexCol(pParse, pDistinct, base, pIdx, i); + if( iExpr<0 ) break; + mask &= ~(((Bitmask)1) << iExpr); + } + + return (mask==0); +} + + +/* +** Return true if the DISTINCT expression-list passed as the third argument +** is redundant. A DISTINCT list is redundant if the database contains a +** UNIQUE index that guarantees that the result of the query will be distinct +** anyway. +*/ +static int isDistinctRedundant( + Parse *pParse, + SrcList *pTabList, + WhereClause *pWC, + ExprList *pDistinct +){ + Table *pTab; + Index *pIdx; + int i; + int iBase; + + /* If there is more than one table or sub-select in the FROM clause of + ** this query, then it will not be possible to show that the DISTINCT + ** clause is redundant. */ + if( pTabList->nSrc!=1 ) return 0; + iBase = pTabList->a[0].iCursor; + pTab = pTabList->a[0].pTab; + + /* If any of the expressions is an IPK column on table iBase, then return + ** true. Note: The (p->iTable==iBase) part of this test may be false if the + ** current SELECT is a correlated sub-query. + */ + for(i=0; inExpr; i++){ + Expr *p = pDistinct->a[i].pExpr; + if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1; + } + + /* Loop through all indices on the table, checking each to see if it makes + ** the DISTINCT qualifier redundant. It does so if: + ** + ** 1. The index is itself UNIQUE, and + ** + ** 2. All of the columns in the index are either part of the pDistinct + ** list, or else the WHERE clause contains a term of the form "col=X", + ** where X is a constant value. The collation sequences of the + ** comparison and select-list expressions must match those of the index. + */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->onError==OE_None ) continue; + for(i=0; inColumn; i++){ + int iCol = pIdx->aiColumn[i]; + if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) + && 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i) + ){ + break; + } + } + if( i==pIdx->nColumn ){ + /* This index implies that the DISTINCT qualifier is redundant. */ + return 1; + } + } + + return 0; +} /* ** This routine decides if pIdx can be used to satisfy the ORDER BY @@ -1434,7 +1587,10 @@ static int isSortingIndex( struct ExprList_item *pTerm; /* A term of the ORDER BY clause */ sqlite3 *db = pParse->db; - assert( pOrderBy!=0 ); + if( !pOrderBy ) return 0; + if( wsFlags & WHERE_COLUMN_IN ) return 0; + if( pIdx->bUnordered ) return 0; + nTerm = pOrderBy->nExpr; assert( nTerm>0 ); @@ -1524,7 +1680,7 @@ static int isSortingIndex( } } - *pbRev = sortOrder!=0; + if( pbRev ) *pbRev = sortOrder!=0; if( j>=nTerm ){ /* All terms of the ORDER BY clause are covered by this index so ** this index can be used for sorting. */ @@ -2690,6 +2846,7 @@ static void bestBtreeIndex( Bitmask notReady, /* Mask of cursors not available for indexing */ Bitmask notValid, /* Cursors not available for any purpose */ ExprList *pOrderBy, /* The ORDER BY clause */ + ExprList *pDistinct, /* The select-list if query is DISTINCT */ WhereCost *pCost /* Lowest cost query plan */ ){ int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */ @@ -2830,7 +2987,8 @@ static void bestBtreeIndex( int nInMul = 1; /* Number of distinct equalities to lookup */ int estBound = 100; /* Estimated reduction in search space */ int nBound = 0; /* Number of range constraints seen */ - int bSort = 0; /* True if external sort required */ + int bSort = !!pOrderBy; /* True if external sort required */ + int bDist = !!pDistinct; /* True if index cannot help with DISTINCT */ int bLookup = 0; /* True if not a covering index */ WhereTerm *pTerm; /* A single term of the WHERE clause */ #ifdef SQLITE_ENABLE_STAT2 @@ -2894,17 +3052,20 @@ static void bestBtreeIndex( ** naturally scan rows in the required order, set the appropriate flags ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index ** will scan rows in a different order, set the bSort variable. */ - if( pOrderBy ){ - if( (wsFlags & WHERE_COLUMN_IN)==0 - && pProbe->bUnordered==0 - && isSortingIndex(pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, - nEq, wsFlags, &rev) - ){ - wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; - wsFlags |= (rev ? WHERE_REVERSE : 0); - }else{ - bSort = 1; - } + if( isSortingIndex( + pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, nEq, wsFlags, &rev) + ){ + bSort = 0; + wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; + wsFlags |= (rev ? WHERE_REVERSE : 0); + } + + /* If there is a DISTINCT qualifier and this index will scan rows in + ** order of the DISTINCT expressions, clear bDist and set the appropriate + ** flags in wsFlags. */ + if( isDistinctIndex(pParse, pWC, pProbe, iCur, pDistinct, nEq) ){ + bDist = 0; + wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT; } /* If currently calculating the cost of using an index (not the IPK @@ -3021,6 +3182,9 @@ static void bestBtreeIndex( if( bSort ){ cost += nRow*estLog(nRow)*3; } + if( bDist ){ + cost += nRow*estLog(nRow)*3; + } /**** Cost of using this index has now been computed ****/ @@ -3166,7 +3330,7 @@ static void bestIndex( }else #endif { - bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost); + bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, 0, pCost); } } @@ -4132,7 +4296,7 @@ static Bitmask codeOneLoopStart( if( pOrTerm->leftCursor==iCur || pOrTerm->eOperator==WO_AND ){ WhereInfo *pSubWInfo; /* Info for single OR-term scan */ /* Loop through table entries that match term pOrTerm. */ - pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, + pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, 0, WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE | WHERE_FORCE_TABLE | WHERE_ONETABLE_ONLY); if( pSubWInfo ){ @@ -4375,6 +4539,7 @@ WhereInfo *sqlite3WhereBegin( SrcList *pTabList, /* A list of all tables to be scanned */ Expr *pWhere, /* The WHERE clause */ ExprList **ppOrderBy, /* An ORDER BY clause, or NULL */ + ExprList *pDistinct, /* The select-list for DISTINCT queries - or NULL */ u16 wctrlFlags /* One of the WHERE_* flags defined in sqliteInt.h */ ){ int i; /* Loop counter */ @@ -4502,6 +4667,15 @@ WhereInfo *sqlite3WhereBegin( goto whereBeginError; } + /* Check if the DISTINCT qualifier, if there is one, is redundant. + ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to + ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT. + */ + if( pDistinct && isDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){ + pDistinct = 0; + pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; + } + /* Chose the best index to use for each table in the FROM clause. ** ** This loop fills in the following fields: @@ -4587,6 +4761,7 @@ WhereInfo *sqlite3WhereBegin( int doNotReorder; /* True if this table should not be reordered */ WhereCost sCost; /* Cost information from best[Virtual]Index() */ ExprList *pOrderBy; /* ORDER BY clause for index to optimize */ + ExprList *pDist; /* DISTINCT clause for index to optimize */ doNotReorder = (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0; if( j!=iFrom && doNotReorder ) break; @@ -4597,6 +4772,7 @@ WhereInfo *sqlite3WhereBegin( } mask = (isOptimal ? m : notReady); pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0); + pDist = (i==0 ? pDistinct : 0); if( pTabItem->pIndex==0 ) nUnconstrained++; WHERETRACE(("=== trying table %d with isOptimal=%d ===\n", @@ -4611,7 +4787,7 @@ WhereInfo *sqlite3WhereBegin( #endif { bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy, - &sCost); + pDist, &sCost); } assert( isOptimal || (sCost.used¬Ready)==0 ); @@ -4672,6 +4848,10 @@ WhereInfo *sqlite3WhereBegin( if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 ){ *ppOrderBy = 0; } + if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){ + assert( pWInfo->eDistinct==0 ); + pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; + } andFlags &= bestPlan.plan.wsFlags; pLevel->plan = bestPlan.plan; testcase( bestPlan.plan.wsFlags & WHERE_INDEXED ); diff --git a/test/collate5.test b/test/collate5.test index 2e4b89d5ca..e5bea7a5a5 100644 --- a/test/collate5.test +++ b/test/collate5.test @@ -57,17 +57,17 @@ do_test collate5-1.1 { execsql { SELECT DISTINCT a FROM collate5t1; } -} {A B N} +} {a b n} do_test collate5-1.2 { execsql { SELECT DISTINCT b FROM collate5t1; } -} {{} Apple apple banana} +} {apple Apple banana {}} do_test collate5-1.3 { execsql { SELECT DISTINCT a, b FROM collate5t1; } -} {A Apple a apple B banana N {}} +} {a apple A Apple b banana n {}} # Ticket #3376 # diff --git a/test/distinct.test b/test/distinct.test new file mode 100644 index 0000000000..c7749efb8d --- /dev/null +++ b/test/distinct.test @@ -0,0 +1,170 @@ +# 2011 July 1 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is the DISTINCT modifier. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +set testprefix distinct + +proc do_execsql_test {testname sql {result {}}} { + uplevel do_test $testname [list "execsql {$sql}"] [list [list {*}$result]] +} + + +proc is_distinct_noop {sql} { + set sql1 $sql + set sql2 [string map {DISTINCT ""} $sql] + + set program1 [list] + set program2 [list] + db eval "EXPLAIN $sql1" { + if {$opcode != "Noop"} { lappend program1 $opcode } + } + db eval "EXPLAIN $sql2" { + if {$opcode != "Noop"} { lappend program2 $opcode } + } + + return [expr {$program1==$program2}] +} + +proc do_distinct_noop_test {tn sql} { + uplevel [list do_test $tn [list is_distinct_noop $sql] 1] +} +proc do_distinct_not_noop_test {tn sql} { + uplevel [list do_test $tn [list is_distinct_noop $sql] 0] +} + +proc do_temptables_test {tn sql temptables} { + uplevel [list do_test $tn [subst -novar { + set ret "" + db eval "EXPLAIN [set sql]" { + if {$opcode == "OpenEphemeral"} { + if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" } + if {$p5 == "10"} { + lappend ret hash + } else { + lappend ret btree + } + } + } + set ret + }] $temptables] +} + + +#------------------------------------------------------------------------- +# The following tests - distinct-1.* - check that the planner correctly +# detects cases where a UNIQUE index means that a DISTINCT clause is +# redundant. Currently the planner only detects such cases when there +# is a single table in the FROM clause. +# +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c, d); + CREATE UNIQUE INDEX i1 ON t1(b, c); + CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase); + + CREATE TABLE t2(x INTEGER PRIMARY KEY, y); + + CREATE TABLE t3(c1 PRIMARY KEY, c2); + CREATE INDEX i3 ON t3(c2); +} +foreach {tn noop sql} { + + 1 1 "SELECT DISTINCT b, c FROM t1" + 2 1 "SELECT DISTINCT c FROM t1 WHERE b = ?" + 3 1 "SELECT DISTINCT rowid FROM t1" + 4 1 "SELECT DISTINCT rowid, a FROM t1" + 5 1 "SELECT DISTINCT x FROM t2" + 6 1 "SELECT DISTINCT * FROM t2" + 7 1 "SELECT DISTINCT * FROM (SELECT * FROM t2)" + + 8 1 "SELECT DISTINCT * FROM t1" + + 8 0 "SELECT DISTINCT a, b FROM t1" + + 9 0 "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)" + 10 0 "SELECT DISTINCT c FROM t1" + 11 0 "SELECT DISTINCT b FROM t1" + + 12 0 "SELECT DISTINCT a, d FROM t1" + 13 0 "SELECT DISTINCT a, b, c COLLATE nocase FROM t1" + 14 1 "SELECT DISTINCT a, d COLLATE nocase FROM t1" + 15 0 "SELECT DISTINCT a, d COLLATE binary FROM t1" + 16 1 "SELECT DISTINCT a, b, c COLLATE binary FROM t1" + + 16 0 "SELECT DISTINCT t1.rowid FROM t1, t2" + 17 0 { /* Technically, it would be possible to detect that DISTINCT + ** is a no-op in cases like the following. But SQLite does not + ** do so. */ + SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid } + + 18 1 "SELECT DISTINCT c1, c2 FROM t3" + 19 1 "SELECT DISTINCT c1 FROM t3" + 20 1 "SELECT DISTINCT * FROM t3" + 21 0 "SELECT DISTINCT c2 FROM t3" + + 22 0 "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)" + 23 1 "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)" + + 24 0 "SELECT DISTINCT rowid/2 FROM t1" + 25 1 "SELECT DISTINCT rowid/2, rowid FROM t1" + 26 1 "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?" +} { + if {$noop} { + do_distinct_noop_test 1.$tn $sql + } else { + do_distinct_not_noop_test 1.$tn $sql + } +} + +#------------------------------------------------------------------------- +# The following tests - distinct-2.* - test cases where an index is +# used to deliver results in order of the DISTINCT expressions. +# +drop_all_tables +do_execsql_test 2.0 { + CREATE TABLE t1(a, b, c); + + CREATE INDEX i1 ON t1(a, b); + CREATE INDEX i2 ON t1(b COLLATE nocase, c COLLATE nocase); + + INSERT INTO t1 VALUES('a', 'b', 'c'); + INSERT INTO t1 VALUES('A', 'B', 'C'); + INSERT INTO t1 VALUES('a', 'b', 'c'); + INSERT INTO t1 VALUES('A', 'B', 'C'); +} + +foreach {tn sql temptables res} { + 1 "a, b FROM t1" {} {A B a b} + 2 "b, a FROM t1" {} {B A b a} + 3 "a, b, c FROM t1" {hash} {a b c A B C} + 4 "a, b, c FROM t1 ORDER BY a, b, c" {btree} {A B C a b c} + 5 "b FROM t1 WHERE a = 'a'" {} {b} + 6 "b FROM t1" {hash} {b B} + 7 "a FROM t1" {} {A a} + 8 "b COLLATE nocase FROM t1" {} {b} + 9 "b COLLATE nocase FROM t1 ORDER BY b COLLATE nocase" {} {B} +} { + do_execsql_test 2.$tn.1 "SELECT DISTINCT $sql" $res + do_temptables_test 2.$tn.2 "SELECT DISTINCT $sql" $temptables +} + +do_execsql_test 2.A { + SELECT (SELECT DISTINCT o.a FROM t1 AS i) FROM t1 AS o; +} {a A a A} + + + + +finish_test diff --git a/test/insert4.test b/test/insert4.test index 0b069e996d..fa501937ad 100644 --- a/test/insert4.test +++ b/test/insert4.test @@ -112,7 +112,7 @@ do_test insert4-2.4.1 { INSERT INTO t3 SELECT DISTINCT * FROM t2; SELECT * FROM t3; } -} {1 9 9 1} +} {9 1 1 9} xferopt_test insert4-2.4.2 0 do_test insert4-2.4.3 { catchsql { diff --git a/test/misc5.test b/test/misc5.test index 34b2284bc1..b3832f18ae 100644 --- a/test/misc5.test +++ b/test/misc5.test @@ -505,7 +505,7 @@ ifcapable subquery { ) ORDER BY LOWER(artist) ASC; } - } {one} + } {two} } # Ticket #1370. Do not overwrite small files (less than 1024 bytes) diff --git a/test/selectB.test b/test/selectB.test index 3fdf85c0f9..b9d979acb7 100644 --- a/test/selectB.test +++ b/test/selectB.test @@ -355,7 +355,7 @@ for {set ii 3} {$ii <= 4} {incr ii} { SELECT DISTINCT (a/10) FROM t1 UNION ALL SELECT DISTINCT(d%2) FROM t2 ) } - } {0 1 0 1} + } {0 1 1 0} do_test selectB-$ii.20 { execsql { diff --git a/test/tester.tcl b/test/tester.tcl index 92115c55c9..fc9ac1a1d9 100644 --- a/test/tester.tcl +++ b/test/tester.tcl @@ -20,7 +20,7 @@ # Commands to manipulate the db and the file-system at a high level: # # copy_file FROM TO -# drop_all_table ?DB? +# drop_all_tables ?DB? # forcedelete FILENAME # # Test the capability of the SQLite version built into the interpreter to