From: dan Date: Fri, 1 Jul 2011 14:21:38 +0000 (+0000) Subject: Improvements and tests for detection of redundant DISTINCT qualifiers. X-Git-Tag: version-3.7.8~38^2~31^2~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=50118cdfdb27180fdf6913ae0310ef189cc07148;p=thirdparty%2Fsqlite.git Improvements and tests for detection of redundant DISTINCT qualifiers. FossilOrigin-Name: 7337293c87fb563604dd6ad284f2d1e30c938b4c --- diff --git a/manifest b/manifest index 4197d1e43f..7cf6c1516e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Experimental\schanges\sto\simprove\soptimization\sof\sDISTINCT\squeries. -D 2011-06-30T20:17:15.042 +C Improvements\sand\stests\sfor\sdetection\sof\sredundant\sDISTINCT\squalifiers. +D 2011-07-01T14:21:38.743 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -179,7 +179,7 @@ F src/printf.c 585a36b6a963df832cfb69505afa3a34ed5ef8a1 F src/random.c cd4a67b3953b88019f8cd4ccd81394a8ddfaba50 F src/resolve.c 1c0f32b64f8e3f555fe1f732f9d6f501a7f05706 F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697 -F src/select.c cf259606f91f53f71bafd7a2da47f30cda6efc58 +F src/select.c e9d74f943c195f2673990febd3a455b421964ca0 F src/shell.c 0e0173b3e79d956368013e759f084caa7995ecb1 F src/sqlite.h.in 4b7255c10d39c5faf089dbd29cde7c367ff39f1f F src/sqlite3ext.h 1a1a4f784aa9c3b00edd287940197de52487cd93 @@ -250,7 +250,7 @@ F src/vtab.c 901791a47318c0562cd0c676a2c6ff1bc530e582 F src/wal.c 0c70ad7b1cac6005fa5e2cbefd23ee05e391c290 F src/wal.h 66b40bd91bc29a5be1c88ddd1f5ade8f3f48728a F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 0bdcf6704dd0d8471052712e1d63a02990b7d0bf +F src/where.c 207cf2c12b391cfdfae89ca3c8afe2adbf46f712 F test/8_3_names.test b93687beebd17f6ebf812405a6833bae5d1f4199 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 @@ -368,6 +368,7 @@ F test/descidx1.test b1353c1a15cfbee97b13a1dcedaf0fe78163ba6a F test/descidx2.test 9f1a0c83fd57f8667c82310ca21b30a350888b5d F test/descidx3.test fe720e8b37d59f4cef808b0bf4e1b391c2e56b6f F test/diskfull.test 0cede7ef9d8f415d9d3944005c76be7589bb5ebb +F test/distinct.test 17985484790bbb6d098af682f808f11940ca6375 F test/distinctagg.test 1a6ef9c87a58669438fc771450d7a72577417376 F test/e_createtable.test 4771686a586b6ae414f927c389b2c101cc05c028 F test/e_delete.test e2ae0d3fce5efd70fef99025e932afffc5616fab @@ -949,10 +950,7 @@ F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00 F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262 -P 591de898f41630156cc0fc6ef17dd3ee5e7c479f -R 5673acf06c190a75a743da9daebc8ac9 -T *branch * experimental -T *sym-experimental * -T -sym-trunk * +P f7ba0219ef2f235543c258be736955d91ca5ecce +R 778c4d57ca4312ecfa96b1ac67177d07 U dan -Z ab52fc5a81a2f6dd6b96daa429077988 +Z 94d823140247a8802d583b432d46d049 diff --git a/manifest.uuid b/manifest.uuid index 5aa8e13950..a582d766d2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f7ba0219ef2f235543c258be736955d91ca5ecce \ No newline at end of file +7337293c87fb563604dd6ad284f2d1e30c938b4c \ No newline at end of file diff --git a/src/select.c b/src/select.c index 00107e1de6..45f1c7545d 100644 --- a/src/select.c +++ b/src/select.c @@ -3848,18 +3848,6 @@ int sqlite3Select( } #endif - /* If possible, rewrite the query to use GROUP BY instead of DISTINCT. - ** GROUP BY might use an index, DISTINCT never does. - */ -#if 0 - assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 ); - if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){ - p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); - pGroupBy = p->pGroupBy; - p->selFlags &= ~SF_Distinct; - } -#endif - /* If there is both a GROUP BY and an ORDER BY clause and they are ** identical, then disable the ORDER BY clause since the GROUP BY ** will cause elements to come out in the correct order. This is @@ -3872,6 +3860,30 @@ int sqlite3Select( pOrderBy = 0; } + /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and + ** if the select-list is the same as the ORDER BY list, then this query + ** can be rewritten as a GROUP BY. In other words, this: + ** + ** SELECT DISTINCT xyz FROM ... ORDER BY xyz + ** + ** is transformed to: + ** + ** SELECT xyz FROM ... GROUP BY xyz + ** + ** The second form is preferred as a single index (or temp-table) may be + ** used for both the ORDER BY and DISTINCT processing. As originally + ** written the query must use a temp-table for at least one of the ORDER + ** BY and DISTINCT, and an index or separate temp-table for the other. + */ + if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct + && sqlite3ExprListCompare(pOrderBy, p->pEList)==0 + ){ + p->selFlags &= ~SF_Distinct; + p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); + pGroupBy = p->pGroupBy; + pOrderBy = 0; + } + /* If there is an ORDER BY clause, then this sorting ** index might end up being unused if the data can be ** extracted in pre-sorted order. If that is the case, then the @@ -3935,6 +3947,10 @@ int sqlite3Select( } if( pWInfo->eDistinct ){ + VdbeOp *pOp; /* No longer required OpenEphemeral instr. */ + + pOp = sqlite3VdbeGetOp(v, addrDistinctIndex); + assert( isDistinct ); assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE @@ -3947,11 +3963,9 @@ int sqlite3Select( int iBase = pParse->nMem+1; int iBase2 = iBase + pEList->nExpr; pParse->nMem += (pEList->nExpr*2); - VdbeOp *pOp; /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The ** OP_Integer initializes the "first row" flag. */ - pOp = sqlite3VdbeGetOp(v, addrDistinctIndex); pOp->opcode = OP_Integer; pOp->p1 = 1; pOp->p2 = iFlag; @@ -3970,6 +3984,8 @@ int sqlite3Select( sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag); assert( sqlite3VdbeCurrentAddr(v)==iJump ); sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr); + }else{ + pOp->opcode = OP_Noop; } } diff --git a/src/where.c b/src/where.c index 8aecf9788d..70e9722d8c 100644 --- a/src/where.c +++ b/src/where.c @@ -4302,7 +4302,7 @@ static int whereDistinctRedundant( ){ Table *pTab; Index *pIdx; - int i; + int i; int iBase; /* If there is more than one table or sub-select in the FROM clause of @@ -4312,15 +4312,11 @@ static int whereDistinctRedundant( iBase = pTabList->a[0].iCursor; pTab = pTabList->a[0].pTab; - /* Check if all the expressions in the ExprList are of type TK_COLUMN and - ** on the same table. If this is not the case, return early, since it will - ** not be possible to prove that the DISTINCT qualifier is redundant. - ** If any of the expressions is an IPK column, then return true. - */ + /* If any of the expressions is an IPK column, then return true. */ for(i=0; inExpr; i++){ Expr *p = pDistinct->a[i].pExpr; - if( p->op!=TK_COLUMN || p->iTable!=iBase ) return 0; - if( p->iColumn<0 ) return 1; + assert( p->op!=TK_COLUMN || p->iTable==iBase ); + if( p->op==TK_COLUMN && p->iColumn<0 ) return 1; } /* Loop through all indices on the table, checking each to see if it makes @@ -4343,10 +4339,10 @@ static int whereDistinctRedundant( int j; for(j=0; jnExpr; j++){ Expr *p = pDistinct->a[j].pExpr; - if( p->iColumn==iCol ){ + assert( p->op!=TK_COLUMN || p->iTable==iBase ); + if( p->op==TK_COLUMN && p->iColumn==iCol ){ CollSeq *pColl = sqlite3ExprCollSeq(pParse, p); - const char *zEColl = (pColl ? pColl : pParse->db->pDfltColl)->zName; - if( 0==sqlite3StrICmp(zColl, zEColl) ) break; + if( pColl && 0==sqlite3StrICmp(zColl, pColl->zName) ) break; } } if( j==pDistinct->nExpr ) break; diff --git a/test/distinct.test b/test/distinct.test new file mode 100644 index 0000000000..f855caa833 --- /dev/null +++ b/test/distinct.test @@ -0,0 +1,115 @@ +# 2011 July 1 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is the DISTINCT modifier. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +set testprefix distinct + + +proc is_distinct_noop {sql} { + set sql1 $sql + set sql2 [string map {DISTINCT ""} $sql] + + set program1 [list] + set program2 [list] + db eval "EXPLAIN $sql1" { + if {$opcode != "Noop"} { lappend program1 $opcode } + } + db eval "EXPLAIN $sql2" { + if {$opcode != "Noop"} { lappend program2 $opcode } + } + + return [expr {$program1==$program2}] +} + +proc do_distinct_noop_test {tn sql} { + uplevel [list do_test $tn [list is_distinct_noop $sql] 1] +} +proc do_distinct_not_noop_test {tn sql} { + uplevel [list do_test $tn [list is_distinct_noop $sql] 0] +} + + +#------------------------------------------------------------------------- +# The following tests - distinct-1.* - check that the planner correctly +# detects cases where a UNIQUE index means that a DISTINCT clause is +# redundant. Currently the planner only detects such cases when there +# is a single table in the FROM clause. +# +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c, d); + CREATE UNIQUE INDEX i1 ON t1(b, c); + CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase); + + CREATE TABLE t2(x INTEGER PRIMARY KEY, y); + + CREATE TABLE t3(c1 PRIMARY KEY, c2); + CREATE INDEX i3 ON t3(c2); +} +foreach {tn noop sql} { + + 1 1 "SELECT DISTINCT b, c FROM t1" + 2 1 "SELECT DISTINCT c FROM t1 WHERE b = ?" + 3 1 "SELECT DISTINCT rowid FROM t1" + 4 1 "SELECT DISTINCT rowid, a FROM t1" + 5 1 "SELECT DISTINCT x FROM t2" + 6 1 "SELECT DISTINCT * FROM t2" + 7 1 "SELECT DISTINCT * FROM (SELECT * FROM t2)" + + 8 1 "SELECT DISTINCT * FROM t1" + + 8 0 "SELECT DISTINCT a, b FROM t1" + + 9 0 "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)" + 10 0 "SELECT DISTINCT c FROM t1" + 11 0 "SELECT DISTINCT b FROM t1" + + 12 0 "SELECT DISTINCT a, d FROM t1" + 13 0 "SELECT DISTINCT a, b, c COLLATE nocase FROM t1" + 14 1 "SELECT DISTINCT a, d COLLATE nocase FROM t1" + 15 0 "SELECT DISTINCT a, d COLLATE binary FROM t1" + 16 1 "SELECT DISTINCT a, b, c COLLATE binary FROM t1" + + 16 0 "SELECT DISTINCT t1.rowid FROM t1, t2" + 17 0 { /* Technically, it would be possible to detect that DISTINCT + ** is a no-op in cases like the following. But SQLite does not + ** do so. */ + SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid } + + 18 1 "SELECT DISTINCT c1, c2 FROM t3" + 19 1 "SELECT DISTINCT c1 FROM t3" + 20 1 "SELECT DISTINCT * FROM t3" + 21 0 "SELECT DISTINCT c2 FROM t3" + + 22 0 "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)" + 23 1 "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)" + + 24 0 "SELECT DISTINCT rowid/2 FROM t1" + 25 1 "SELECT DISTINCT rowid/2, rowid FROM t1" + 26 1 "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?" +} { + if {$noop} { + do_distinct_noop_test 1.$tn $sql + } else { + do_distinct_not_noop_test 1.$tn $sql + } +} + + + + + + +finish_test