Disable the OR optimization if it would conflict with column

author drh <drh@noemail.net>

Fri, 23 Feb 2007 23:13:33 +0000 (23:13 +0000)

committer drh <drh@noemail.net>

Fri, 23 Feb 2007 23:13:33 +0000 (23:13 +0000)
author drh <drh@noemail.net>
Fri, 23 Feb 2007 23:13:33 +0000 (23:13 +0000)
committer drh <drh@noemail.net>
Fri, 23 Feb 2007 23:13:33 +0000 (23:13 +0000)
diff --git a/manifest b/manifest

index 423215d82433db2bb05f4377f03d73fb3d691de2..bfe6bdbb394f2022793d323e201474f4f9cdb8e3 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Clarifications\sto\sthe\sdatatype3.html\sdocument.\s(CVS\s3657)
-D 2007-02-23T14:20:38
+C Disable\sthe\sOR\soptimization\sif\sit\swould\sconflict\swith\scolumn\naffinity\scoercions.\s\sTicket\s#2249.\s\sAdditional\scleanup\sand\stesting\nof\sthe\sOR\soptimization.\s(CVS\s3658)
+D 2007-02-23T23:13:34
  F Makefile.in 1fe3d0b46e40fd684e1e61f8e8056cefed16de9f
  F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
  F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@@ -129,7 +129,7 @@ F src/vdbeaux.c c5324d62f51529bccc5be3b04bac2e4eeae1569a
  F src/vdbefifo.c 9efb94c8c3f4c979ebd0028219483f88e57584f5
  F src/vdbemem.c ff2424bee9eaf7c61d1f28bc0e711bebddebd653
  F src/vtab.c 7fbda947e28cbe7adb3ba752a76ca9ef29936750
-F src/where.c 2a919a3fbaff2e55604119f7c60133db459b404c
+F src/where.c d4a10cf428e5bf855e8a7f6c6a95119ca93489f0
  F tclinstaller.tcl 046e3624671962dc50f0481d7c25b38ef803eb42
  F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
  F test/all.test b62fcd122052efaff1b0979aefa2dd65cfc8ee52
@@ -356,7 +356,7 @@ F test/vtab7.test 5f9ef9fb84733e928d5d0267c821072561b198d5
  F test/vtab9.test 87afba55339b0c255e9697fbfb5bfb6120505d9d
  F test/vtab_err.test 224cc80ad700797c48b9cd2c1e0bd7a8517d8609
  F test/where.test 1d020f50c77f37b2dbab9766ca959e6e3278ecdb
-F test/where2.test 61d5b20d9bedc8788a773bbdc5b2ef887725928e
+F test/where2.test 3249d426b3fc7a106713d784e1628307fc308d2e
  F test/where3.test 0a30fe9808b0fa01c46d0fcf4fac0bf6cf75bb30
  F test/where4.test 3fcf53c5ea7af1db3980b3293c2a45b56605f26a
  F tool/diffdb.c 7524b1b5df217c20cd0431f6789851a4e0cb191b
@@ -432,7 +432,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
  F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
  F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
  F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
-P da81725ca1cd894b3f2d734767e10cc0dc329566
-R e9a87dbf4aaa39ef37829fb50c058efb
+P 4692a85edbe27d512f1830b8fb3e2b05b92a0cb5
+R dc63788d974b701458838d84fc2750a6
  U drh
-Z be59c6c32a9cdacaed5c099c4e34bcba
+Z 5d96385345b08fab32c52067bf0a8a92
diff --git a/manifest.uuid b/manifest.uuid

index afd9eb67bf2a6d79005bac41863a2083ad24a2c7..e058683dc9e17da25630f75797660a614a376f77 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-4692a85edbe27d512f1830b8fb3e2b05b92a0cb5
-\ No newline at end of file
+908daaa9ab86e0bd1da6d0807d6aaba240c3cee0
+\ No newline at end of file
diff --git a/src/where.c b/src/where.c

index a364ce0e100cce71e0978ba2ad3754a63fc0e379..ad7e12c88a9165b4c9585e796161e9f57fc9d731 100644 (file)
--- a/src/where.c
+++ b/src/where.c
@@ -16,7 +16,7 @@
  ** so is applicable.  Because this module is responsible for selecting
  ** indices, you might also think of this module as the "query optimizer".
  **
-** $Id: where.c,v 1.237 2007/02/06 13:26:33 drh Exp $
+** $Id: where.c,v 1.238 2007/02/23 23:13:34 drh Exp $
  */
  #include "sqliteInt.h"
  
@@ -582,6 +582,92 @@ static void transferJoinMarkings(Expr *pDerived, Expr *pBase){
    pDerived->iRightJoinTable = pBase->iRightJoinTable;
  }
  
+#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY)
+/*
+** Return TRUE if the given term of an OR clause can be converted
+** into an IN clause.  The iCursor and iColumn define the left-hand
+** side of the IN clause.
+**
+** The context is that we have multiple OR-connected equality terms
+** like this:
+**
+**           a=<expr1> OR  a=<expr2> OR b=<expr3>  OR ...
+**
+** The pOrTerm input to this routine corresponds to a single term of
+** this OR clause.  In order for the term to be a condidate for
+** conversion to an IN operator, the following must be true:
+**
+**     *  The left-hand side of the term must be the column which
+**        is identified by iCursor and iColumn.
+**
+**     *  If the right-hand side is also a column, then the affinities
+**        of both right and left sides must be such that no type
+**        conversions are required on the right.  (Ticket #2249)
+**
+** If both of these conditions are true, then return true.  Otherwise
+** return false.
+*/
+static int orTermIsOptCandidate(WhereTerm *pOrTerm, int iCursor, int iColumn){
+  int affLeft, affRight;
+  assert( pOrTerm->eOperator==WO_EQ );
+  if( pOrTerm->leftCursor!=iCursor ){
+    return 0;
+  }
+  if( pOrTerm->leftColumn!=iColumn ){
+    return 0;
+  }
+  affRight = sqlite3ExprAffinity(pOrTerm->pExpr->pRight);
+  if( affRight==0 ){
+    return 1;
+  }
+  affLeft = sqlite3ExprAffinity(pOrTerm->pExpr->pLeft);
+  if( affRight!=affLeft ){
+    return 0;
+  }
+  return 1;
+}
+
+/*
+** Return true if the given term of an OR clause can be ignored during
+** a check to make sure all OR terms are candidates for optimization.
+** In other words, return true if a call to the orTermIsOptCandidate()
+** above returned false but it is not necessary to disqualify the
+** optimization.
+**
+** Suppose the original OR phrase was this:
+**
+**           a=4  OR  a=11  OR  a=b
+**
+** During analysis, the third term gets flipped around and duplicate
+** so that we are left with this:
+**
+**           a=4  OR  a=11  OR  a=b  OR  b=a
+**
+** Since the last two terms are duplicates, only one of them
+** has to qualify in order for the whole phrase to qualify.  When
+** this routine is called, we know that pOrTerm did not qualify.
+** This routine merely checks to see if pOrTerm has a duplicate that
+** might qualify.  If there is a duplicate that has not yet been
+** disqualified, then return true.  If there are no duplicates, or
+** the duplicate has also been disqualifed, return false.
+*/
+static int orTermHasOkDuplicate(WhereClause *pOr, WhereTerm *pOrTerm){
+  if( pOrTerm->flags & TERM_COPIED ){
+    /* This is the original term.  The duplicate is to the left had
+    ** has not yet been analyzed and thus has not yet been disqualified. */
+    return 1;
+  }
+  if( (pOrTerm->flags & TERM_VIRTUAL)!=0
+     && (pOr->a[pOrTerm->iParent].flags & TERM_OR_OK)!=0 ){
+    /* This is a duplicate term.  The original qualified so this one
+    ** does not have to. */
+    return 1;
+  }
+  /* This is either a singleton term or else it is a duplicate for
+  ** which the original did not qualify.  Either way we are done for. */
+  return 0;
+}
+#endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */
  
  /*
  ** The input to this routine is an WhereTerm structure with only the
@@ -716,9 +802,10 @@ static void exprAnalyze(
      whereClauseInit(&sOr, pWC->pParse, pMaskSet);
      whereSplit(&sOr, pExpr, TK_OR);
      exprAnalyzeAll(pSrc, &sOr);
-    assert( sOr.nTerm>0 );
+    assert( sOr.nTerm>=2 );
      j = 0;
      do{
+      assert( j<sOr.nTerm );
        iColumn = sOr.a[j].leftColumn;
        iCursor = sOr.a[j].leftCursor;
        ok = iCursor>=0;
@@ -726,17 +813,15 @@ static void exprAnalyze(
          if( pOrTerm->eOperator!=WO_EQ ){
            goto or_not_possible;
          }
-        if( pOrTerm->leftCursor==iCursor && pOrTerm->leftColumn==iColumn ){
+        if( orTermIsOptCandidate(pOrTerm, iCursor, iColumn) ){
            pOrTerm->flags |= TERM_OR_OK;
-        }else if( (pOrTerm->flags & TERM_COPIED)!=0 ||
-                    ((pOrTerm->flags & TERM_VIRTUAL)!=0 &&
-                     (sOr.a[pOrTerm->iParent].flags & TERM_OR_OK)!=0) ){
+        }else if( orTermHasOkDuplicate(&sOr, pOrTerm) ){
            pOrTerm->flags &= ~TERM_OR_OK;
          }else{
            ok = 0;
          }
        }
-    }while( !ok && (sOr.a[j++].flags & TERM_COPIED)!=0 && j<sOr.nTerm );
+    }while( !ok && (sOr.a[j++].flags & TERM_COPIED)!=0 && j<2 );
      if( ok ){
        ExprList *pList = 0;
        Expr *pNew, *pDup;
diff --git a/test/where2.test b/test/where2.test

index b8eb296d379d47d72f8e485a643a1e66e1ccacd8..d2f4450c294fd76885685e0a972599d731125196 100644 (file)
--- a/test/where2.test
+++ b/test/where2.test
@@ -12,7 +12,7 @@
  # focus of this file is testing the use of indices in WHERE clauses
  # based on recent changes to the optimizer.
  #
-# $Id: where2.test,v 1.10 2006/11/06 15:10:06 drh Exp $
+# $Id: where2.test,v 1.11 2007/02/23 23:13:34 drh Exp $
  
  set testdir [file dirname $argv0]
  source $testdir/tester.tcl
@@ -219,11 +219,16 @@ ifcapable subquery {
  #
  set ::idx {}
  ifcapable subquery {set ::idx i1w}
-do_test where2-6.1 {
+do_test where2-6.1.1 {
    queryplan {
      SELECT * FROM t1 WHERE w=99 OR w=100 ORDER BY +w
    }
  } [list 99 6 10000 10006 100 6 10201 10207 sort t1 $::idx]
+do_test where2-6.1.2 {
+  queryplan {
+    SELECT * FROM t1 WHERE 99=w OR 100=w ORDER BY +w
+  }
+} [list 99 6 10000 10006 100 6 10201 10207 sort t1 $::idx]
  do_test where2-6.2 {
    queryplan {
      SELECT * FROM t1 WHERE w=99 OR w=100 OR 6=w ORDER BY +w
@@ -258,6 +263,126 @@ do_test where2-6.6 {
    }
  } [list 1 0 4 4 2 1 9 10 sort a i1w b $::idx]
  
+# Ticket #2249.  Make sure the OR optimization is not attempted if
+# comparisons between columns of different affinities are needed.
+#
+do_test where2-6.7 {
+  execsql {
+    CREATE TABLE t2249a(a TEXT UNIQUE);
+    CREATE TABLE t2249b(b INTEGER);
+    INSERT INTO t2249a VALUES('0123');
+    INSERT INTO t2249b VALUES(123);
+  }
+  queryplan {
+    -- Because a is type TEXT and b is type INTEGER, both a and b
+    -- will attempt to convert to NUMERIC before the comparison.
+    -- They will thus compare equal.
+    --
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE a=b;
+  }
+} {123 0123 nosort t2249b {} t2249a {}}
+do_test where2-6.9 {
+  queryplan {
+    -- The + operator removes affinity from the rhs.  No conversions
+    -- occur and the comparison is false.  The result is an empty set.
+    --
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE a=+b;
+  }
+} {nosort t2249b {} {} sqlite_autoindex_t2249a_1}
+do_test where2-6.9.2 {
+  # The same thing but with the expression flipped around.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE +b=a
+  }
+} {nosort t2249b {} {} sqlite_autoindex_t2249a_1}
+do_test where2-6.10 {
+  queryplan {
+    -- Use + on both sides of the comparison to disable indices
+    -- completely.  Make sure we get the same result.
+    --
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE +a=+b;
+  }
+} {nosort t2249b {} t2249a {}}
+do_test where2-6.11 {
+  # This will not attempt the OR optimization because of the a=b
+  # comparison.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE a=b OR a='hello';
+  }
+} {123 0123 nosort t2249b {} t2249a {}}
+do_test where2-6.11.2 {
+  # Permutations of the expression terms.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE b=a OR a='hello';
+  }
+} {123 0123 nosort t2249b {} t2249a {}}
+do_test where2-6.11.3 {
+  # Permutations of the expression terms.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE 'hello'=a OR b=a;
+  }
+} {123 0123 nosort t2249b {} t2249a {}}
+do_test where2-6.11.4 {
+  # Permutations of the expression terms.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE a='hello' OR b=a;
+  }
+} {123 0123 nosort t2249b {} t2249a {}}
+do_test where2-6.12 {
+  # In this case, the +b disables the affinity conflict and allows
+  # the OR optimization to be used again.  The result is now an empty
+  # set, the same as in where2-6.9.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE a=+b OR a='hello';
+  }
+} {nosort t2249b {} {} sqlite_autoindex_t2249a_1}
+do_test where2-6.12.2 {
+  # In this case, the +b disables the affinity conflict and allows
+  # the OR optimization to be used again.  The result is now an empty
+  # set, the same as in where2-6.9.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE a='hello' OR +b=a;
+  }
+} {nosort t2249b {} {} sqlite_autoindex_t2249a_1}
+do_test where2-6.12.3 {
+  # In this case, the +b disables the affinity conflict and allows
+  # the OR optimization to be used again.  The result is now an empty
+  # set, the same as in where2-6.9.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE +b=a OR a='hello';
+  }
+} {nosort t2249b {} {} sqlite_autoindex_t2249a_1}
+do_test where2-6.13 {
+  # The addition of +a on the second term disabled the OR optimization.
+  # But we should still get the same empty-set result as in where2-6.9.
+  queryplan {
+    SELECT * FROM t2249b CROSS JOIN t2249a WHERE a=+b OR +a='hello';
+  }
+} {nosort t2249b {} t2249a {}}
+
+# Variations on the order of terms in a WHERE clause in order
+# to make sure the OR optimizer can recognize them all.
+do_test where2-6.20 {
+  queryplan {
+    SELECT * FROM t2249a x CROSS JOIN t2249a y WHERE x.a=y.a
+  }
+} {0123 0123 nosort x {} {} sqlite_autoindex_t2249a_1}
+do_test where2-6.21 {
+  queryplan {
+    SELECT * FROM t2249a x CROSS JOIN t2249a y WHERE x.a=y.a OR y.a='hello'
+  }
+} {0123 0123 nosort x {} {} sqlite_autoindex_t2249a_1}
+do_test where2-6.22 {
+  queryplan {
+    SELECT * FROM t2249a x CROSS JOIN t2249a y WHERE y.a=x.a OR y.a='hello'
+  }
+} {0123 0123 nosort x {} {} sqlite_autoindex_t2249a_1}
+do_test where2-6.23 {
+  queryplan {
+    SELECT * FROM t2249a x CROSS JOIN t2249a y WHERE y.a='hello' OR x.a=y.a
+  }
+} {0123 0123 nosort x {} {} sqlite_autoindex_t2249a_1}
+
  # Unique queries (queries that are guaranteed to return only a single
  # row of result) do not call the sorter.  But all tables must give
  # a unique result.  If any one table in the join does not give a unique
author	drh <drh@noemail.net>
	Fri, 23 Feb 2007 23:13:33 +0000 (23:13 +0000)
committer	drh <drh@noemail.net>
	Fri, 23 Feb 2007 23:13:33 +0000 (23:13 +0000)
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
src/where.c		patch \| blob \| blame \| history
test/where2.test		patch \| blob \| blame \| history