From: drh Date: Thu, 22 Jul 2010 17:49:52 +0000 (+0000) Subject: Enhance the LIKE/GLOB query optimization so that it works as long as there X-Git-Tag: version-3.7.2~102 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8342e49f6299287de14d8c2a4f7d8d7d3c327a81;p=thirdparty%2Fsqlite.git Enhance the LIKE/GLOB query optimization so that it works as long as there is an index with the appropriate collating sequence and even if the default collating sequence of the column is different. Ticket [4711020446da7d93d99]. FossilOrigin-Name: 9f932655f9eb9fdab16d7deed98b7cad414e0ca6 --- diff --git a/manifest b/manifest index 940a1ebc20..374f0963de 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,8 @@ -C Add\stest/threadtest3.c,\scontaining\smulti-thread\stests\simplemented\sin\sC. -D 2010-07-22T15:44:06 +-----BEGIN PGP SIGNED MESSAGE----- +Hash: SHA1 + +C Enhance\sthe\sLIKE/GLOB\squery\soptimization\sso\sthat\sit\sworks\sas\slong\sas\sthere\nis\san\sindex\swith\sthe\sappropriate\scollating\ssequence\sand\seven\sif\sthe\sdefault\ncollating\ssequence\sof\sthe\scolumn\sis\sdifferent.\nTicket\s[4711020446da7d93d99]. +D 2010-07-22T17:49:53 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in ec08dc838fd8110fe24c92e5130bcd91cbb1ff2e F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -122,7 +125,7 @@ F src/complete.c dc1d136c0feee03c2f7550bafc0d29075e36deac F src/ctime.c 4f3aadad62c6c9f0d4e5a96718516ac4e3c598df F src/date.c 5dd8448a0bfea8d31fb14cff487d0c06ff8c8b20 F src/delete.c 41cb4f78557810eecc167b7e2317de7e12d20929 -F src/expr.c b2b053429575bf964c64bdf5459c5cbbe5bf93b8 +F src/expr.c 7490fc3b16414d1a45d5acadf559317f9244891f F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb F src/fkey.c e2116672a6bd610dc888e27df292ebc7999c9bb0 F src/func.c 0c28599430856631216b6c0131c51c89bf516026 @@ -158,7 +161,7 @@ F src/os_unix.c 3109e0e5a0d5551bab2e8c7322b20a3b8b171248 F src/os_win.c 1f8b0a1a5bcf6289e7754d0d3c16cec16d4c93ab F src/pager.c 78ca1e1f3315c8227431c403c04d791dccf242fb F src/pager.h 879fdde5a102d2f21a3135d6f647530b21c2796c -F src/parse.y 220a11ac72e2c9dffbf4cbe5fe462f328bd8d884 +F src/parse.y 12b7ebd61ea54f0e1b1083ff69cc2c8ce9353d58 F src/pcache.c 1e9aa2dbc0845b52e1b51cc39753b6d1e041cb07 F src/pcache.h c683390d50f856d4cd8e24342ae62027d1bb6050 F src/pcache1.c 3a7c28f46a61b43ff0b5c087a7983c154f4b264c @@ -172,7 +175,7 @@ F src/select.c 4903ff1bbd08b55cbce00ea43c645530de41b362 F src/shell.c fd4ccdb37c3b68de0623eb938a649e0990710714 F src/sqlite.h.in 2585fc82c922f2772e201e60a76d5fd1ca18370e F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89 -F src/sqliteInt.h d9e42f2029d4c526f9ba960bda1980ef17429c30 +F src/sqliteInt.h cb9ca3291acfd3f10859bcc4b2bb463e6e874939 F src/sqliteLimit.h 196e2f83c3b444c4548fc1874f52f84fdbda40f3 F src/status.c 4df6fe7dce2d256130b905847c6c60055882bdbe F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e @@ -230,7 +233,7 @@ F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda F src/wal.c 0925601f3299c2941a67c9cfff41ee710f70ca82 F src/wal.h 906c85760598b18584921fe08008435aa4eeeeb2 F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 3ba8350d5a379ca8e22495bc1f65b5bbca1f18a5 +F src/where.c 19a0f80a759ef288ed925e64944bbbbae3215abf F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 6745008c144bd2956d58864d21f7b304689c1cce @@ -476,7 +479,7 @@ F test/jrnlmode3.test cfcdb12b90e640a23b92785a002d96c0624c8710 F test/keyword1.test a2400977a2e4fde43bf33754c2929fda34dbca05 F test/lastinsert.test 474d519c68cb79d07ecae56a763aa7f322c72f51 F test/laststmtchanges.test ae613f53819206b3222771828d024154d51db200 -F test/like.test 1048b39d3bcb0bd116d6a457919a48245c8a5711 +F test/like.test 565d240313f15a8afa8d7098dc9fe303c1e2a496 F test/like2.test 3b2ee13149ba4a8a60b59756f4e5d345573852da F test/limit.test 2db7b3b34fb925b8e847d583d2eb67531d0ce67e F test/loadext.test 0393ce12d9616aa87597dd0ec88181de181f6db0 @@ -838,7 +841,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 613a87d62fd1f8526d53136efae5adae91824bcf -R 4eb5aa4fc422947226941c86eb780e48 -U dan -Z cb1f88a7778fdc4813d9382ac4a0ad2a +P aad88cf54e9e55b757d2ece9e39d436e71dcaa9f +R f3a265c1fe3510a6c260261e20607c66 +U drh +Z 52104ed4afc95211e3a590b305b27931 +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.6 (GNU/Linux) + +iD8DBQFMSITIoxKgR168RlERAgzNAJ9cXNUZicxUW56VFjj2wMFHtrdPPwCffxG8 +buAvGUkBeiCRzv/I4BN3y5M= +=lESR +-----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 7c66f67894..7e2f5c6ed0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -aad88cf54e9e55b757d2ece9e39d436e71dcaa9f \ No newline at end of file +9f932655f9eb9fdab16d7deed98b7cad414e0ca6 \ No newline at end of file diff --git a/src/expr.c b/src/expr.c index 2046433165..bd28bf9394 100644 --- a/src/expr.c +++ b/src/expr.c @@ -55,6 +55,18 @@ char sqlite3ExprAffinity(Expr *pExpr){ return pExpr->affinity; } +/* +** Set the explicit collating sequence for an expression to the +** collating sequence supplied in the second argument. +*/ +Expr *sqlite3ExprSetColl(Expr *pExpr, CollSeq *pColl){ + if( pExpr && pColl ){ + pExpr->pColl = pColl; + pExpr->flags |= EP_ExpCollate; + } + return pExpr; +} + /* ** Set the collating sequence for expression pExpr to be the collating ** sequence named by pToken. Return a pointer to the revised expression. @@ -62,18 +74,13 @@ char sqlite3ExprAffinity(Expr *pExpr){ ** flag. An explicit collating sequence will override implicit ** collating sequences. */ -Expr *sqlite3ExprSetColl(Parse *pParse, Expr *pExpr, Token *pCollName){ +Expr *sqlite3ExprSetCollByToken(Parse *pParse, Expr *pExpr, Token *pCollName){ char *zColl = 0; /* Dequoted name of collation sequence */ CollSeq *pColl; sqlite3 *db = pParse->db; zColl = sqlite3NameFromToken(db, pCollName); - if( pExpr && zColl ){ - pColl = sqlite3LocateCollSeq(pParse, zColl); - if( pColl ){ - pExpr->pColl = pColl; - pExpr->flags |= EP_ExpCollate; - } - } + pColl = sqlite3LocateCollSeq(pParse, zColl); + sqlite3ExprSetColl(pExpr, pColl); sqlite3DbFree(db, zColl); return pExpr; } diff --git a/src/parse.y b/src/parse.y index c32f278b6f..92abd5ce58 100644 --- a/src/parse.y +++ b/src/parse.y @@ -780,7 +780,7 @@ expr(A) ::= VARIABLE(X). { spanSet(&A, &X, &X); } expr(A) ::= expr(E) COLLATE ids(C). { - A.pExpr = sqlite3ExprSetColl(pParse, E.pExpr, &C); + A.pExpr = sqlite3ExprSetCollByToken(pParse, E.pExpr, &C); A.zStart = E.zStart; A.zEnd = &C.z[C.n]; } @@ -1108,7 +1108,7 @@ idxlist(A) ::= idxlist(X) COMMA nm(Y) collate(C) sortorder(Z). { Expr *p = 0; if( C.n>0 ){ p = sqlite3Expr(pParse->db, TK_COLUMN, 0); - sqlite3ExprSetColl(pParse, p, &C); + sqlite3ExprSetCollByToken(pParse, p, &C); } A = sqlite3ExprListAppend(pParse,X, p); sqlite3ExprListSetName(pParse,A,&Y,1); @@ -1119,7 +1119,7 @@ idxlist(A) ::= nm(Y) collate(C) sortorder(Z). { Expr *p = 0; if( C.n>0 ){ p = sqlite3PExpr(pParse, TK_COLUMN, 0, 0, 0); - sqlite3ExprSetColl(pParse, p, &C); + sqlite3ExprSetCollByToken(pParse, p, &C); } A = sqlite3ExprListAppend(pParse,0, p); sqlite3ExprListSetName(pParse, A, &Y, 1); diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 15fedf52d4..24394aebbe 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -2869,7 +2869,8 @@ int sqlite3ReadSchema(Parse *pParse); CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int); CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char*zName); CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr); -Expr *sqlite3ExprSetColl(Parse *pParse, Expr *, Token *); +Expr *sqlite3ExprSetColl(Expr*, CollSeq*); +Expr *sqlite3ExprSetCollByToken(Parse *pParse, Expr*, Token*); int sqlite3CheckCollSeq(Parse *, CollSeq *); int sqlite3CheckObjectName(Parse *, const char *); void sqlite3VdbeSetChanges(sqlite3 *, int); diff --git a/src/where.c b/src/where.c index 4b2857a2b1..547df68425 100644 --- a/src/where.c +++ b/src/where.c @@ -635,7 +635,6 @@ static int isLikeOrGlob( int c; /* One character in z[] */ int cnt; /* Number of non-wildcard prefix characters */ char wc[3]; /* Wildcard characters */ - CollSeq *pColl; /* Collating sequence for LHS */ sqlite3 *db = pParse->db; /* Database connection */ sqlite3_value *pVal = 0; int op; /* Opcode of pRight */ @@ -654,19 +653,6 @@ static int isLikeOrGlob( return 0; } assert( pLeft->iColumn!=(-1) ); /* Because IPK never has AFF_TEXT */ - pColl = sqlite3ExprCollSeq(pParse, pLeft); - if( pColl==0 ) return 0; /* Happens when LHS has an undefined collation */ - if( (pColl->type!=SQLITE_COLL_BINARY || *pnoCase) && - (pColl->type!=SQLITE_COLL_NOCASE || !*pnoCase) ){ - /* IMP: R-09003-32046 For the GLOB operator, the column must use the - ** default BINARY collating sequence. - ** IMP: R-41408-28306 For the LIKE operator, if case_sensitive_like mode - ** is enabled then the column must use the default BINARY collating - ** sequence, or if case_sensitive_like mode is disabled then the column - ** must use the built-in NOCASE collating sequence. - */ - return 0; - } pRight = pList->a[0].pExpr; op = pRight->op; @@ -1246,6 +1232,7 @@ static void exprAnalyze( Expr *pNewExpr2; int idxNew1; int idxNew2; + CollSeq *pColl; /* Collating sequence to use */ pLeft = pExpr->x.pList->a[1].pExpr; pStr2 = sqlite3ExprDup(db, pStr1, 0); @@ -1266,11 +1253,16 @@ static void exprAnalyze( } *pC = c + 1; } - pNewExpr1 = sqlite3PExpr(pParse, TK_GE, sqlite3ExprDup(db,pLeft,0),pStr1,0); + pColl = sqlite3FindCollSeq(db, SQLITE_UTF8, noCase ? "NOCASE" : "BINARY",0); + pNewExpr1 = sqlite3PExpr(pParse, TK_GE, + sqlite3ExprSetColl(sqlite3ExprDup(db,pLeft,0), pColl), + pStr1, 0); idxNew1 = whereClauseInsert(pWC, pNewExpr1, TERM_VIRTUAL|TERM_DYNAMIC); testcase( idxNew1==0 ); exprAnalyze(pSrc, pWC, idxNew1); - pNewExpr2 = sqlite3PExpr(pParse, TK_LT, sqlite3ExprDup(db,pLeft,0),pStr2,0); + pNewExpr2 = sqlite3PExpr(pParse, TK_LT, + sqlite3ExprSetColl(sqlite3ExprDup(db,pLeft,0), pColl), + pStr2, 0); idxNew2 = whereClauseInsert(pWC, pNewExpr2, TERM_VIRTUAL|TERM_DYNAMIC); testcase( idxNew2==0 ); exprAnalyze(pSrc, pWC, idxNew2); diff --git a/test/like.test b/test/like.test index b092045256..229f386880 100644 --- a/test/like.test +++ b/test/like.test @@ -778,5 +778,94 @@ do_test like-10.15 { } } {12 123 scan 5 like 6} +# LIKE and GLOB where the default collating sequence is not appropriate +# but an index with the appropriate collating sequence exists. +# +do_test like-11.0 { + execsql { + CREATE TABLE t11( + a INTEGER PRIMARY KEY, + b TEXT COLLATE nocase, + c TEXT COLLATE binary + ); + INSERT INTO t11 VALUES(1, 'a','a'); + INSERT INTO t11 VALUES(2, 'ab','ab'); + INSERT INTO t11 VALUES(3, 'abc','abc'); + INSERT INTO t11 VALUES(4, 'abcd','abcd'); + INSERT INTO t11 VALUES(5, 'A','A'); + INSERT INTO t11 VALUES(6, 'AB','AB'); + INSERT INTO t11 VALUES(7, 'ABC','ABC'); + INSERT INTO t11 VALUES(8, 'ABCD','ABCD'); + INSERT INTO t11 VALUES(9, 'x','x'); + INSERT INTO t11 VALUES(10, 'yz','yz'); + INSERT INTO t11 VALUES(11, 'X','X'); + INSERT INTO t11 VALUES(12, 'YZ','YZ'); + SELECT count(*) FROM t11; + } +} {12} +do_test like-11.1 { + queryplan { + PRAGMA case_sensitive_like=OFF; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD nosort t11 *} +do_test like-11.2 { + queryplan { + PRAGMA case_sensitive_like=ON; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd nosort t11 *} +do_test like-11.3 { + queryplan { + PRAGMA case_sensitive_like=OFF; + CREATE INDEX t11b ON t11(b); + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD sort {} t11b} +do_test like-11.4 { + queryplan { + PRAGMA case_sensitive_like=ON; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd nosort t11 *} +do_test like-11.5 { + queryplan { + PRAGMA case_sensitive_like=OFF; + DROP INDEX t11b; + CREATE INDEX t11bnc ON t11(b COLLATE nocase); + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD sort {} t11bnc} +do_test like-11.6 { + queryplan { + CREATE INDEX t11bb ON t11(b COLLATE binary); + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD sort {} t11bnc} +do_test like-11.7 { + queryplan { + PRAGMA case_sensitive_like=ON; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd sort {} t11bb} +do_test like-11.8 { + queryplan { + PRAGMA case_sensitive_like=OFF; + SELECT b FROM t11 WHERE b GLOB 'abc*' ORDER BY a; + } +} {abc abcd sort {} t11bb} +do_test like-11.9 { + queryplan { + CREATE INDEX t11cnc ON t11(c COLLATE nocase); + CREATE INDEX t11cb ON t11(c COLLATE binary); + SELECT c FROM t11 WHERE c LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD sort {} t11cnc} +do_test like-11.10 { + queryplan { + SELECT c FROM t11 WHERE c GLOB 'abc*' ORDER BY a; + } +} {abc abcd sort {} t11cb} + finish_test