From: drh Date: Sun, 28 Aug 2005 17:00:23 +0000 (+0000) Subject: The LIKE optimization does the right thing when collating sequences are X-Git-Tag: version-3.6.10~3522 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d64fe2f374f7a278bff67df9968f939b60faa222;p=thirdparty%2Fsqlite.git The LIKE optimization does the right thing when collating sequences are present. LIKE expressions where the left-hand side has COLLATE NOCASE are optimized in the default case. (CVS 2637) FossilOrigin-Name: ef84ff795c85e9d28f1cac84ff42d8d4ef84cfc4 --- diff --git a/manifest b/manifest index 8989268f83..28b69bdb5c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improvements\sto\sthe\sformatting\sand\slayout\sof\sthe\scode\sin\sthe\sprevious\scheckin.\s(CVS\s2636) -D 2005-08-28T01:38:44 +C The\sLIKE\soptimization\sdoes\sthe\sright\sthing\swhen\scollating\ssequences\sare\r\npresent.\s\sLIKE\sexpressions\swhere\sthe\sleft-hand\sside\shas\sCOLLATE\sNOCASE\r\nare\soptimized\sin\sthe\sdefault\scase.\s(CVS\s2637) +D 2005-08-28T17:00:23 F Makefile.in 12784cdce5ffc8dfb707300c34e4f1eb3b8a14f1 F Makefile.linux-gcc 06be33b2a9ad4f005a5f42b22c4a19dab3cbb5c7 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -41,12 +41,12 @@ F src/date.c 7444b0900a28da77e57e3337a636873cff0ae940 F src/delete.c be1fc25c9e109cd8cbab42a43ee696263da7c04b F src/experimental.c 50c1e3b34f752f4ac10c36f287db095c2b61766d F src/expr.c 1916cb22c585e1aa0d1e25a8efe7497004b6ae32 -F src/func.c 34085cf518928c7aa61c2d5029e25b0326108887 +F src/func.c 9da04a6241309a612cf610715944c6a2aaf0f297 F src/hash.c 2b1b13f7400e179631c83a1be0c664608c8f021f F src/hash.h 1b0c445e1c89ff2aaad9b4605ba61375af001e84 F src/insert.c 484c73bc1309f283a31baa0e114f3ee980536397 F src/legacy.c d58ea507bce885298a2c8c3cbb0f4bff5d47830b -F src/main.c dce7e4bf2280e57de1492dec61c7310d14b5e179 +F src/main.c 60eb224fa5fe65e92dcdfdc542c94bae5e4e2e84 F src/md5.c 7ae1c39044b95de2f62e066f47bb1deb880a1070 F src/os.h c4b34bd4d6fea51a420f337468b907f4edecb161 F 
src/os_common.h 0e7f428ba0a6c40a61bc56c4e96f493231301b73 @@ -66,7 +66,7 @@ F src/random.c 90adff4e73a3b249eb4f1fc2a6ff9cf78c7233a4 F src/select.c f8a9993bcd953eb325c8c3f32985cc52b2947354 F src/shell.c 7fb744da457b0d11e0af7f6a2f6b000fc09fe588 F src/sqlite.h.in a3b75a6b2e66865fba4ec1b698d00c7d95fe27a2 -F src/sqliteInt.h e5fb91af1d607f3bc84bfb7da8534fd3298a38b0 +F src/sqliteInt.h fe9520e940c46fa6970a9cb7813b44c3f8925638 F src/table.c 25b3ff2b39b7d87e8d4a5da0713d68dfc06cbee9 F src/tclsqlite.c e86b5483de6cb1ec1154cc5b76e3427d4b214961 F src/test1.c 6a36fa85e9d0d4f0eaa7eadd087e40ce9cf35074 @@ -87,7 +87,7 @@ F src/vdbeapi.c f1adebb5e3fe4724ed0e1a82c4a61809d7e15e9e F src/vdbeaux.c 192e0dbeaaa0bfa652b0c2579c19894e5e5626fc F src/vdbefifo.c 9efb94c8c3f4c979ebd0028219483f88e57584f5 F src/vdbemem.c 4732fd4d1a75dc38549493d7f9a81d02bf7c59b5 -F src/where.c 485041aa51fb33f43b346e018f7c01422847f364 +F src/where.c 14a2f906f5c6d6353690c4cb9c3702eaf4da8944 F tclinstaller.tcl 046e3624671962dc50f0481d7c25b38ef803eb42 F test/all.test 7f0988442ab811dfa41793b5b550f5828ce316f3 F test/alter.test 9d6837a3d946b73df692b7cef2a7644d2e2f6bc6 @@ -164,7 +164,7 @@ F test/join4.test 1a352e4e267114444c29266ce79e941af5885916 F test/journal1.test 36f2d1bb9bf03f790f43fbdb439e44c0657fab19 F test/lastinsert.test eaa89c6ee1f13062d87139fd32c1e56753d2fd89 F test/laststmtchanges.test 19a6d0c11f7a31dc45465b495f7b845a62cbec17 -F test/like.test b1e77e327add19ac4ddc371cbf4f208b344ab878 +F test/like.test 145382e6a1f3d2edf266ca7d0236ab1b7c0ba66f F test/limit.test 270b076f31c5c32f7187de5727e74da4de43e477 F test/lock.test 9b7afcb24f53d24da502abb33daaad2cd6d44107 F test/lock2.test 59c3dd7d9b24d1bf7ec91b2d1541c37e97939d5f @@ -282,7 +282,7 @@ F www/faq.tcl 49f31a703f74c71ce66da646aaf18b07a5042672 F www/fileformat.tcl 900c95b9633abc3dcfc384d9ddd8eb4876793059 F www/formatchng.tcl 053ddb73646701353a5b1c9ca6274d5900739b45 F www/index.tcl b5eb631c918006cf3ea9b7347d084cc017b1f32a -F www/lang.tcl 
9e27ecd2a2d3194191f820068f7bc70714178882 +F www/lang.tcl 422b21b899f6d84dd3fdd2d4b204061b6912efd2 F www/lockingv3.tcl f59b19d6c8920a931f096699d6faaf61c05db55f F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c F www/nulls.tcl ec35193f92485b87b90a994a01d0171b58823fcf @@ -290,7 +290,7 @@ F www/oldnews.tcl 1a808d86882621557774bf7741ed81c7f4ef9f19 F www/omitted.tcl f1e57977299c3ed54fbae55e4b5ea6a64de39e19 F www/opcode.tcl 5bd68059416b223515a680d410a9f7cb6736485f F www/optimizing.tcl f0b2538988d1bbad16cbfe63ec6e8f48c9eb04e5 -F www/pragma.tcl 56d29b0c14b38d61b12c17f3e3e6025a8e15c057 +F www/pragma.tcl 44f7b665ca598ad24724f35991653638a36a6e3f F www/quickstart.tcl 6f6f694b6139be2d967b1492eb9a6bdf7058aa60 F www/speed.tcl 656ed5be8cc9d536353e1a96927b925634a62933 F www/sqlite.tcl b51fd15f0531a54874de785a9efba323eecd5975 @@ -299,7 +299,7 @@ F www/tclsqlite.tcl 3df553505b6efcad08f91e9b975deb2e6c9bb955 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl a99cf5f6d8bd4d5537584a2b342f0fb9fa601d8b F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 -P 07b4892149a436dbd904c781b46e3b9a82a7a744 -R 56bce97242dc7452e7c0c82a7290a18f +P 73b430de0c0f3cd230861fc1a53691818f17de0d +R b31a17c35808ac3ee086098bd4d86db8 U drh -Z c5016f965428049b0b8ec32bf981d8fb +Z 655e339e68d859a37945f6b381f71e9c diff --git a/manifest.uuid b/manifest.uuid index 9e582500ed..d7883782dc 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -73b430de0c0f3cd230861fc1a53691818f17de0d \ No newline at end of file +ef84ff795c85e9d28f1cac84ff42d8d4ef84cfc4 \ No newline at end of file diff --git a/src/func.c b/src/func.c index c7baaa086c..919b3a83a4 100644 --- a/src/func.c +++ b/src/func.c @@ -16,7 +16,7 @@ ** sqliteRegisterBuildinFunctions() found at the bottom of the file. ** All other code has file scope. 
** -** $Id: func.c,v 1.105 2005/08/27 13:16:33 drh Exp $ +** $Id: func.c,v 1.106 2005/08/28 17:00:23 drh Exp $ */ #include "sqliteInt.h" #include @@ -1041,11 +1041,11 @@ void sqlite3RegisterBuiltinFunctions(sqlite3 *db){ /* ** Set the LIKEOPT flag on the 2-argument function with the given name. */ -static void setLikeOptFlag(sqlite3 *db, const char *zName){ +static void setLikeOptFlag(sqlite3 *db, const char *zName, int flagVal){ FuncDef *pDef; pDef = sqlite3FindFunction(db, zName, strlen(zName), 2, SQLITE_UTF8, 0); if( pDef ){ - pDef->flags = SQLITE_FUNC_LIKEOPT; + pDef->flags = flagVal; } } @@ -1065,10 +1065,9 @@ void sqlite3RegisterLikeFunctions(sqlite3 *db, int caseSensitive){ sqlite3_create_function(db, "like", 3, SQLITE_UTF8, pInfo, likeFunc, 0, 0); sqlite3_create_function(db, "glob", 2, SQLITE_UTF8, (struct compareInfo*)&globInfo, likeFunc, 0,0); - setLikeOptFlag(db, "glob"); - if( caseSensitive ){ - setLikeOptFlag(db, "like"); - } + setLikeOptFlag(db, "glob", SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE); + setLikeOptFlag(db, "like", + caseSensitive ? (SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE) : SQLITE_FUNC_LIKE); } /* @@ -1078,7 +1077,7 @@ void sqlite3RegisterLikeFunctions(sqlite3 *db, int caseSensitive){ ** return TRUE. If the function is not a LIKE-style function then ** return FALSE. 
*/ -int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, char *aWc){ +int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, int *pIsNocase, char *aWc){ FuncDef *pDef; if( pExpr->op!=TK_FUNCTION ){ return 0; @@ -1088,7 +1087,7 @@ int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, char *aWc){ } pDef = sqlite3FindFunction(db, pExpr->token.z, pExpr->token.n, 2, SQLITE_UTF8, 0); - if( pDef==0 || (pDef->flags & SQLITE_FUNC_LIKEOPT)==0 ){ + if( pDef==0 || (pDef->flags & SQLITE_FUNC_LIKE)==0 ){ return 0; } @@ -1100,6 +1099,6 @@ int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, char *aWc){ assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll ); assert( &((char*)&likeInfoAlt)[1] == (char*)&likeInfoAlt.matchOne ); assert( &((char*)&likeInfoAlt)[2] == (char*)&likeInfoAlt.matchSet ); - + *pIsNocase = (pDef->flags & SQLITE_FUNC_CASE)==0; return 1; } diff --git a/src/main.c b/src/main.c index a84f94f49c..41d1045fc5 100644 --- a/src/main.c +++ b/src/main.c @@ -14,7 +14,7 @@ ** other files are for internal use by SQLite and should not be ** accessed by users of the library. ** -** $Id: main.c,v 1.298 2005/08/14 01:20:39 drh Exp $ +** $Id: main.c,v 1.299 2005/08/28 17:00:23 drh Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -694,6 +694,7 @@ static int openDatabase( ){ sqlite3 *db; int rc, i; + CollSeq *pColl; /* Allocate the sqlite data structure */ db = sqliteMalloc( sizeof(sqlite3) ); @@ -730,6 +731,13 @@ static int openDatabase( /* Also add a UTF-8 case-insensitive collation sequence. 
*/ sqlite3_create_collation(db, "NOCASE", SQLITE_UTF8, 0, nocaseCollatingFunc); + /* Set flags on the built-in collating sequences */ + db->pDfltColl->type = SQLITE_COLL_BINARY; + pColl = sqlite3FindCollSeq(db, SQLITE_UTF8, "NOCASE", 6, 0); + if( pColl ){ + pColl->type = SQLITE_COLL_NOCASE; + } + /* Open the backend database driver */ rc = sqlite3BtreeFactory(db, zFilename, 0, MAX_PAGES, &db->aDb[0].pBt); if( rc!=SQLITE_OK ){ @@ -901,7 +909,7 @@ int sqlite3_create_collation( pColl = sqlite3FindCollSeq(db, (u8)enc, zName, strlen(zName), 1); if( 0==pColl ){ - rc = SQLITE_NOMEM; + rc = SQLITE_NOMEM; }else{ pColl->xCmp = xCompare; pColl->pUser = pCtx; diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 180c0e2871..dba03e1f1b 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. ** -** @(#) $Id: sqliteInt.h,v 1.403 2005/08/19 00:14:42 drh Exp $ +** @(#) $Id: sqliteInt.h,v 1.404 2005/08/28 17:00:23 drh Exp $ */ #ifndef _SQLITEINT_H_ #define _SQLITEINT_H_ @@ -511,7 +511,8 @@ struct FuncDef { /* ** Possible values for FuncDef.flags */ -#define SQLITE_FUNC_LIKEOPT 0x01 /* Candidate for the LIKE optimization */ +#define SQLITE_FUNC_LIKE 0x01 /* Candidate for the LIKE optimization */ +#define SQLITE_FUNC_CASE 0x02 /* Case-sensitive LIKE-type function */ /* ** information about each column of an SQL table is held in an instance @@ -551,10 +552,19 @@ struct Column { struct CollSeq { char *zName; /* Name of the collating sequence, UTF-8 encoded */ u8 enc; /* Text encoding handled by xCmp() */ + u8 type; /* One of the SQLITE_COLL_... 
values below */ void *pUser; /* First argument to xCmp() */ int (*xCmp)(void*,int, const void*, int, const void*); }; +/* +** Allowed values of CollSeq flags: +*/ +#define SQLITE_COLL_BINARY 1 /* The default memcmp() collating sequence */ +#define SQLITE_COLL_NOCASE 2 /* The built-in NOCASE collating sequence */ +#define SQLITE_COLL_REVERSE 3 /* The built-in REVERSE collating sequence */ +#define SQLITE_COLL_USER 0 /* Any other user-defined collating sequence */ + /* ** A sort order can be either ASC or DESC. */ @@ -1583,7 +1593,7 @@ int sqlite3FindDb(sqlite3*, Token*); void sqlite3AnalysisLoad(sqlite3*,int iDB); void sqlite3DefaultRowEst(Index*); void sqlite3RegisterLikeFunctions(sqlite3*, int); -int sqlite3IsLikeFunction(sqlite3*,Expr*,char*); +int sqlite3IsLikeFunction(sqlite3*,Expr*,int*,char*); #ifdef SQLITE_SSE #include "sseInt.h" diff --git a/src/where.c b/src/where.c index 4e355e8022..3e59338ecd 100644 --- a/src/where.c +++ b/src/where.c @@ -16,7 +16,7 @@ ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". 
** -** $Id: where.c,v 1.165 2005/08/24 03:52:19 drh Exp $ +** $Id: where.c,v 1.166 2005/08/28 17:00:25 drh Exp $ */ #include "sqliteInt.h" @@ -479,8 +479,11 @@ static int isLikeOrGlob( Expr *pRight, *pLeft; ExprList *pList; int c, cnt; + int noCase; char wc[3]; - if( !sqlite3IsLikeFunction(db, pExpr, wc) ){ + CollSeq *pColl; + + if( !sqlite3IsLikeFunction(db, pExpr, &noCase, wc) ){ return 0; } pList = pExpr->pList; @@ -492,6 +495,14 @@ static int isLikeOrGlob( if( pLeft->op!=TK_COLUMN ){ return 0; } + pColl = pLeft->pColl; + if( pColl==0 ){ + pColl = db->pDfltColl; + } + if( (pColl->type!=SQLITE_COLL_BINARY || noCase) && + (pColl->type!=SQLITE_COLL_NOCASE || !noCase) ){ + return 0; + } sqlite3DequoteExpr(pRight); z = pRight->token.z; for(cnt=0; (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2]; cnt++){} diff --git a/test/like.test b/test/like.test index 6a951cde23..d73c657df7 100644 --- a/test/like.test +++ b/test/like.test @@ -13,7 +13,7 @@ # in particular the optimizations that occur to help those operators # run faster. # -# $Id: like.test,v 1.2 2005/08/19 00:14:43 drh Exp $ +# $Id: like.test,v 1.3 2005/08/28 17:00:26 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -312,5 +312,58 @@ do_test like-4.6 { set sqlite_like_count } 12 +# Collating sequences on the index disable the LIKE optimization. +# Or if the NOCASE collating sequence is used, the LIKE optimization +# is enabled when case_sensitive_like is OFF. 
+# +do_test like-5.1 { + execsql {PRAGMA case_sensitive_like=off} + set sqlite_like_count 0 + queryplan { + SELECT x FROM t1 WHERE x LIKE 'abc%' ORDER BY 1 + } +} {ABC {ABC abc xyz} abc abcd nosort {} i1} +do_test like-5.2 { + set sqlite_like_count +} 12 +do_test like-5.3 { + execsql { + CREATE TABLE t2(x COLLATE NOCASE); + INSERT INTO t2 SELECT * FROM t1; + CREATE INDEX i2 ON t2(x COLLATE NOCASE); + } + set sqlite_like_count 0 + queryplan { + SELECT x FROM t2 WHERE x LIKE 'abc%' ORDER BY 1 + } +} {abc ABC {ABC abc xyz} abcd nosort {} i2} +do_test like-5.4 { + set sqlite_like_count +} 0 +do_test like-5.5 { + execsql { + PRAGMA case_sensitive_like=on; + } + set sqlite_like_count 0 + queryplan { + SELECT x FROM t2 WHERE x LIKE 'abc%' ORDER BY 1 + } +} {abc abcd nosort {} i2} +do_test like-5.6 { + set sqlite_like_count +} 12 +do_test like-5.7 { + execsql { + PRAGMA case_sensitive_like=off; + } + set sqlite_like_count 0 + queryplan { + SELECT x FROM t2 WHERE x GLOB 'abc*' ORDER BY 1 + } +} {abc abcd nosort {} i2} +do_test like-5.8 { + set sqlite_like_count +} 12 + finish_test diff --git a/www/lang.tcl b/www/lang.tcl index 714ccb7b27..1c46b01385 100644 --- a/www/lang.tcl +++ b/www/lang.tcl @@ -1,7 +1,7 @@ # # Run this Tcl script to generate the lang-*.html files. # -set rcsid {$Id: lang.tcl,v 1.97 2005/08/23 11:03:03 drh Exp $} +set rcsid {$Id: lang.tcl,v 1.98 2005/08/28 17:00:26 drh Exp $} source common.tcl if {[llength $argv]>0} { @@ -213,7 +213,8 @@ then only indices associated with that one table are analyzed.

table named sqlite_stat1. Future enhancements may create additional tables with the same name pattern except with the "1" changed to a different digit. The sqlite_stat1 table cannot -be DROPped, but it all the content can be DELETEd which as the +be DROPped, +but all of its content can be DELETEd, which has the same effect.

} diff --git a/www/pragma.tcl b/www/pragma.tcl index 75221f875b..7ed9d41843 100644 --- a/www/pragma.tcl +++ b/www/pragma.tcl @@ -1,7 +1,7 @@ # # Run this Tcl script to generate the pragma.html file. # -set rcsid {$Id: pragma.tcl,v 1.15 2005/06/07 20:07:24 drh Exp $} +set rcsid {$Id: pragma.tcl,v 1.16 2005/08/28 17:00:26 drh Exp $} source common.tcl header {Pragma statements supported by SQLite} @@ -108,6 +108,16 @@ puts { the default_cache_size pragma to check the cache size permanently.

+ +
  • PRAGMA case_sensitive_like; +
    PRAGMA case_sensitive_like =
    0 | 1;

    +

    The default behavior of the LIKE operator is to ignore case + for ASCII characters. Hence, by default 'a' LIKE 'A' is + true. The case_sensitive_like pragma can be turned on to change + this behavior. When case_sensitive_like is enabled, + 'a' LIKE 'A' is false but 'a' LIKE 'a' is still true.

    +
  • +
  • PRAGMA count_changes;
    PRAGMA count_changes =
    0 | 1;