From: drh Date: Thu, 25 Sep 2014 11:08:57 +0000 (+0000) Subject: Still more performance enhancements to the LIKE and GLOB operators. X-Git-Tag: version-3.8.7~67 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9fdfdc893bb14459ed6cfc142ca7208ecdb3abc8;p=thirdparty%2Fsqlite.git Still more performance enhancements to the LIKE and GLOB operators. FossilOrigin-Name: 6c8924cacc2b875270770fed2cc3b1884f57a655 --- diff --git a/manifest b/manifest index 37342c612c..fb9c0ed010 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C More\sperformance\soptimization\sfor\sthe\sLIKE\sand\sGLOB\soperators. -D 2014-09-25T03:51:37.139 +C Still\smore\sperformance\senhancements\sto\sthe\sLIKE\sand\sGLOB\soperators. +D 2014-09-25T11:08:57.081 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in cf57f673d77606ab0f2d9627ca52a9ba1464146a F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -184,7 +184,7 @@ F src/delete.c fae81cc2eb14b75267d4f47d3cfc9ae02aae726f F src/expr.c f32119248996680aa73c5c37bfdd42820804dc17 F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb F src/fkey.c da985ae673efef2c712caef825a5d2edb087ead7 -F src/func.c 610b18afde750686785cdad9196b9fb1b03dc9b3 +F src/func.c 727a324e87a3392a47e44568b901d2fb96ba0ed4 F src/global.c 5110fa12e09729b84eee0191c984ec4008e21937 F src/hash.c 4263fbc955f26c2e8cdc0cf214bc42435aa4e4f5 F src/hash.h c8f3c31722cf3277d03713909761e152a5b81094 @@ -1200,7 +1200,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ef30e0352b3d4a29749cd0872c10e45a6649ec52 -R 8a27395e5bbddced71b289eca8fe4771 +P 5ab1023d6cfe31fa8a194804b8216058977ac973 +R 3c45ffccf6b8b9761c0cc8bd190b6e11 U drh -Z c372796d929c11ef7d059e5ca2e0eb4a +Z 88a762d5e9ae73a0ed674e2385a2b544 diff --git a/manifest.uuid 
b/manifest.uuid index 035ef4eeb2..9939e8087e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5ab1023d6cfe31fa8a194804b8216058977ac973 \ No newline at end of file +6c8924cacc2b875270770fed2cc3b1884f57a655 \ No newline at end of file diff --git a/src/func.c b/src/func.c index a2a5c1858d..5b7056b401 100644 --- a/src/func.c +++ b/src/func.c @@ -585,7 +585,7 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; /* ** Compare two UTF-8 strings for equality where the first string can -** potentially be a "glob" expression. Return true (1) if they +** potentially be a "glob" or "like" expression. Return true (1) if they ** are the same and false (0) if they are different. ** ** Globbing rules: @@ -605,11 +605,18 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; ** "[a-z]" matches any single lower-case letter. To match a '-', make ** it the last character in the list. ** -** This routine is usually quick, but can be N**2 in the worst case. +** Like matching rules: +** +** '%' Matches any sequence of zero or more characters +** +** '_' Matches any one character +** +** Ec Where E is the "esc" character and c is any other +** character, including '%', '_', and esc, match exactly c. ** -** Hints: to match '*' or '?', put them in "[]". Like this: +** The comments throughout this routine usually assume glob matching. ** -** abc[*]xyz Matches "abc*xyz" only +** This routine is usually quick, but can be N**2 in the worst case. */ static int patternCompare( const u8 *zPattern, /* The glob pattern */ @@ -617,13 +624,12 @@ static int patternCompare( const struct compareInfo *pInfo, /* Information about how to do the compare */ u32 esc /* The escape character */ ){ - u32 c, c2; - int invert; - int seen; - u32 matchOne = pInfo->matchOne; - u32 matchAll = pInfo->matchAll; - u32 matchOther; - u8 noCase = pInfo->noCase; + u32 c, c2; /* Next pattern and input string chars */ + u32 matchOne = pInfo->matchOne; /* "?" 
or "_" */ + u32 matchAll = pInfo->matchAll; /* "*" or "%" */ + u32 matchOther; /* "[" or the escape character */ + u8 noCase = pInfo->noCase; /* True if uppercase==lowercase */ + const u8 *zEscaped = 0; /* One past the last escaped input char */ /* The GLOB operator does not have an ESCAPE clause. And LIKE does not ** have the matchSet operator. So we either have to look for one or @@ -633,7 +639,10 @@ static int patternCompare( matchOther = esc ? esc : pInfo->matchSet; while( (c = sqlite3Utf8Read(&zPattern))!=0 ){ - if( c==matchAll ){ + if( c==matchAll ){ /* Match "*" */ + /* Skip over multiple "*" characters in the pattern. If there + ** are also "?" characters, skip those as well, but consume a + ** single character of the input string for each "?" skipped */ while( (c=sqlite3Utf8Read(&zPattern)) == matchAll || c == matchOne ){ if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){ @@ -641,12 +650,14 @@ static int patternCompare( } } if( c==0 ){ - return 1; + return 1; /* "*" at the end of the pattern matches */ }else if( c==matchOther ){ if( esc ){ c = sqlite3Utf8Read(&zPattern); if( c==0 ) return 0; }else{ + /* "[...]" immediately follows the "*". We have to do a slow + ** recursive search in this case, but it is an unusual case. */ assert( matchOther<0x80 ); /* '[' is a single-byte character */ while( *zString && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){ @@ -655,39 +666,45 @@ static int patternCompare( return *zString!=0; } } - while( (c2 = sqlite3Utf8Read(&zString))!=0 ){ - if( noCase && c<0x80 ){ - GlobUpperToLower(c2); - GlobUpperToLowerAscii(c); - while( c2 != 0 && c2 != c ){ - do{ c2 = *(zString++); }while( c2>0x7f ); - GlobUpperToLowerAscii(c2); - } + + /* At this point variable c contains the first character of the + ** pattern string past the "*". Search in the input string for the + ** first matching character and recursively continue the match from + ** that point. 
+ ** + ** For a case-insensitive search, set variable cx to be the same as + ** c but in the other case and search the input string for either + ** c or cx. + */ + if( c<=0x80 ){ + u32 cx; + if( noCase ){ + cx = sqlite3Toupper(c); + c = sqlite3Tolower(c); }else{ - while( c2 != 0 && c2 != c ){ - c2 = sqlite3Utf8Read(&zString); - } + cx = c; + } + while( (c2 = *(zString++))!=0 ){ + if( c2!=c && c2!=cx ) continue; + if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } - if( c2==0 ) return 0; - if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; - } - return 0; - } - if( c==matchOne ){ - if( sqlite3Utf8Read(&zString)==0 ){ - return 0; }else{ - continue; + while( (c2 = sqlite3Utf8Read(&zString))!=0 ){ + if( c2!=c ) continue; + if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; + } } + return 0; } if( c==matchOther ){ if( esc ){ c = sqlite3Utf8Read(&zPattern); if( c==0 ) return 0; + zEscaped = zPattern; }else{ u32 prior_c = 0; - seen = 0; - invert = 0; + int seen = 0; + int invert = 0; c = sqlite3Utf8Read(&zString); if( c==0 ) return 0; c2 = sqlite3Utf8Read(&zPattern); @@ -720,10 +737,11 @@ static int patternCompare( } c2 = sqlite3Utf8Read(&zString); if( c==c2 ) continue; - if( !noCase ) return 0; - GlobUpperToLower(c); - GlobUpperToLower(c2); - if( c!=c2 ) return 0; + if( noCase && c<0x80 && c2<0x80 && sqlite3Tolower(c)==sqlite3Tolower(c2) ){ + continue; + } + if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue; + return 0; } return *zString==0; }