From 88b3322f272341f4a8145583d6ba55b2d35da211 Mon Sep 17 00:00:00 2001 From: drh Date: Thu, 25 Sep 2014 03:51:37 +0000 Subject: [PATCH] More performance optimization for the LIKE and GLOB operators. FossilOrigin-Name: 5ab1023d6cfe31fa8a194804b8216058977ac973 --- manifest | 12 ++--- manifest.uuid | 2 +- src/func.c | 136 +++++++++++++++++++++++++++----------------------- 3 files changed, 81 insertions(+), 69 deletions(-) diff --git a/manifest b/manifest index de2a2bc27f..37342c612c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthat\smight\sallow\sSQLite\sto\sbuild\sand\swork\susing\sthe\sEBCDIC\scharacter\nset. -D 2014-09-25T02:44:29.974 +C More\sperformance\soptimization\sfor\sthe\sLIKE\sand\sGLOB\soperators. +D 2014-09-25T03:51:37.139 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in cf57f673d77606ab0f2d9627ca52a9ba1464146a F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -184,7 +184,7 @@ F src/delete.c fae81cc2eb14b75267d4f47d3cfc9ae02aae726f F src/expr.c f32119248996680aa73c5c37bfdd42820804dc17 F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb F src/fkey.c da985ae673efef2c712caef825a5d2edb087ead7 -F src/func.c fd49097fdd74eecbc244e5e64fd288a303db20e9 +F src/func.c 610b18afde750686785cdad9196b9fb1b03dc9b3 F src/global.c 5110fa12e09729b84eee0191c984ec4008e21937 F src/hash.c 4263fbc955f26c2e8cdc0cf214bc42435aa4e4f5 F src/hash.h c8f3c31722cf3277d03713909761e152a5b81094 @@ -1200,7 +1200,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b2c89ef49cd19b8031a8149a2dc47cea07dd04e0 -R 594413b3f59b58a61e72cebd73f880a5 +P ef30e0352b3d4a29749cd0872c10e45a6649ec52 +R 8a27395e5bbddced71b289eca8fe4771 U drh -Z 19c5c0df4d171b8ce37674c647553165 +Z c372796d929c11ef7d059e5ca2e0eb4a diff --git a/manifest.uuid b/manifest.uuid index 7b2fbde36d..035ef4eeb2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ef30e0352b3d4a29749cd0872c10e45a6649ec52 \ No newline at end of file +5ab1023d6cfe31fa8a194804b8216058977ac973 \ No newline at end of file diff --git a/src/func.c b/src/func.c index fc908ded36..a2a5c1858d 100644 --- a/src/func.c +++ b/src/func.c @@ -567,10 +567,12 @@ struct compareInfo { ** whereas only characters less than 0x80 do in ASCII. */ #if defined(SQLITE_EBCDIC) -# define sqlite3Utf8Read(A) (*((*A)++)) -# define GlobUpperToLower(A) A = sqlite3UpperToLower[A] +# define sqlite3Utf8Read(A) (*((*A)++)) +# define GlobUpperToLower(A) A = sqlite3UpperToLower[A] +# define GlobUpperToLowerAscii(A) A = sqlite3UpperToLower[A] #else -# define GlobUpperToLower(A) if( !((A)&~0x7f) ){ A = sqlite3UpperToLower[A]; } +# define GlobUpperToLower(A) if( A<=0x7f ){ A = sqlite3UpperToLower[A]; } +# define GlobUpperToLowerAscii(A) A = sqlite3UpperToLower[A] #endif static const struct compareInfo globInfo = { '*', '?', '[', 0 }; @@ -618,10 +620,17 @@ static int patternCompare( u32 c, c2; int invert; int seen; - u8 matchOne = pInfo->matchOne; - u8 matchAll = pInfo->matchAll; - u8 matchSet = pInfo->matchSet; - u8 noCase = pInfo->noCase; + u32 matchOne = pInfo->matchOne; + u32 matchAll = pInfo->matchAll; + u32 matchOther; + u8 noCase = pInfo->noCase; + + /* The GLOB operator does not have an ESCAPE clause. And LIKE does not + ** have the matchSet operator. So we either have to look for one or + ** the other, never both. Hence the single variable matchOther is used + ** to store the one we have to look for. + */ + matchOther = esc ? esc : pInfo->matchSet; while( (c = sqlite3Utf8Read(&zPattern))!=0 ){ if( c==matchAll ){ @@ -633,26 +642,26 @@ static int patternCompare( } if( c==0 ){ return 1; - }else if( c==esc ){ - c = sqlite3Utf8Read(&zPattern); - if( c==0 ){ - return 0; - } - }else if( c==matchSet ){ - assert( esc==0 ); /* This is GLOB, not LIKE */ - assert( matchSet<0x80 ); /* '[' is a single-byte character */ - while( *zString && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){ - SQLITE_SKIP_UTF8(zString); + }else if( c==matchOther ){ + if( esc ){ + c = sqlite3Utf8Read(&zPattern); + if( c==0 ) return 0; + }else{ + assert( matchOther<0x80 ); /* '[' is a single-byte character */ + while( *zString + && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){ + SQLITE_SKIP_UTF8(zString); + } + return *zString!=0; } - return *zString!=0; } while( (c2 = sqlite3Utf8Read(&zString))!=0 ){ - if( noCase ){ + if( noCase && c<0x80 ){ GlobUpperToLower(c2); - GlobUpperToLower(c); + GlobUpperToLowerAscii(c); while( c2 != 0 && c2 != c ){ - c2 = sqlite3Utf8Read(&zString); - GlobUpperToLower(c2); + do{ c2 = *(zString++); }while( c2>0x7f ); + GlobUpperToLowerAscii(c2); } }else{ while( c2 != 0 && c2 != c ){ @@ -663,55 +672,58 @@ static int patternCompare( if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } return 0; - }else if( c==matchOne ){ + } + if( c==matchOne ){ if( sqlite3Utf8Read(&zString)==0 ){ return 0; + }else{ + continue; } - }else if( c==matchSet ){ - u32 prior_c = 0; - assert( esc==0 ); /* This only occurs for GLOB, not LIKE */ - seen = 0; - invert = 0; - c = sqlite3Utf8Read(&zString); - if( c==0 ) return 0; - c2 = sqlite3Utf8Read(&zPattern); - if( c2=='^' ){ - invert = 1; - c2 = sqlite3Utf8Read(&zPattern); - } - if( c2==']' ){ - if( c==']' ) seen = 1; + } + if( c==matchOther ){ + if( esc ){ + c = sqlite3Utf8Read(&zPattern); + if( c==0 ) return 0; + }else{ + u32 prior_c = 0; + seen = 0; + invert = 0; + c = sqlite3Utf8Read(&zString); + if( c==0 ) return 0; c2 = sqlite3Utf8Read(&zPattern); - } - while( c2 && c2!=']' ){ - if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){ + if( c2=='^' ){ + invert = 1; c2 = sqlite3Utf8Read(&zPattern); - if( c>=prior_c && c<=c2 ) seen = 1; - prior_c = 0; - }else{ - if( c==c2 ){ - seen = 1; + } + if( c2==']' ){ + if( c==']' ) seen = 1; + c2 = sqlite3Utf8Read(&zPattern); + } + while( c2 && c2!=']' ){ + if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){ + c2 = sqlite3Utf8Read(&zPattern); + if( c>=prior_c && c<=c2 ) seen = 1; + prior_c = 0; + }else{ + if( c==c2 ){ + seen = 1; + } + prior_c = c2; } - prior_c = c2; + c2 = sqlite3Utf8Read(&zPattern); } - c2 = sqlite3Utf8Read(&zPattern); - } - if( c2==0 || (seen ^ invert)==0 ){ - return 0; - } - }else{ - c2 = sqlite3Utf8Read(&zString); - if( c==esc ){ - c = sqlite3Utf8Read(&zPattern); - } - if( noCase ){ - GlobUpperToLower(c); - GlobUpperToLower(c2); - } - if( c!=c2 ){ - return 0; + if( c2==0 || (seen ^ invert)==0 ){ + return 0; + } + continue; } } + c2 = sqlite3Utf8Read(&zString); + if( c==c2 ) continue; + if( !noCase ) return 0; + GlobUpperToLower(c); + GlobUpperToLower(c2); + if( c!=c2 ) return 0; } return *zString==0; } -- 2.47.2