From: dan Date: Fri, 25 May 2012 19:50:12 +0000 (+0000) Subject: Add special fast paths to sqlite3FtsUnicodeTolower() and Isalnum() for codepoints... X-Git-Tag: version-3.7.13~27^2~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1c7016c9a5de2f264c45f7fde69083dcf509ec77;p=thirdparty%2Fsqlite.git Add special fast paths to sqlite3FtsUnicodeTolower() and Isalnum() for codepoints in the ASCII range. FossilOrigin-Name: cf7b25d47687635a04f4347d45f135c686b9d758 --- diff --git a/ext/fts3/fts3_unicode.c b/ext/fts3/fts3_unicode.c index 2c3e4580e3..cbdc1099b3 100644 --- a/ext/fts3/fts3_unicode.c +++ b/ext/fts3/fts3_unicode.c @@ -201,6 +201,7 @@ static int unicodeNext( if( !zNew ) return SQLITE_NOMEM; zOut = &zNew[zOut - pCsr->zToken]; pCsr->zToken = zNew; + pCsr->nAlloc += 64; } /* Write the folded case of the last character read to the output */ diff --git a/ext/fts3/fts3_unicode2.c b/ext/fts3/fts3_unicode2.c index 51d60549b7..fb04656d19 100644 --- a/ext/fts3/fts3_unicode2.c +++ b/ext/fts3/fts3_unicode2.c @@ -121,8 +121,13 @@ int sqlite3FtsUnicodeIsalnum(int c){ 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001, 0x43FFF401, }; + static const unsigned int aAscii[4] = { + 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, + }; - if( c<(1<<22) ){ + if( c<128 ){ + return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); + }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; int iRes; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; @@ -236,7 +241,9 @@ int sqlite3FtsUnicodeTolower(int c){ assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); - if( c<65536 ){ + if( c<128 ){ + if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); + }else if( c<65536 ){ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 7e93a531b2..7ba7348241 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -105,6 +105,27 @@ proc an_print_range_array {lRange} { puts " \};" } +proc an_print_ascii_bitmap {lRange} { + foreach range $lRange { + foreach {iFirst nRange} $range {} + for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { + if {$i<=127} { set a($i) 1 } + } + } + + set aAscii [list 0 0 0 0] + foreach key [array names a] { + set idx [expr $key >> 5] + lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))] + } + + puts " static const unsigned int aAscii\[4\] = \{" + puts -nonewline " " + foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] } + puts "" + puts " \};" +} + proc print_isalnum {zFunc lRange} { puts "/*" puts "** Return true if the argument corresponds to a unicode codepoint" @@ -115,8 +136,11 @@ proc print_isalnum {zFunc lRange} { puts "*/" puts "int ${zFunc}\(int c)\{" an_print_range_array $lRange + an_print_ascii_bitmap $lRange puts { - if( c<(1<<22) ){ + if( c<128 ){ + return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); + }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; int iRes; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; @@ -365,7 +389,9 @@ proc print_tolower {zFunc} { assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); - if( c<65536 ){ + if( c<128 ){ + if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); + }else if( c<65536 ){ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; diff --git a/manifest b/manifest index c55da8cba4..575fce7aa7 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\scomments\sin\sgenerated\sfile\sfts3_unicode2.c. -D 2012-05-25T18:48:48.456 +C Add\sspecial\sfast\spaths\sto\ssqlite3FtsUnicodeTolower()\sand\sIsalnum()\sfor\scodepoints\sin\sthe\sASCII\srange. +D 2012-05-25T19:50:12.903 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2f37e468503dbe79d35c9f6dffcf3fae1ae9ec20 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -70,15 +70,15 @@ F ext/fts3/fts3_test.c 348f7d08cae05285794e23dc4fe8b8fdf66e264a F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68 F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 -F ext/fts3/fts3_unicode.c 033ee5d10d1a69613890d892829e6d3cf7177e40 -F ext/fts3/fts3_unicode2.c 6989db92aff500ae9795c1b16720ff5a17bfbf0f +F ext/fts3/fts3_unicode.c f487c6ef0ac2cc35aec3c3636c0a35483da6453c +F ext/fts3/fts3_unicode2.c 75fa8f249a5c17d324969e90d7066e7021a90874 F ext/fts3/fts3_write.c cd4af00b3b0512b4d76177a267fcaafab44cbce4 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl 3ff244e41222fa5e43c60739c501131a2395b310 +F ext/fts3/unicode/mkunicode.tcl a7214d1705cb57ff56fb828002fa811192a25524 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 F ext/icu/icu.c eb9ae1d79046bd7871aa97ee6da51eb770134b5a F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1004,7 +1004,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh a8a0a3babda96dfb1ff51adda3cbbf3dfb7266c2 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P 0c13570ec78c6887103dc99b81b470829fa28385 -R 9b33e64eb6dd16074aa14c4bbd40c221 +P 3dc567ef4702d9a63d78d11ff705cb7f7359f7a6 +R a4a319ece993f7d02e2811bac8d8bd2b U dan -Z a6323715198787ea389cc785d187a551 +Z e7946b1fde61e6ce4870009a94d8c133 diff --git a/manifest.uuid b/manifest.uuid index 327ad0f2db..35c2e08768 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3dc567ef4702d9a63d78d11ff705cb7f7359f7a6 \ No newline at end of file +cf7b25d47687635a04f4347d45f135c686b9d758 \ No newline at end of file