From: dan Date: Sat, 26 May 2012 17:57:02 +0000 (+0000) Subject: Change the format of the tables used by sqlite3FtsUnicodeTolower() to make them a... X-Git-Tag: version-3.7.13~27^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=501c74d3e1e578681d44df7511be9a1878c14884;p=thirdparty%2Fsqlite.git Change the format of the tables used by sqlite3FtsUnicodeTolower() to make them a little smaller. FossilOrigin-Name: b89d3834f6690073fca0fc22c18afa1fb280ea7d --- diff --git a/ext/fts3/fts3_unicode2.c b/ext/fts3/fts3_unicode2.c index fb04656d19..64695e3440 100644 --- a/ext/fts3/fts3_unicode2.c +++ b/ext/fts3/fts3_unicode2.c @@ -163,11 +163,14 @@ int sqlite3FtsUnicodeTolower(int c){ ** of codepoints to lower case. The rule applies to a range of nRange ** codepoints starting at codepoint iCode. ** - ** If bFlag is clear, then all the codepoints in the range are upper - ** case and require folding. Or, if bFlag is set, then only every second - ** codepoint in the range, starting with iCode, requires folding. If a - ** specific codepoint C does require folding, then the lower-case version - ** is ((C + iOff)&0xFFFF). + ** If the least significant bit in flags is clear, then the rule applies + ** to all nRange codepoints (i.e. all nRange codepoints are upper case and + ** need to be folded). Or, if it is set, then the rule only applies to + ** every second codepoint in the range, starting with codepoint C. + ** + ** The 7 most significant bits in flags are an index into the aiOff[] + ** array. If a specific codepoint C does require folding, then its lower + ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). ** ** The contents of this array are generated by parsing the CaseFolding.txt ** file distributed as part of the "Unicode Character Database". See @@ -175,65 +178,76 @@ int sqlite3FtsUnicodeTolower(int c){ */ static const struct TableEntry { unsigned short iCode; - unsigned char bFlag; + unsigned char flags; unsigned char nRange; - unsigned short iOff; } aEntry[] = { - {65, 0, 26, 32}, {181, 0, 1, 775}, {192, 0, 23, 32}, - {216, 0, 7, 32}, {256, 1, 48, 1}, {306, 1, 6, 1}, - {313, 1, 16, 1}, {330, 1, 46, 1}, {376, 0, 1, 65415}, - {377, 1, 6, 1}, {383, 0, 1, 65268}, {385, 0, 1, 210}, - {386, 1, 4, 1}, {390, 0, 1, 206}, {391, 0, 1, 1}, - {393, 0, 2, 205}, {395, 0, 1, 1}, {398, 0, 1, 79}, - {399, 0, 1, 202}, {400, 0, 1, 203}, {401, 0, 1, 1}, - {403, 0, 1, 205}, {404, 0, 1, 207}, {406, 0, 1, 211}, - {407, 0, 1, 209}, {408, 0, 1, 1}, {412, 0, 1, 211}, - {413, 0, 1, 213}, {415, 0, 1, 214}, {416, 1, 6, 1}, - {422, 0, 1, 218}, {423, 0, 1, 1}, {425, 0, 1, 218}, - {428, 0, 1, 1}, {430, 0, 1, 218}, {431, 0, 1, 1}, - {433, 0, 2, 217}, {435, 1, 4, 1}, {439, 0, 1, 219}, - {440, 0, 1, 1}, {444, 0, 1, 1}, {452, 0, 1, 2}, - {453, 0, 1, 1}, {455, 0, 1, 2}, {456, 0, 1, 1}, - {458, 0, 1, 2}, {459, 1, 18, 1}, {478, 1, 18, 1}, - {497, 0, 1, 2}, {498, 1, 4, 1}, {502, 0, 1, 65439}, - {503, 0, 1, 65480}, {504, 1, 40, 1}, {544, 0, 1, 65406}, - {546, 1, 18, 1}, {570, 0, 1, 10795}, {571, 0, 1, 1}, - {573, 0, 1, 65373}, {574, 0, 1, 10792}, {577, 0, 1, 1}, - {579, 0, 1, 65341}, {580, 0, 1, 69}, {581, 0, 1, 71}, - {582, 1, 10, 1}, {837, 0, 1, 116}, {880, 1, 4, 1}, - {886, 0, 1, 1}, {902, 0, 1, 38}, {904, 0, 3, 37}, - {908, 0, 1, 64}, {910, 0, 2, 63}, {913, 0, 17, 32}, - {931, 0, 9, 32}, {962, 0, 1, 1}, {975, 0, 1, 8}, - {976, 0, 1, 65506}, {977, 0, 1, 65511}, {981, 0, 1, 65521}, - {982, 0, 1, 65514}, {984, 1, 24, 1}, {1008, 0, 1, 65482}, - {1009, 0, 1, 65488}, {1012, 0, 1, 65476}, {1013, 0, 1, 65472}, - {1015, 0, 1, 1}, {1017, 0, 1, 65529}, {1018, 0, 1, 1}, - {1021, 0, 3, 65406}, {1024, 0, 16, 80}, {1040, 0, 32, 32}, - {1120, 1, 34, 1}, {1162, 1, 54, 1}, {1216, 0, 1, 15}, - {1217, 1, 14, 1}, {1232, 1, 88, 1}, {1329, 0, 38, 48}, - {4256, 0, 38, 7264}, {4295, 0, 1, 7264}, {4301, 0, 1, 7264}, - {7680, 1, 150, 1}, {7835, 0, 1, 65478}, {7838, 0, 1, 57921}, - {7840, 1, 96, 1}, {7944, 0, 8, 65528}, {7960, 0, 6, 65528}, - {7976, 0, 8, 65528}, {7992, 0, 8, 65528}, {8008, 0, 6, 65528}, - {8025, 1, 8, 65528}, {8040, 0, 8, 65528}, {8072, 0, 8, 65528}, - {8088, 0, 8, 65528}, {8104, 0, 8, 65528}, {8120, 0, 2, 65528}, - {8122, 0, 2, 65462}, {8124, 0, 1, 65527}, {8126, 0, 1, 58363}, - {8136, 0, 4, 65450}, {8140, 0, 1, 65527}, {8152, 0, 2, 65528}, - {8154, 0, 2, 65436}, {8168, 0, 2, 65528}, {8170, 0, 2, 65424}, - {8172, 0, 1, 65529}, {8184, 0, 2, 65408}, {8186, 0, 2, 65410}, - {8188, 0, 1, 65527}, {8486, 0, 1, 58019}, {8490, 0, 1, 57153}, - {8491, 0, 1, 57274}, {8498, 0, 1, 28}, {8544, 0, 16, 16}, - {8579, 0, 1, 1}, {9398, 0, 26, 26}, {11264, 0, 47, 48}, - {11360, 0, 1, 1}, {11362, 0, 1, 54793}, {11363, 0, 1, 61722}, - {11364, 0, 1, 54809}, {11367, 1, 6, 1}, {11373, 0, 1, 54756}, - {11374, 0, 1, 54787}, {11375, 0, 1, 54753}, {11376, 0, 1, 54754}, - {11378, 0, 1, 1}, {11381, 0, 1, 1}, {11390, 0, 2, 54721}, - {11392, 1, 100, 1}, {11499, 1, 4, 1}, {11506, 0, 1, 1}, - {42560, 1, 46, 1}, {42624, 1, 24, 1}, {42786, 1, 14, 1}, - {42802, 1, 62, 1}, {42873, 1, 4, 1}, {42877, 0, 1, 30204}, - {42878, 1, 10, 1}, {42891, 0, 1, 1}, {42893, 0, 1, 23256}, - {42896, 1, 4, 1}, {42912, 1, 10, 1}, {42922, 0, 1, 23228}, - {65313, 0, 26, 32}, + {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, + {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, + {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, + {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, + {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, + {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, + {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, + {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, + {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, + {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, + {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, + {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, + {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, + {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, + {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, + {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, + {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, + {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, + {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, + {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, + {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, + {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, + {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, + {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, + {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, + {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, + {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, + {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, + {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, + {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, + {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, + {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, + {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, + {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, + {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, + {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, + {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, + {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, + {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, + {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, + {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, + {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, + {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, + {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, + {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, + {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, + {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, + {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, + {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, + {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, + {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, + {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, + {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, + {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, + {65313, 14, 26}, + }; + static const unsigned short aiOff[] = { + 1, 2, 8, 15, 16, 26, 28, 32, + 37, 38, 40, 48, 63, 64, 69, 71, + 79, 80, 116, 202, 203, 205, 206, 207, + 209, 210, 211, 213, 214, 217, 218, 219, + 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, + 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, + 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, + 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, + 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, + 65514, 65521, 65527, 65528, 65529, }; int ret = c; @@ -262,8 +276,8 @@ int sqlite3FtsUnicodeTolower(int c){ if( iRes>=0 ){ const struct TableEntry *p = &aEntry[iRes]; - if( c<(p->iCode + p->nRange) && 0==(p->bFlag & (p->iCode ^ c)) ){ - ret = (c + p->iOff) & 0x0000FFFF; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } } diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 7ba7348241..9a7854159b 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -303,11 +303,14 @@ proc tl_print_table_header {} { ** of codepoints to lower case. The rule applies to a range of nRange ** codepoints starting at codepoint iCode. ** - ** If bFlag is clear, then all the codepoints in the range are upper - ** case and require folding. Or, if bFlag is set, then only every second - ** codepoint in the range, starting with iCode, requires folding. If a - ** specific codepoint C does require folding, then the lower-case version - ** is ((C + iOff)&0xFFFF). + ** If the least significant bit in flags is clear, then the rule applies + ** to all nRange codepoints (i.e. all nRange codepoints are upper case and + ** need to be folded). Or, if it is set, then the rule only applies to + ** every second codepoint in the range, starting with codepoint C. + ** + ** The 7 most significant bits in flags are an index into the aiOff[] + ** array. If a specific codepoint C does require folding, then its lower + ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). ** ** The contents of this array are generated by parsing the CaseFolding.txt ** file distributed as part of the "Unicode Character Database". See @@ -316,13 +319,12 @@ proc tl_print_table_header {} { }] puts " static const struct TableEntry \{" puts " unsigned short iCode;" - puts " unsigned char bFlag;" + puts " unsigned char flags;" puts " unsigned char nRange;" - puts " unsigned short iOff;" puts " \} aEntry\[\] = \{" } -proc tl_print_table_entry {togglevar entry} { +proc tl_print_table_entry {togglevar entry liOff} { upvar $togglevar t foreach {iFirst nIncr nRange nOff} $entry {} @@ -335,7 +337,11 @@ proc tl_print_table_entry {togglevar entry} { if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]} if {$nOff<0} { incr nOff [expr (1<<16)] } - set txt "{$iFirst, $flags, $nRange, $nOff}," + set idx [lsearch $liOff $nOff] + if {$idx<0} {error "malfunction generating aiOff"} + set flags [expr $flags + $idx*2] + + set txt "{$iFirst, $flags, $nRange}," if {$t==2} { puts $txt } else { @@ -361,6 +367,32 @@ proc tl_print_if_entry {entry} { puts " \}" } +proc tl_generate_ioff_table {lRecord} { + foreach entry $lRecord { + foreach {iFirst nIncr nRange iOff} $entry {} + if {$iOff<0} { incr iOff [expr (1<<16)] } + if {[info exists a($iOff)]} continue + set a($iOff) 1 + } + + set liOff [lsort -integer [array names a]] + if {[llength $liOff]>128} { error "Too many distinct ioffs" } + return $liOff +} + +proc tl_print_ioff_table {liOff} { + puts -nonewline " static const unsigned short aiOff\[\] = \{" + set i 0 + foreach off $liOff { + if {($i % 8)==0} {puts "" ; puts -nonewline " "} + puts -nonewline [format "% -7s" "$off,"] + incr i + } + puts "" + puts " \};" + +} + proc print_tolower {zFunc} { set lRecord [tl_create_records] @@ -376,13 +408,17 @@ proc print_tolower {zFunc} { puts "** is less than zero." puts "*/" puts "int ${zFunc}\(int c)\{" + + set liOff [tl_generate_ioff_table $lRecord] tl_print_table_header foreach entry $lRecord { - if {[tl_print_table_entry toggle $entry]} { + if {[tl_print_table_entry toggle $entry $liOff]} { lappend lHigh $entry } } tl_print_table_footer toggle + tl_print_ioff_table $liOff + puts { int ret = c; @@ -410,8 +446,8 @@ proc print_tolower {zFunc} { if( iRes>=0 ){ const struct TableEntry *p = &aEntry[iRes]; - if( c<(p->iCode + p->nRange) && 0==(p->bFlag & (p->iCode ^ c)) ){ - ret = (c + p->iOff) & 0x0000FFFF; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } } diff --git a/manifest b/manifest index ff6bb4ea6c..2ab322934e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\scoverage\stests\sfor\sfts3_unicode.c. -D 2012-05-26T16:22:56.288 +C Change\sthe\sformat\sof\sthe\stables\sused\sby\ssqlite3FtsUnicodeTolower()\sto\smake\sthem\sa\slittle\ssmaller. +D 2012-05-26T17:57:02.187 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2f37e468503dbe79d35c9f6dffcf3fae1ae9ec20 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -71,14 +71,14 @@ F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68 F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 F ext/fts3/fts3_unicode.c f487c6ef0ac2cc35aec3c3636c0a35483da6453c -F ext/fts3/fts3_unicode2.c 75fa8f249a5c17d324969e90d7066e7021a90874 +F ext/fts3/fts3_unicode2.c e43024fe05db9316b7825bfce809479fad6ce44e F ext/fts3/fts3_write.c 6a6391d6b01114f885e24e1f66bbc11ffba0e9e2 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl a7214d1705cb57ff56fb828002fa811192a25524 +F ext/fts3/unicode/mkunicode.tcl 2775280062b6595dd18e44ddef6327239ab290ab F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 F ext/icu/icu.c eb9ae1d79046bd7871aa97ee6da51eb770134b5a F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1004,7 +1004,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh a8a0a3babda96dfb1ff51adda3cbbf3dfb7266c2 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P ed28c48a3dd7e766e60db0d96ef5460bf9913e6b -R 176e1514c35e0583f83fa067066b2e60 +P 07d3ea8a3cb179fab6c48934fc6751f53b507d36 +R fbb3511eb94203c7d1dd12033a92a0a8 U dan -Z c30e5b012ccf32e4cc1eaae6ee3b132f +Z d24d4148848cb3328b6f3577a1e7fca3 diff --git a/manifest.uuid b/manifest.uuid index eeb3f2bf91..b13186f9c7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -07d3ea8a3cb179fab6c48934fc6751f53b507d36 \ No newline at end of file +b89d3834f6690073fca0fc22c18afa1fb280ea7d \ No newline at end of file