From: dan Date: Fri, 2 Mar 2012 11:48:50 +0000 (+0000) Subject: Fix a bug in merging FTS language tables for languages other than language 0. X-Git-Tag: version-3.7.11~17^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e3ddd1ac42834a0189fa359a32755c2e9b733224;p=thirdparty%2Fsqlite.git Fix a bug in merging FTS language tables for languages other than language 0. FossilOrigin-Name: d281cb8984c911a4c0cce2ec299e1351d8e580e4 --- diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index ef2436531c..b158cc4510 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -488,7 +488,7 @@ int sqlite3Fts3AllSegdirs( /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int(pStmt, 1, iLevel+iIndex*FTS3_SEGDIR_MAXLEVEL); + sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); } } *ppStmt = pStmt; diff --git a/manifest b/manifest index c4b93f56a0..e051362a62 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\s"languageid="\soption\sto\sfts4.\sThis\scode\sis\sstill\slargely\suntested\sand\salsmost\scertainly\sbuggy. -D 2012-03-01T19:44:20.362 +C Fix\sa\sbug\sin\smerging\sFTS\slanguage\stables\sfor\slanguages\sother\sthan\slanguage\s0. +D 2012-03-02T11:48:50.564 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -78,7 +78,7 @@ F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894 F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68 -F ext/fts3/fts3_write.c 489d262b1ee9ab1dbb4da48bd8737fac15d0f58f +F ext/fts3/fts3_write.c 36fc2e3a28f51ee135a344877c1e4be0a9f45e6e F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -496,7 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2 F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659 F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68 F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f -F test/fts4langid.test 3d968b7c0afb8be1609794267f34b89d378a81ea +F test/fts4langid.test 7ab7be619d3acb3727e4bef3230ba3dbcf2e0556 F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a @@ -992,10 +992,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P 16330a2f7262173a32ae48a72c0ee2522b6dc554 -R 86036df8ba11902f17395620671e5794 -T *branch * fts4-languageid -T *sym-fts4-languageid * -T -sym-trunk * +P bea257f70f10dd1111d79cabd1e1462dc651704d +R a3a9247d2c76c9d90f9fc486f3311f0d U dan -Z 6902c01b6e8a000d5e06f8fe8778490f +Z f1e998b56e58f712fe6da1411961b8ef diff --git a/manifest.uuid b/manifest.uuid index d2201717b8..ce8a1c3b82 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bea257f70f10dd1111d79cabd1e1462dc651704d \ No newline at end of file +d281cb8984c911a4c0cce2ec299e1351d8e580e4 \ No newline at end of file diff --git a/test/fts4langid.test b/test/fts4langid.test index a65ccedac3..51c42ddb54 100644 --- a/test/fts4langid.test +++ b/test/fts4langid.test @@ -24,6 +24,27 @@ ifcapable !fts3 { set ::testprefix fts4langid +#--------------------------------------------------------------------------- +# Test plan: +# +# 1.* - Warm-body tests created for specific purposes during development. +# Passing these doesn't really prove much. +# +# 2.* - Test that FTS queries only ever return rows associated with +# the requested language. +# +# 3.* - Test that the 'optimize' and 'rebuild' commands work correctly. +# +# 4.* - Test that if one is provided, the tokenizer xLanguage method +# is called to configure the tokenizer before tokenizing query +# or document text. +# +# 5.* - Test the fts4aux table when the associated FTS4 table contains +# multiple languages. +# +# 6.* - Tests with content= tables. Both where there is a real +# underlying content table and where there is not. +# do_execsql_test 1.1 { @@ -74,5 +95,165 @@ do_catchsql_test 1.17 { INSERT INTO t1(content, lang_id) VALUES('123', -1); } {1 {constraint failed}} +do_execsql_test 1.18 { + DROP TABLE t1; + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); + INSERT INTO t1(content, lang_id) VALUES('A', 13); + INSERT INTO t1(content, lang_id) VALUES('B', 13); + INSERT INTO t1(content, lang_id) VALUES('C', 13); + INSERT INTO t1(content, lang_id) VALUES('D', 13); + INSERT INTO t1(content, lang_id) VALUES('E', 13); + INSERT INTO t1(content, lang_id) VALUES('F', 13); + INSERT INTO t1(content, lang_id) VALUES('G', 13); + INSERT INTO t1(content, lang_id) VALUES('H', 13); + INSERT INTO t1(content, lang_id) VALUES('I', 13); + INSERT INTO t1(content, lang_id) VALUES('J', 13); + INSERT INTO t1(content, lang_id) VALUES('K', 13); + INSERT INTO t1(content, lang_id) VALUES('L', 13); + INSERT INTO t1(content, lang_id) VALUES('M', 13); + INSERT INTO t1(content, lang_id) VALUES('N', 13); + INSERT INTO t1(content, lang_id) VALUES('O', 13); + INSERT INTO t1(content, lang_id) VALUES('P', 13); + INSERT INTO t1(content, lang_id) VALUES('Q', 13); + INSERT INTO t1(content, lang_id) VALUES('R', 13); + INSERT INTO t1(content, lang_id) VALUES('S', 13); + SELECT rowid FROM t1 WHERE t1 MATCH 'A'; +} {} + + +#------------------------------------------------------------------------- +# Test cases 2.* +# + +proc build_multilingual_db_1 {db} { + $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } + + set xwords [list zero one two three four five six seven eight nine ten] + set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] + + for {set i 0} {$i < 1000} {incr i} { + set iLangid [expr $i%9] + set x "" + set y "" + + set x [list] + lappend x [lindex $xwords [expr ($i / 1000) % 10]] + lappend x [lindex $xwords [expr ($i / 100) % 10]] + lappend x [lindex $xwords [expr ($i / 10) % 10]] + lappend x [lindex $xwords [expr ($i / 1) % 10]] + + set y [list] + lappend y [lindex $ywords [expr ($i / 1000) % 10]] + lappend y [lindex $ywords [expr ($i / 100) % 10]] + lappend y [lindex $ywords [expr ($i / 10) % 10]] + lappend y [lindex $ywords [expr ($i / 1) % 10]] + + $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } + } +} + +proc rowid_list_set_langid {langid} { + set ::rowid_list_langid $langid +} +proc rowid_list {pattern} { + set langid $::rowid_list_langid + set res [list] + db eval {SELECT docid, x, y FROM t2 WHERE l = $langid ORDER BY docid ASC} { + if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { + lappend res $docid + } + } + return $res +} + +proc or_merge_list {list1 list2} { + set res [list] + + set i1 0 + set i2 0 + + set n1 [llength $list1] + set n2 [llength $list2] + + while {$i1 < $n1 && $i2 < $n2} { + set e1 [lindex $list1 $i1] + set e2 [lindex $list2 $i2] + + if {$e1==$e2} { + lappend res $e1 + incr i1 + incr i2 + } elseif {$e1 < $e2} { + lappend res $e1 + incr i1 + } else { + lappend res $e2 + incr i2 + } + } + + concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end] +} + +proc or_merge_lists {args} { + set res [lindex $args 0] + for {set i 1} {$i < [llength $args]} {incr i} { + set res [or_merge_list $res [lindex $args $i]] + } + set res +} + +proc and_merge_list {list1 list2} { + foreach i $list2 { set a($i) 1 } + set res [list] + foreach i $list1 { + if {[info exists a($i)]} {lappend res $i} + } + set res +} + + +proc and_merge_lists {args} { + set res [lindex $args 0] + for {set i 1} {$i < [llength $args]} {incr i} { + set res [and_merge_list $res [lindex $args $i]] + } + set res +} + +proc filter_list {list langid} { + set res [list] + foreach i $list { + if {($i % 9) == $langid} {lappend res $i} + } + set res +} + +do_test 2.0 { + reset_db + build_multilingual_db_1 db +} {} + +proc do_test_2.1 {tn query res_script} { + for {set langid 0} {$langid < 10} {incr langid} { + rowid_list_set_langid $langid + set res [eval $res_script] + + set actual [ + execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} + ] + do_test 2.1.$tn.$langid [list set {} $actual] $res + } +} + +do_test_2.1 1 {delta} { rowid_list delta } +do_test_2.1 2 {"zero one two"} { rowid_list "zero one two" } +do_test_2.1 3 {zero one two} { + and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] +} +do_test_2.1 4 {"zero one" OR "one two"} { + or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] +} + finish_test