From: dan Date: Fri, 8 Jan 2016 17:21:18 +0000 (+0000) Subject: Begin adding fts5 tests involving synonyms and detail=none/col tables. X-Git-Tag: version-3.11.0~157^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4f40cc6f3dc8f2dca8d7020238b22786a3a541b9;p=thirdparty%2Fsqlite.git Begin adding fts5 tests involving synonyms and detail=none/col tables. FossilOrigin-Name: b3e6f15ec2d9a834e2c80b91ffd7097553816228 --- diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index aadf148716..2c7fedcefe 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -202,15 +202,24 @@ proc fts5_rnddoc {n} { # -near N (NEAR distance. Default 10) # -col C (List of column indexes to match against) # -pc VARNAME (variable in caller frame to use for phrase numbering) +# -dict VARNAME (array in caller frame to use for synonyms) # proc nearset {aCol args} { + + # Process the command line options. + # set O(-near) 10 set O(-col) {} set O(-pc) "" + set O(-dict) "" set nOpt [lsearch -exact $args --] if {$nOpt<0} { error "no -- option" } + # Set $lPhrase to be a list of phrases. $nPhrase its length. + set lPhrase [lrange $args [expr $nOpt+1] end] + set nPhrase [llength $lPhrase] + foreach {k v} [lrange $args 0 [expr $nOpt-1]] { if {[info exists O($k)]==0} { error "unrecognized option $k" } set O($k) $v @@ -222,9 +231,7 @@ proc nearset {aCol args} { upvar $O(-pc) counter } - # Set $phraselist to be a list of phrases. $nPhrase its length. - set phraselist [lrange $args [expr $nOpt+1] end] - set nPhrase [llength $phraselist] + if {$O(-dict)!=""} { upvar $O(-dict) aDict } for {set j 0} {$j < [llength $aCol]} {incr j} { for {set i 0} {$i < $nPhrase} {incr i} { @@ -232,41 +239,54 @@ proc nearset {aCol args} { } } - set iCol -1 - foreach col $aCol { - incr iCol - if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue - set nToken [llength $col] + # Loop through each column of the current row. 
+ for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { - set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] - for { } {$iFL < $nToken} {incr iFL} { - for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - set B($iPhrase) [list] - } + # If there is a column filter, test whether this column is excluded. If + # so, skip to the next iteration of this loop. Otherwise, set zCol to the + # column value and nToken to the number of tokens that comprise it. + if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue + set zCol [lindex $aCol $iCol] + set nToken [llength $zCol] + + # Each iteration of the following loop searches a substring of the + # column value for phrase matches. The last token of the substring + # is token $iLast of the column value. The first token is: + # + # iFirst = ($iLast - $O(-near) - 1) + # + # A phrase counts as matching the substring if its first token lies on or + # before $iLast and its last token on or after $iFirst. So a phrase of $sz + # tokens may begin as early as token ($iLast - $O(-near) - $sz). + # + # For example, if the query is "NEAR(a+b c, 2)" and the column value: + # + # "x x x x A B x x C x" + # 0 1 2 3 4 5 6 7 8 9 + # + # when (iLast==8 && iFirst==5) the range will contain both phrases and + # so both instances can be added to the output poslists. + # + set iLast [expr $O(-near) >= $nToken ? 
$nToken - 1 : $O(-near)] + for { } {$iLast < $nToken} {incr iLast} { + + catch { array unset B } for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - set p [lindex $phraselist $iPhrase] + set p [lindex $lPhrase $iPhrase] set nPm1 [expr {[llength $p] - 1}] - set iFirst [expr $iFL - $O(-near) - [llength $p]] - - for {set i $iFirst} {$i <= $iFL} {incr i} { - set lCand [lrange $col $i [expr $i+$nPm1]] + set iFirst [expr $iLast - $O(-near) - [llength $p]] + for {set i $iFirst} {$i <= $iLast} {incr i} { + set lCand [lrange $zCol $i [expr $i+$nPm1]] set bMatch 1 foreach tok $p term $lCand { - if {[string match $tok $term]==0} { - #puts "$tok $term failed" - set bMatch 0 - } + if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break } } - if {$bMatch} { - #puts "match at $i" - lappend B($iPhrase) $i - } - - #if {$lCand == $p} { lappend B($iPhrase) $i } + if {$bMatch} { lappend B($iPhrase) $i } } - if {[llength $B($iPhrase)] == 0} break + + if {![info exists B($iPhrase)]} break } if {$iPhrase==$nPhrase} { @@ -294,6 +314,18 @@ proc nearset {aCol args} { sort_poslist $res } +proc nearset_match {aDictVar tok term} { + if {[string match $tok $term]} { return 1 } + + upvar $aDictVar aDict + if {[info exists aDict($tok)]} { + foreach s $aDict($tok) { + if {[string match $s $term]} { return 1 } + } + } + return 0; +} + #------------------------------------------------------------------------- # Usage: # @@ -405,7 +437,6 @@ proc fts5_poslist2collist {poslist} { } # Comparison function used by fts5_poslist2collist to sort collist entries. -# proc fts5_collist_elem_compare {a b} { foreach {a1 a2} [split $a .] {} foreach {b1 b2} [split $b .] {} @@ -426,17 +457,23 @@ proc fts5_collist_elem_compare {a b} { # FROM $tbl('$expr') # ORDER BY rowid $order; # -proc fts5_query_data {expr tbl {order ASC}} { +proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} { # Figure out the set of columns in the FTS5 table. 
This routine does # not handle tables with UNINDEXED columns, but if it did, it would # have to be here. db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) } + set d "" + if {$aDictVar != ""} { + upvar $aDictVar aDict + set d aDict + } + set cols "" foreach e $lCols { append cols ", '$e'" } set tclexpr [db one [subst -novar { - SELECT fts5_expr_tcl( $expr, 'nearset $cols -pc ::pc' [set cols] ) + SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] ) }]] set res [list] @@ -457,9 +494,17 @@ proc fts5_query_data {expr tbl {order ASC}} { #------------------------------------------------------------------------- # Similar to [fts5_query_data], but omit the collist field. # -proc fts5_poslist_data {expr tbl {order ASC}} { +proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} { set res [list] - foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order] { + + if {$aDictVar!=""} { + upvar $aDictVar aDict + set dict aDict + } else { + set dict "" + } + + foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] { lappend res $rowid $poslist } set res @@ -467,22 +512,15 @@ proc fts5_poslist_data {expr tbl {order ASC}} { #------------------------------------------------------------------------- # -proc nearset_rf {aCol args} { - set idx [lsearch -exact $args --] - if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} { - set ::expr_not_ok 1 - } - list -} - -proc nearset_rc {aCol args} { - nearset_rf $aCol {*}$args - if {[lsearch $args -col]>=0} { - set ::expr_not_ok 1 - } - list -} +# This command will only work inside a [foreach_detail_mode] block. It tests +# whether or not expression $expr run on FTS5 table $tbl is supported by +# the current mode. If so, 1 is returned. If not, 0. 
+# +# detail=full (all queries supported) +# detail=col (all but phrase queries and NEAR queries) +# detail=none (all but phrase queries, NEAR queries, and column filters) +# proc fts5_expr_ok {expr tbl} { if {![detail_is_full]} { @@ -505,3 +543,21 @@ proc fts5_expr_ok {expr tbl} { return 1 } +# Helper for [fts5_expr_ok] +proc nearset_rf {aCol args} { + set idx [lsearch -exact $args --] + if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} { + set ::expr_not_ok 1 + } + list +} + +# Helper for [fts5_expr_ok] +proc nearset_rc {aCol args} { + nearset_rf $aCol {*}$args + if {[lsearch $args -col]>=0} { + set ::expr_not_ok 1 + } + list +} + diff --git a/ext/fts5/test/fts5synonym2.test b/ext/fts5/test/fts5synonym2.test new file mode 100644 index 0000000000..e3a8003758 --- /dev/null +++ b/ext/fts5/test/fts5synonym2.test @@ -0,0 +1,139 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on custom tokenizers that support synonyms. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5synonym2 + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +#------------------------------------------------------------------------- +# Code for a simple Tcl tokenizer that supports synonyms at query time. 
+# Each ::syn(WORD) holds the other members of WORD's synonym group. +foreach SYNDICT { + {zero 0} + {one 1 i} + {two 2 ii} + {three 3 iii} + {four 4 iv} + {five 5 v} + {six 6 vi} + {seven 7 vii} + {eight 8 viii} + {nine 9 ix} +} { + foreach s $SYNDICT { + set o [list] + foreach x $SYNDICT {if {$x!=$s} {lappend o $x}} + set ::syn($s) $o + } +} + +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags == "query" && [info exists ::syn($w)]} { + foreach s $::syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } + } + } +} + +proc tcl_create {args} { + return "tcl_tokenize" +} + +# +# End of tokenizer code. +#------------------------------------------------------------------------- + +foreach_detail_mode $testprefix { + +sqlite3_fts5_create_tokenizer db tcl tcl_create +fts5_aux_test_functions db + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize=tcl, detail=%DETAIL%); + + INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i'); + INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi'); + INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3'); + INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7'); + INSERT INTO ss VALUES('1 5 4 eight ii iv iii', 'nine 2 eight ix v vii'); + INSERT INTO ss VALUES('one 7 seven six 2 two', '1 2 four 7 4 3 4'); + INSERT INTO ss VALUES('eight iv 4 nine vii six 1', '5 6 v one zero 4'); + INSERT INTO ss VALUES('v 9 8 iii 4', '9 4 seven two vi vii'); + INSERT INTO ss VALUES('3 ix two 9 0 nine i', 'five ii nine two viii i five'); + INSERT INTO ss VALUES('six iii 9 two eight 2', 'nine i nine vii nine'); + INSERT INTO ss VALUES('6 three zero seven vii five', '8 vii ix 0 7 seven'); + INSERT INTO ss VALUES('8 vii 8 7 3 4', 'eight iii four viii nine iv three'); + INSERT INTO ss VALUES('4 v 7 two 0 one 8', 'vii 1 two five i zero 9'); + INSERT INTO ss VALUES('3 ii vii vi eight', '8 4 ix one three eight'); + INSERT INTO ss VALUES('iv eight seven 6 9 seven', 'one vi two five 
seven'); + INSERT INTO ss VALUES('i i 5 i v vii eight', '2 seven i 2 2 four'); + INSERT INTO ss VALUES('0 i iii nine 3 ix five', '0 eight iv 0 six 2'); + INSERT INTO ss VALUES('iv vii three 3 9 one 8', '2 ii 6 eight ii six six'); + INSERT INTO ss VALUES('eight one two nine six', '8 9 3 viii vi'); + INSERT INTO ss VALUES('one 0 four ii eight one 3', 'iii eight vi vi vi'); + INSERT INTO ss VALUES('4 0 eight 0 0', '1 four one vii seven ii'); + INSERT INTO ss VALUES('1 zero nine 2 2', 'viii iv two vi nine v iii'); + INSERT INTO ss VALUES('5 five viii four four vi', '8 five 7 vii 6 4'); + INSERT INTO ss VALUES('7 ix four 8 vii', 'nine three nine ii ix vii'); + INSERT INTO ss VALUES('nine iv v i 0 v', 'two iv vii six i ix 4'); + INSERT INTO ss VALUES('one v v one viii 3 8', '2 1 3 five iii'); + INSERT INTO ss VALUES('six ii 5 nine 4 viii seven', 'eight i ix ix 7 four'); + INSERT INTO ss VALUES('9 ii two seven three 7 0', 'six viii seven 7 five'); + INSERT INTO ss VALUES('five two 4 viii nine', '9 7 nine zero 1 two one'); + INSERT INTO ss VALUES('viii 8 iii i ii 8 3', '4 2 7 v 8 8'); + INSERT INTO ss VALUES('four vii 4 iii zero 0 vii', '3 viii iii zero 9 i'); + INSERT INTO ss VALUES('0 seven v five i five v', 'one 4 2 ix 9'); + INSERT INTO ss VALUES('two 5 two two ix 4 1', '3 nine ii v nine 3 five'); + INSERT INTO ss VALUES('five 5 7 4 6 vii', 'three 2 ix 2 8 6'); + INSERT INTO ss VALUES('six iii vi iv seven eight', '8 six 7 0 4'); + INSERT INTO ss VALUES('vi vi iv 3 0 one one', '9 6 eight ix iv'); + INSERT INTO ss VALUES('7 2 2 iii 0', '0 0 seven 1 nine'); + INSERT INTO ss VALUES('8 6 iv six ii', 'iv 6 3 4 ii five'); + INSERT INTO ss VALUES('0 two two seven ii', 'vii ix four 4 zero vi vi'); + INSERT INTO ss VALUES('2 one eight 8 9 7', 'vi 3 0 3 vii'); + INSERT INTO ss VALUES('iii ii ix iv three', 'vi i 6 1 two'); + INSERT INTO ss VALUES('eight four nine 8 seven', 'one three i nine iii one'); + INSERT INTO ss VALUES('iii seven five ix 8', 'ii 7 seven 0 four ii'); + INSERT 
INTO ss VALUES('four 0 1 5 two', 'iii 9 5 ii ii 2 4'); + INSERT INTO ss VALUES('iii nine four vi 8 five six', 'i i ii seven vi vii'); + INSERT INTO ss VALUES('eight vii eight six 3', 'i vii 1 six 9 vii'); + INSERT INTO ss VALUES('9 0 viii viii five', 'i 1 viii ix 3 4'); + INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii'); + INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one'); +} + +foreach {tn expr} { + 1 "eight" +} { + if {[fts5_expr_ok $expr ss]==0} { + do_test 1.$tn.OMITTED { list } [list] + continue + } + + set res [fts5_query_data $expr ss ASC ::syn] + do_execsql_test 1.$tn.[llength $res].asc { + SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr) + } $res +} + +} + +finish_test + diff --git a/manifest b/manifest index 459ee65528..48daf058c3 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sfts5vocab.test\sso\sthat\sit\sworks\swith\sdetail=none\stables. -D 2016-01-08T07:53:09.125 +C Begin\sadding\sfts5\stests\sinvolving\ssynonyms\sand\sdetail=none/col\stables. 
+D 2016-01-08T17:21:18.901 F Makefile.in 7c8cc4c2f0179efc6fa9492141d1fb65f4807054 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc e45d8b9b56dfa3f2cd860b2c28bd9d304513b042 @@ -114,7 +114,7 @@ F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1 F ext/fts5/fts5_vocab.c da64ecbd217625980a1721fbd588a1e4118a51b6 F ext/fts5/fts5parse.y 1647eba089b9b3fc058b4dc989d9da87d15b9580 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl c9169fe40bf751e1b311271df31aec15732b26c0 +F ext/fts5/test/fts5_common.tcl 393882afb225a21edf033043bbf936951e9198c1 F ext/fts5/test/fts5aa.test 7e814df4a0e6c22a6fe2d84f210fdc0b5068a084 F ext/fts5/test/fts5ab.test 30325a89453280160106be411bba3acf138e6d1b F ext/fts5/test/fts5ac.test d5073ca7bd2d9fe8aab0c82c6c75a7e4b0d70ced @@ -173,6 +173,7 @@ F ext/fts5/test/fts5rowid.test 400384798349d658eaf06aefa1e364957d5d4821 F ext/fts5/test/fts5simple.test 2bc6451cbe887a9215f5b14ae307c70d850344c9 F ext/fts5/test/fts5simple2.test 843f1f7fe439ff32bf74f4fd6430632f9636ef3a F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671 +F ext/fts5/test/fts5synonym2.test 6aa842d0e5bd019db0c3597e0860eb68eb2867e4 F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89 F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841 F ext/fts5/test/fts5unicode2.test c1dd890ba32b7609adba78e420faa847abe43b59 @@ -1409,7 +1410,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P eedd095dc1c81ce45df00093ba237dd7b3cdff3d -R bd0f99ecb522e0859c1abeb3e9044459 +P d9135cc723fc4227aace6dcf4ffa4630c9d23aa0 +R c443735bd0d019a3a4798f3f3350cb78 U dan -Z 0717ee91389ddd999cbabbcc60656596 +Z 48c62f9fa751846d5897bddd7cc5158f diff --git a/manifest.uuid 
b/manifest.uuid index 042f8c6f9c..dc0f491a48 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d9135cc723fc4227aace6dcf4ffa4630c9d23aa0 \ No newline at end of file +b3e6f15ec2d9a834e2c80b91ffd7097553816228 \ No newline at end of file