){
Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
Fts5Index *pIdx = pCtx->pStorage->pIndex;
- if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
+ if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++;
}
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
int iEnd /* End offset of token */
){
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
- if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
+ if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++;
}
pCtx->cksum ^= sqlite3Fts5IndexCksum(
typedef struct F5tTokenizerContext F5tTokenizerContext;
typedef struct F5tTokenizerCb F5tTokenizerCb;
typedef struct F5tTokenizerModule F5tTokenizerModule;
-typedef struct F5tTokenizerModule F5tTokenizerInstance;
+typedef struct F5tTokenizerInstance F5tTokenizerInstance;
struct F5tTokenizerContext {
void *pCtx;
F5tTokenizerContext *pContext;
};
+struct F5tTokenizerInstance {  /* Per-instance state of a Tcl-scripted tokenizer */
+ Tcl_Interp *interp;  /* Interpreter in which pScript is evaluated */
+ Tcl_Obj *pScript;  /* Script prefix; a flags word and the text are appended to a copy before evaluation */
+ F5tTokenizerContext *pContext;  /* Its pCtx/xToken are set to the current callback while the script runs */
+};
+
static int f5tTokenizerCreate(
void *pCtx,
const char **azArg,
int (*xOldToken)(void*, int, const char*, int, int, int);
Tcl_Obj *pEval;
int rc;
+ const char *zFlags;
pOldCtx = pInst->pContext->pCtx;
xOldToken = pInst->pContext->xToken;
+ pInst->pContext->pCtx = pCtx;
+ pInst->pContext->xToken = xToken;
+
+ assert(
+ flags==FTS5_TOKENIZE_DOCUMENT
+ || flags==FTS5_TOKENIZE_AUX
+ || flags==FTS5_TOKENIZE_QUERY
+ || flags==(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)
+ );
pEval = Tcl_DuplicateObj(pInst->pScript);
Tcl_IncrRefCount(pEval);
- rc = Tcl_ListObjAppendElement(
- pInst->interp, pEval, Tcl_NewStringObj(pText, nText)
- );
- if( rc==TCL_OK ){
- rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
+ switch( flags ){
+ case FTS5_TOKENIZE_DOCUMENT:
+ zFlags = "document";
+ break;
+ case FTS5_TOKENIZE_AUX:
+ zFlags = "aux";
+ break;
+ case FTS5_TOKENIZE_QUERY:
+ zFlags = "query";
+ break;
+ case (FTS5_TOKENIZE_PREFIX | FTS5_TOKENIZE_QUERY):
+ zFlags = "prefixquery";
+ break;
+ default:
+ assert( 0 );
+ zFlags = "invalid";
+ break;
}
+
+ Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(zFlags, -1));
+ Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(pText,nText));
+ rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
Tcl_DecrRefCount(pEval);
pInst->pContext->pCtx = pOldCtx;
}
/*
-** sqlite3_fts5_token TEXT START END POS
+** sqlite3_fts5_token ?-colocated? TEXT START END
*/
static int f5tTokenizerReturn(
void * clientData,
int iStart;
int iEnd;
int nToken;
+ int tflags = 0;
char *zToken;
int rc;
- assert( p );
- if( objc!=4 ){
- Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END");
+ if( objc==5 ){
+ int nArg;
+ char *zArg = Tcl_GetStringFromObj(objv[1], &nArg);
+ if( nArg<=10 && nArg>=2 && memcmp("-colocated", zArg, nArg)==0 ){
+ tflags |= FTS5_TOKEN_COLOCATED;
+ }else{
+ goto usage;
+ }
+ }else if( objc!=4 ){
+ goto usage;
+ }
+
+ zToken = Tcl_GetStringFromObj(objv[objc-3], &nToken);
+ if( Tcl_GetIntFromObj(interp, objv[objc-2], &iStart)
+ || Tcl_GetIntFromObj(interp, objv[objc-1], &iEnd)
+ ){
return TCL_ERROR;
}
+
if( p->xToken==0 ){
Tcl_AppendResult(interp,
"sqlite3_fts5_token may only be used by tokenizer callback", 0
return TCL_ERROR;
}
- zToken = Tcl_GetStringFromObj(objv[1], &nToken);
- if( Tcl_GetIntFromObj(interp, objv[2], &iStart)
- || Tcl_GetIntFromObj(interp, objv[3], &iEnd)
- ){
- return TCL_ERROR;
- }
-
- rc = p->xToken(p->pCtx, 0, zToken, nToken, iStart, iEnd);
+ rc = p->xToken(p->pCtx, tflags, zToken, nToken, iStart, iEnd);
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
return TCL_OK;
+
+ usage:
+ Tcl_WrongNumArgs(interp, 1, objv, "?-colocated? TEXT START END");
+ return TCL_ERROR;
}
static void f5tDelTokenizer(void *pCtx){
--- /dev/null
+# 2014 Dec 20
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focusing on custom tokenizers that support synonyms.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5synonym
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+
+# Strip the leading run of space characters from the variable named by
+# $textvar in the caller's scope. Returns the number of spaces removed
+# (0 if the text does not start with a space).
+proc gobble_whitespace {textvar} {
+  upvar $textvar remaining
+  regexp {([ ]*)(.*)} $remaining -> leading remaining
+  string length $leading
+}
+
+# Remove the leading run of non-space characters from the variable named by
+# $textvar, store that run in the variable named by $wordvar (both in the
+# caller's scope) and return its length. The result is 0 when the text is
+# empty or begins with a space.
+proc gobble_text {textvar wordvar} {
+  upvar $textvar remaining $wordvar word
+  regexp {([^ ]*)(.*)} $remaining -> word remaining
+  string length $word
+}
+
+# Split $text into space-separated words. Returns a flat list holding three
+# elements per word: the word itself, its start offset and its end offset
+# (start offset plus word length), both counted from the start of $text.
+proc do_tokenize_split {text} {
+  set ret [list]
+  set iOff [gobble_whitespace text]
+  while {[set nToken [gobble_text text word]]} {
+    # The expression is braced so that expr performs the substitution itself.
+    lappend ret $word $iOff [expr {$iOff + $nToken}]
+    incr iOff $nToken
+    incr iOff [gobble_whitespace text]
+  }
+
+  return $ret
+}
+
+# Tokenizer callback: report each space-separated word of $text, with its
+# start and end offsets, via sqlite3_fts5_token. $tflags is not used.
+proc tcl_tokenize {tflags text} {
+  foreach {word iFirst iLast} [do_tokenize_split $text] {
+    sqlite3_fts5_token $word $iFirst $iLast
+  }
+}
+
+# Ignores its arguments and returns the string "tcl_tokenize", the name of
+# the tokenizer proc defined in this file.
+proc tcl_create {args} {
+  return tcl_tokenize
+}
+
+sqlite3_fts5_create_tokenizer db tcl tcl_create
+
+#-------------------------------------------------------------------------
+# Warm body test for the code in fts5_tcl.c.
+#
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
+ INSERT INTO ft VALUES('abc def ghi');
+ INSERT INTO ft VALUES('jkl mno pqr');
+ SELECT rowid, x FROM ft WHERE ft MATCH 'def';
+ SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
+} {1 {abc def ghi} {jkl mno pqr} 2}
+
+#-------------------------------------------------------------------------
+# Test a tokenizer that supports synonyms by adding extra entries to the
+# FTS index.
+#
+# Build the ::syn array: for every word of every synonym group below,
+# ::syn(word) is the list of the other members of its group, in group order.
+foreach S {
+  {zero 0}
+  {one 1}
+  {two 2}
+  {three 3 iii}
+  {four 4}
+  {five 5}
+  {six 6}
+  {seven 7}
+  {eight 8}
+  {nine 9}
+} {
+  foreach s $S {
+    set o [list]
+    foreach x $S {
+      # "ne" compares as strings; "!=" would compare two numeric-looking
+      # words by value and could treat distinct spellings as the same word.
+      if {$x ne $s} {lappend o $x}
+    }
+    set ::syn($s) $o
+  }
+}
+
+# Tokenizer callback with synonym support. Every word of $text is reported
+# via sqlite3_fts5_token. When $tflags is "document" and the word has an
+# entry in ::syn, each listed synonym is additionally reported with the
+# -colo option and the same start/end offsets as the word itself.
+proc tcl_tokenize {tflags text} {
+  foreach {word iFirst iLast} [do_tokenize_split $text] {
+    sqlite3_fts5_token $word $iFirst $iLast
+    if {$tflags ne "document"} continue
+    if {![info exists ::syn($word)]} continue
+    foreach synonym $::syn($word) {
+      sqlite3_fts5_token -colo $synonym $iFirst $iLast
+    }
+  }
+}
+reset_db
+sqlite3_fts5_create_tokenizer db tcl tcl_create
+
+do_execsql_test 2.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
+ INSERT INTO ft VALUES('one two three');
+ INSERT INTO ft VALUES('four five six');
+ INSERT INTO ft VALUES('eight nine ten');
+} {}
+
+foreach {tn expr res} {
+ 1 "3" 1
+ 2 "eight OR 8 OR 5" {2 3}
+ 3 "10" {}
+ 4 "1*" {1}
+} {
+ do_execsql_test 2.1.$tn {
+ SELECT rowid FROM ft WHERE ft MATCH $expr
+ } $res
+}
+
+#-------------------------------------------------------------------------
+# Test some broken tokenizers:
+#
+# 3.1.*: A tokenizer that declares the very first token to be colocated.
+#
+# 3.2.*: A tokenizer that reports two identical tokens at the same position.
+# This is allowed.
+#
+reset_db
+sqlite3_fts5_create_tokenizer db tcl tcl_create
+# Deliberately broken tokenizer: the first word of $text is reported with
+# the -colo option; every later word is reported without it.
+proc tcl_tokenize {tflags text} {
+  set isFirst 1
+  foreach {word iFirst iLast} [do_tokenize_split $text] {
+    if {!$isFirst} {
+      sqlite3_fts5_token $word $iFirst $iLast
+    } else {
+      sqlite3_fts5_token -colo $word $iFirst $iLast
+      set isFirst 0
+    }
+  }
+}
+do_execsql_test 3.1.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
+ INSERT INTO ft VALUES('one two three');
+ CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
+ SELECT * FROM vv;
+} {
+ one 1 1 three 1 1 two 1 1
+}
+
+do_execsql_test 3.1.1 {
+ INSERT INTO ft(ft) VALUES('integrity-check');
+} {}
+
+# Redefine the tokenizer callback so that it reports every word of $text
+# without the -colo option. $tflags is not used.
+proc tcl_tokenize {tflags text} {
+  foreach {word iFirst iLast} [do_tokenize_split $text] {
+    sqlite3_fts5_token $word $iFirst $iLast
+  }
+}
+
+do_execsql_test 3.1.2 {
+ SELECT rowid FROM ft WHERE ft MATCH 'one two three'
+} {1}
+
+reset_db
+sqlite3_fts5_create_tokenizer db tcl tcl_create
+# Tokenizer callback that reports every word of $text twice with identical
+# offsets: first without options, then again with the -colo option.
+proc tcl_tokenize {tflags text} {
+  foreach {word iFirst iLast} [do_tokenize_split $text] {
+    sqlite3_fts5_token $word $iFirst $iLast
+    sqlite3_fts5_token -colo $word $iFirst $iLast
+  }
+}
+do_execsql_test 3.2.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
+ INSERT INTO ft VALUES('one one two three');
+ CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
+ SELECT * FROM vv;
+} {
+ one 1 4 three 1 2 two 1 2
+}
+do_execsql_test 3.2.1 {
+ SELECT rowid FROM ft WHERE ft MATCH 'one two three';
+ SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
+} {1 1}
+do_execsql_test 3.2.2 {
+ SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
+ SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
+} {1}
+
+finish_test
+
-C Another\schange\sto\sthe\sfts5\stokenizer\sAPI.
-D 2015-08-29T15:44:27.938
+C Add\sa\stest\sfor\san\sfts5\stokenizer\sthat\ssupports\ssynonyms\sby\sadding\smultiple\sentries\sto\sthe\sfts\sindex.
+D 2015-08-29T18:46:12.456
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in e2218eb228374422969de7b1680eda6864affcef
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
F ext/fts5/fts5_index.c 076c4995bf06a6d1559a6e31f9a86b90f2105374
F ext/fts5/fts5_main.c b00834ac543431dc35edbe18018b4befe0c7fd42
-F ext/fts5/fts5_storage.c 9820e7b53ea12baf3c818485efd66346b73030c3
-F ext/fts5/fts5_tcl.c 058f8da51964458e9859edfc1ee13b1863edaeae
+F ext/fts5/fts5_storage.c c888defbb961d64c12299b3d1725a24a770b047e
+F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37
F ext/fts5/fts5_test_mi.c 80a9e86fb4c5b6b58f8fefac05e9b96d1a6574e1
F ext/fts5/fts5_tokenize.c 710541513ecf3fe6d9365326fc85aee6efe97229
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b
F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17
F ext/fts5/test/fts5rowid.test 6f9833b23b176dc4aa15b7fc02afeb2b220fd460
+F ext/fts5/test/fts5synonym.test 6f1cfa5022bdae999f018075254e9fc51da2e618
F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89
F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841
F ext/fts5/test/fts5unicode2.test c1dd890ba32b7609adba78e420faa847abe43b59
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 90b85b42f2b2dd3e939b129b7df2b822a05e243d
-R f343432805e01f14633e088d58d566cf
+P fc71868496f45f9c7a79ed2bf2d164a7c4718ce1
+R 8d19f3ef91012c48b3ad32684321db98
U dan
-Z e4288542e4294b868813263b0597051d
+Z 14116ac3039ffa0cba068c002654bc88
-fc71868496f45f9c7a79ed2bf2d164a7c4718ce1
\ No newline at end of file
+98d07d16cab92f1e7001afbe370df3ec6343fc1f
\ No newline at end of file