From: dan Date: Fri, 9 Aug 2024 20:59:50 +0000 (+0000) Subject: Ensure tokenizers registered with xCreateTokenizer_v2() can be accessed using xFindTo... X-Git-Tag: version-3.47.0~220^2~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5eba4281b029900a5a4518e5259f52da67a45bad;p=thirdparty%2Fsqlite.git Ensure tokenizers registered with xCreateTokenizer_v2() can be accessed using xFindTokenizer(), and that those registered with xCreateTokenizer() work with xFindTokenizer_v2(). FossilOrigin-Name: a3ef7d47b582a66a85ebe802ab40114f77289d6b44a4b375ea7858c60f59b27f --- diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index fb951291a6..8b676f1325 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -822,6 +822,9 @@ struct F5tTokenizerContext { struct F5tTokenizerModule { Tcl_Interp *interp; Tcl_Obj *pScript; + void *pParentCtx; + fts5_tokenizer_v2 parent_v2; + fts5_tokenizer parent; F5tTokenizerContext *pContext; }; @@ -836,6 +839,8 @@ struct F5tTokenizerModule { struct F5tTokenizerInstance { Tcl_Interp *interp; Tcl_Obj *pScript; + F5tTokenizerModule *pModule; + Fts5Tokenizer *pParent; F5tTokenizerContext *pContext; char zLocale[128]; }; @@ -864,11 +869,20 @@ static int f5tTokenizerCreate( int nArg, Fts5Tokenizer **ppOut ){ + Fts5Tokenizer *pParent = 0; F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_Obj *pEval; int rc = TCL_OK; int i; + assert( pMod->parent_v2.xCreate==0 || pMod->parent.xCreate==0 ); + if( pMod->parent_v2.xCreate ){ + rc = pMod->parent_v2.xCreate(pMod->pParentCtx, 0, 0, &pParent); + } + if( pMod->parent.xCreate ){ + rc = pMod->parent.xCreate(pMod->pParentCtx, 0, 0, &pParent); + } + pEval = Tcl_DuplicateObj(pMod->pScript); Tcl_IncrRefCount(pEval); for(i=0; rc==TCL_OK && iinterp = pMod->interp; pInst->pScript = Tcl_GetObjResult(pMod->interp); pInst->pContext = pMod->pContext; + pInst->pParent = pParent; + pInst->pModule = pMod; Tcl_IncrRefCount(pInst->pScript); *ppOut = (Fts5Tokenizer*)pInst; } @@ -898,11 +914,19 @@ static int f5tTokenizerCreate( static void f5tTokenizerDelete(Fts5Tokenizer *p){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; + if( pInst->pParent ){ + if( pInst->pModule->parent_v2.xDelete ){ + pInst->pModule->parent_v2.xDelete(pInst->pParent); + }else{ + pInst->pModule->parent.xDelete(pInst->pParent); + } + } Tcl_DecrRefCount(pInst->pScript); ckfree((char *)pInst); } -static int f5tTokenizerTokenize( + +static int f5tTokenizerReallyTokenize( Fts5Tokenizer *p, void *pCtx, int flags, @@ -963,6 +987,54 @@ static int f5tTokenizerTokenize( return rc; } +typedef struct CallbackCtx CallbackCtx; +struct CallbackCtx { + Fts5Tokenizer *p; + void *pCtx; + int flags; + int (*xToken)(void*, int, const char*, int, int, int); +}; + +static int f5tTokenizeCallback( + void *pCtx, + int tflags, + const char *z, int n, + int iStart, int iEnd +){ + CallbackCtx *p = (CallbackCtx*)pCtx; + return f5tTokenizerReallyTokenize(p->p, p->pCtx, p->flags, z, n, p->xToken); +} + +static int f5tTokenizerTokenize( + Fts5Tokenizer *p, + void *pCtx, + int flags, + const char *pText, int nText, + int (*xToken)(void*, int, const char*, int, int, int) +){ + int rc = SQLITE_OK; + F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; + if( pInst->pParent ){ + CallbackCtx ctx; + ctx.p = p; + ctx.pCtx = pCtx; + ctx.flags = flags; + ctx.xToken = xToken; + if( pInst->pModule->parent_v2.xTokenize ){ + rc = pInst->pModule->parent_v2.xTokenize( + pInst->pParent, (void*)&ctx, flags, pText, nText, f5tTokenizeCallback + ); + }else{ + rc = pInst->pModule->parent.xTokenize( + pInst->pParent, (void*)&ctx, flags, pText, nText, f5tTokenizeCallback + ); + } + }else{ + rc = f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken); + } + return rc; +} + /* ** sqlite3_fts5_locale */ @@ -1078,47 +1150,85 @@ static int SQLITE_TCLAPI f5tCreateTokenizer( F5tTokenizerModule *pMod; int rc; int bV2 = 0; /* True to use _v2 API */ + const char *zParent = 0; /* Name of parent tokenizer, if any */ + int ii = 0; - if( objc==5 ){ - const char *zArg = Tcl_GetString(objv[1]); - if( 0==strcmp(zArg, "-v2") ){ - objv++; - objc--; - bV2 = 1; - } + if( objc<4 ){ + Tcl_WrongNumArgs(interp, 1, objv, "?OPTIONS? DB NAME SCRIPT"); + return TCL_ERROR; } - if( objc!=4 ){ - Tcl_WrongNumArgs(interp, 1, objv, "?-v2? DB NAME SCRIPT"); - return TCL_ERROR; + /* Parse any options. Set stack variables bV2 and zParent. */ + for(ii=1; iiinterp = interp; pMod->pScript = pScript; - pMod->pContext = pContext; Tcl_IncrRefCount(pScript); + pMod->pContext = pContext; + if( zParent ){ + if( bV2 ){ + fts5_tokenizer_v2 *pParent = 0; + rc = pApi->xFindTokenizer_v2(pApi, zParent, &pMod->pParentCtx, &pParent); + if( rc==SQLITE_OK ){ + memcpy(&pMod->parent_v2, pParent, sizeof(fts5_tokenizer_v2)); + } + }else{ + rc = pApi->xFindTokenizer(pApi, zParent, &pMod->pParentCtx,&pMod->parent); + } + } - if( bV2==0 ){ - fts5_tokenizer t; - t.xCreate = f5tTokenizerCreate; - t.xTokenize = f5tTokenizerTokenize; - t.xDelete = f5tTokenizerDelete; - rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer); - }else{ - fts5_tokenizer_v2 t2; - memset(&t2, 0, sizeof(t2)); - t2.iVersion = 2; - t2.xCreate = f5tTokenizerCreate; - t2.xTokenize = f5tTokenizerTokenize; - t2.xDelete = f5tTokenizerDelete; - t2.xSetLocale = f5tTokenizerSetLocale; - rc = pApi->xCreateTokenizer_v2(pApi, zName,(void*)pMod,&t2,f5tDelTokenizer); + if( rc==SQLITE_OK ){ + void *pModCtx = (void*)pMod; + if( bV2==0 ){ + fts5_tokenizer t; + t.xCreate = f5tTokenizerCreate; + t.xTokenize = f5tTokenizerTokenize; + t.xDelete = f5tTokenizerDelete; + rc = pApi->xCreateTokenizer(pApi, zName, pModCtx, &t, f5tDelTokenizer); + }else{ + fts5_tokenizer_v2 t2; + memset(&t2, 0, sizeof(t2)); + t2.iVersion = 2; + t2.xCreate = f5tTokenizerCreate; + t2.xTokenize = f5tTokenizerTokenize; + t2.xDelete = f5tTokenizerDelete; + t2.xSetLocale = f5tTokenizerSetLocale; + rc = pApi->xCreateTokenizer_v2(pApi, zName, pModCtx, &t2,f5tDelTokenizer); + } } if( rc!=SQLITE_OK ){ diff --git a/ext/fts5/test/fts5tokenizer3.test b/ext/fts5/test/fts5tokenizer3.test new file mode 100644 index 0000000000..5cdab743c2 --- /dev/null +++ b/ext/fts5/test/fts5tokenizer3.test @@ -0,0 +1,77 @@ +# 2024 Aug 10 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the built-in fts5 tokenizers. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5tokenizer3 + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + + +proc get_sod {args} { return "split_on_dot" } +proc get_lowercase {args} { return "lowercase" } + +proc lowercase {flags txt} { + set n [string length $txt] + sqlite3_fts5_token [string tolower $txt] 0 $n + return 0 +} + +proc split_on_dot {flags txt} { + set iOff 0 + foreach t [split $txt "."] { + set n [string length $txt] + sqlite3_fts5_token $t $iOff [expr $iOff+$n] + incr iOff [expr {$n+1}] + } + return "" +} + +foreach {tn script} { + 1 { + sqlite3_fts5_create_tokenizer db lowercase get_lowercase + sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod + } + 2 { + sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase + sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod + } + 3 { + sqlite3_fts5_create_tokenizer db lowercase get_lowercase + sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod + } + 4 { + sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase + sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod + } +} { + reset_db + eval $script + + do_execsql_test 1.$tn.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=split_on_dot); + CREATE VIRTUAL TABLE t1vocab USING fts5vocab(t1, instance); + INSERT INTO t1 VALUES('ABC.Def.ghi'); + } + + do_execsql_test 1.$tn.1 { + SELECT term FROM t1vocab ORDER BY 1 + } {abc def ghi} +} + + +finish_test diff --git a/manifest b/manifest index fdb1dfd115..a95f78f7fb 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthings\sso\sthat\slocale=1\sis\srequired\sto\swrite\sfts5_locale()\svalues\sto\san\sfts5\stable,\sand\sso\sthat\sblobs\smay\snot\sbe\sstored\sin\sindexed\s(i.e.\snot\sUNINDEXED)\scolumns\sof\sthese\stables. -D 2024-08-02T21:06:13.360 +C Ensure\stokenizers\sregistered\swith\sxCreateTokenizer_v2()\scan\sbe\saccessed\susing\sxFindTokenizer(),\sand\sthat\sthose\sregistered\swith\sxCreateTokenizer()\swork\swith\sxFindTokenizer_v2(). +D 2024-08-09T20:59:50.030 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -102,7 +102,7 @@ F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe F ext/fts5/fts5_main.c 79b1d6099d8140afbd930a4e280759728099f584f91e39a8e81f2df0a0e0e839 F ext/fts5/fts5_storage.c 5bf88213ff5911625c142ac332ddba10dcd0869e757f91f2a3d27f27ba595992 -F ext/fts5/fts5_tcl.c 93b705cb87633574983161edc5234f9b91ba03f9fecfbd2c5d401a1da6f93aa5 +F ext/fts5/fts5_tcl.c 20bb08b43f6eeff34f12ba25988f46e3a2500bc441e9885a509d5f3932bc1cdb F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b F ext/fts5/fts5_tokenize.c fa5493075101540270f572038fc1723d44fcc97bfbf237c8530013b8a27860be @@ -232,6 +232,7 @@ F ext/fts5/test/fts5tok1.test 1f7817499f5971450d8c4a652114b3d833393c8134e32422d0 F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2 F ext/fts5/test/fts5tokenizer.test 7937cec672b148223fff8746d21d3e7ed0965fd7caf35ccdc888a005bb452f98 F ext/fts5/test/fts5tokenizer2.test 9c1ad8ef0465076cbc9ff5c764782594329b3bce3e0f6a931a026902d006f495 +F ext/fts5/test/fts5tokenizer3.test eea778f7bb7024c3e904e28915f9d53286141671b138722148be22a9c758bdc3 F ext/fts5/test/fts5trigram.test 6c4e37864f3e7d90673db5563d9736d7e40080ab94d10ebdffa94c1b77941da0 F ext/fts5/test/fts5trigram2.test c91f0a94f7e1ff859682228646abeab4c0eba2effc46af2cbc8f0f48b05a0566 F ext/fts5/test/fts5ubsan.test 9a2dcf399dc8d0e0de661f0d93884d1d27e5b7f0693cfceb97dd24d818df5dd2 @@ -2201,8 +2202,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P e8a61d5c48073fdd4d99d0b6fc70469b37af009f281336a44e3789e7eeed820d -R 1b40b0a91a44b0bf6782847dce5b9e1d +P c98ccc12169419b8b27ead89ef0665de40320277c5daa748b80869337419e43e +R bfbea59aa50b425cf741e16058f3ea26 U dan -Z 06efbaab9999ac759a0d226937b631dd +Z 43db44ab1958e51c418293959d627cfb # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 09bba22cd4..f82f7047f7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c98ccc12169419b8b27ead89ef0665de40320277c5daa748b80869337419e43e +a3ef7d47b582a66a85ebe802ab40114f77289d6b44a4b375ea7858c60f59b27f