struct F5tTokenizerModule {
Tcl_Interp *interp;
Tcl_Obj *pScript;
+ void *pParentCtx; /* Context pointer passed to the parent's xCreate() */
+ fts5_tokenizer_v2 parent_v2; /* Parent methods, set if found via xFindTokenizer_v2() */
+ fts5_tokenizer parent; /* Parent methods, set if found via xFindTokenizer() */
F5tTokenizerContext *pContext;
};
struct F5tTokenizerInstance {
Tcl_Interp *interp;
Tcl_Obj *pScript;
+ F5tTokenizerModule *pModule; /* Module this instance was created from */
+ Fts5Tokenizer *pParent; /* Parent tokenizer instance, or 0 if there is none */
F5tTokenizerContext *pContext;
char zLocale[128];
};
int nArg,
Fts5Tokenizer **ppOut
){
+ Fts5Tokenizer *pParent = 0;
F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
Tcl_Obj *pEval;
int rc = TCL_OK;
int i;
+ assert( pMod->parent_v2.xCreate==0 || pMod->parent.xCreate==0 );
+ if( pMod->parent_v2.xCreate ){
+ rc = pMod->parent_v2.xCreate(pMod->pParentCtx, 0, 0, &pParent);
+ }
+ if( pMod->parent.xCreate ){
+ rc = pMod->parent.xCreate(pMod->pParentCtx, 0, 0, &pParent);
+ }
+
pEval = Tcl_DuplicateObj(pMod->pScript);
Tcl_IncrRefCount(pEval);
for(i=0; rc==TCL_OK && i<nArg; i++){
pInst->interp = pMod->interp;
pInst->pScript = Tcl_GetObjResult(pMod->interp);
pInst->pContext = pMod->pContext;
+ pInst->pParent = pParent;
+ pInst->pModule = pMod;
Tcl_IncrRefCount(pInst->pScript);
*ppOut = (Fts5Tokenizer*)pInst;
}
static void f5tTokenizerDelete(Fts5Tokenizer *p){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
+ if( pInst->pParent ){ /* Delete the parent instance, if there is one */
+ if( pInst->pModule->parent_v2.xDelete ){ /* v2 method table is populated */
+ pInst->pModule->parent_v2.xDelete(pInst->pParent);
+ }else{ /* Otherwise use the non-v2 fts5_tokenizer methods */
+ pInst->pModule->parent.xDelete(pInst->pParent);
+ }
+ }
Tcl_DecrRefCount(pInst->pScript);
ckfree((char *)pInst);
}
-static int f5tTokenizerTokenize(
+
+static int f5tTokenizerReallyTokenize(
Fts5Tokenizer *p,
void *pCtx,
int flags,
return rc;
}
+typedef struct CallbackCtx CallbackCtx;
+struct CallbackCtx { /* Passed via the parent tokenizer to f5tTokenizeCallback() */
+ Fts5Tokenizer *p; /* Tokenizer instance given to f5tTokenizerTokenize() */
+ void *pCtx; /* Caller's context pointer, forwarded along with xToken */
+ int flags; /* Flags given to f5tTokenizerTokenize() */
+ int (*xToken)(void*, int, const char*, int, int, int); /* Caller's token callback */
+};
+
+/*
+** Token callback handed to the parent tokenizer by f5tTokenizerTokenize().
+** pCtx points to a CallbackCtx. The text (z/n) of each token reported by
+** the parent is passed to f5tTokenizerReallyTokenize(), together with the
+** tokenizer, context, flags and xToken callback saved in the CallbackCtx.
+** The tflags, iStart and iEnd values reported by the parent are not used.
+**
+** Returns whatever f5tTokenizerReallyTokenize() returns.
+*/
+static int f5tTokenizeCallback(
+  void *pCtx,
+  int tflags,
+  const char *z, int n,
+  int iStart, int iEnd
+){
+  CallbackCtx *pCb = (CallbackCtx*)pCtx;
+  int rc;
+
+  rc = f5tTokenizerReallyTokenize(
+      pCb->p, pCb->pCtx, pCb->flags, z, n, pCb->xToken
+  );
+  return rc;
+}
+
+/*
+** Tokenize method installed for tokenizers registered by this file. If the
+** instance has no parent tokenizer, the text is passed directly to
+** f5tTokenizerReallyTokenize(). Otherwise the text is tokenized by the
+** parent, with f5tTokenizeCallback() installed as the token callback so
+** that each token the parent reports is in turn passed to
+** f5tTokenizerReallyTokenize().
+**
+** Returns the value returned by whichever of those routines was invoked.
+*/
+static int f5tTokenizerTokenize(
+  Fts5Tokenizer *p,
+  void *pCtx,
+  int flags,
+  const char *pText, int nText,
+  int (*xToken)(void*, int, const char*, int, int, int)
+){
+  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
+  F5tTokenizerModule *pMod;
+  CallbackCtx cb;
+
+  /* No parent: hand the text straight to the script tokenizer. */
+  if( pInst->pParent==0 ){
+    return f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken);
+  }
+
+  /* Save the caller's arguments where f5tTokenizeCallback() can find them. */
+  cb.p = p;
+  cb.pCtx = pCtx;
+  cb.flags = flags;
+  cb.xToken = xToken;
+
+  /* Invoke the parent through whichever method table is populated. */
+  pMod = pInst->pModule;
+  if( pMod->parent_v2.xTokenize ){
+    return pMod->parent_v2.xTokenize(
+        pInst->pParent, (void*)&cb, flags, pText, nText, f5tTokenizeCallback
+    );
+  }
+  return pMod->parent.xTokenize(
+      pInst->pParent, (void*)&cb, flags, pText, nText, f5tTokenizeCallback
+  );
+}
+
/*
** sqlite3_fts5_locale
*/
F5tTokenizerModule *pMod;
int rc;
int bV2 = 0; /* True to use _v2 API */
+ const char *zParent = 0; /* Name of parent tokenizer, if any */
+ int ii = 0;
- if( objc==5 ){
- const char *zArg = Tcl_GetString(objv[1]);
- if( 0==strcmp(zArg, "-v2") ){
- objv++;
- objc--;
- bV2 = 1;
- }
+ if( objc<4 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "?OPTIONS? DB NAME SCRIPT");
+ return TCL_ERROR;
}
- if( objc!=4 ){
- Tcl_WrongNumArgs(interp, 1, objv, "?-v2? DB NAME SCRIPT");
- return TCL_ERROR;
+ /* Parse any options. Set stack variables bV2 and zParent. */
+ for(ii=1; ii<objc-3; ii++){
+ int iOpt = 0;
+ const char *azOpt[] = { "-v2", "-parent", 0 };
+ if( Tcl_GetIndexFromObj(interp, objv[ii], azOpt, "OPTION", 0, &iOpt) ){
+ return TCL_ERROR;
+ }
+ switch( iOpt ){
+ case 0: /* -v2 */ {
+ bV2 = 1;
+ break;
+ }
+ case 1: /* -parent */ {
+ ii++;
+ if( ii==objc-3 ){
+ Tcl_AppendResult(
+ interp, "option requires an argument: -parent", (char*)0
+ );
+ return TCL_ERROR;
+ }
+ zParent = Tcl_GetString(objv[ii]);
+ break;
+ }
+ default:
+ assert( 0 );
+ break;
+ }
}
- if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
+
+ if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ){
return TCL_ERROR;
}
- zName = Tcl_GetString(objv[2]);
- pScript = objv[3];
+ zName = Tcl_GetString(objv[objc-2]);
+ pScript = objv[objc-1];
pMod = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule));
+ memset(pMod, 0, sizeof(F5tTokenizerModule));
pMod->interp = interp;
pMod->pScript = pScript;
- pMod->pContext = pContext;
Tcl_IncrRefCount(pScript);
+ pMod->pContext = pContext;
+  /* rc is declared without an initializer and is tested after this block
+  ** even when no -parent option was supplied, so it must be given a
+  ** defined value before the optional parent lookup. */
+  rc = SQLITE_OK;
+
+  /* If a parent tokenizer was named, look it up using the API version
+  ** selected by -v2 and record its methods and context in pMod. */
+  if( zParent ){
+    if( bV2 ){
+      fts5_tokenizer_v2 *pParent = 0;
+      rc = pApi->xFindTokenizer_v2(pApi, zParent, &pMod->pParentCtx, &pParent);
+      if( rc==SQLITE_OK ){
+        memcpy(&pMod->parent_v2, pParent, sizeof(fts5_tokenizer_v2));
+      }
+    }else{
+      rc = pApi->xFindTokenizer(pApi, zParent, &pMod->pParentCtx,&pMod->parent);
+    }
+  }
- if( bV2==0 ){
- fts5_tokenizer t;
- t.xCreate = f5tTokenizerCreate;
- t.xTokenize = f5tTokenizerTokenize;
- t.xDelete = f5tTokenizerDelete;
- rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer);
- }else{
- fts5_tokenizer_v2 t2;
- memset(&t2, 0, sizeof(t2));
- t2.iVersion = 2;
- t2.xCreate = f5tTokenizerCreate;
- t2.xTokenize = f5tTokenizerTokenize;
- t2.xDelete = f5tTokenizerDelete;
- t2.xSetLocale = f5tTokenizerSetLocale;
- rc = pApi->xCreateTokenizer_v2(pApi, zName,(void*)pMod,&t2,f5tDelTokenizer);
+ if( rc==SQLITE_OK ){
+ void *pModCtx = (void*)pMod;
+ if( bV2==0 ){
+ fts5_tokenizer t;
+ t.xCreate = f5tTokenizerCreate;
+ t.xTokenize = f5tTokenizerTokenize;
+ t.xDelete = f5tTokenizerDelete;
+ rc = pApi->xCreateTokenizer(pApi, zName, pModCtx, &t, f5tDelTokenizer);
+ }else{
+ fts5_tokenizer_v2 t2;
+ memset(&t2, 0, sizeof(t2));
+ t2.iVersion = 2;
+ t2.xCreate = f5tTokenizerCreate;
+ t2.xTokenize = f5tTokenizerTokenize;
+ t2.xDelete = f5tTokenizerDelete;
+ t2.xSetLocale = f5tTokenizerSetLocale;
+ rc = pApi->xCreateTokenizer_v2(pApi, zName, pModCtx, &t2,f5tDelTokenizer);
+ }
}
if( rc!=SQLITE_OK ){
--- /dev/null
+# 2024 Aug 10
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests for script-defined fts5 tokenizers that are layered on top of a
+# parent tokenizer (the -parent option), registered with and without -v2.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5tokenizer3
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+
+# Script registered for the "split_on_dot" tokenizer. Whatever arguments it
+# is called with, its result names the proc that does the tokenizing.
+proc get_sod {args} {
+  return "split_on_dot"
+}
+# Script registered for the "lowercase" tokenizer. Whatever arguments it
+# is called with, its result names the proc that does the tokenizing.
+proc get_lowercase {args} {
+  return "lowercase"
+}
+
+# Tokenizer proc: report the whole of $txt, folded to lower case, as a
+# single token with start offset 0 and end offset [string length $txt].
+proc lowercase {flags txt} {
+  set iEnd [string length $txt]
+  set lower [string tolower $txt]
+  sqlite3_fts5_token $lower 0 $iEnd
+  return 0
+}
+
+# Tokenizer proc: report one token for each "."-separated field of $txt.
+#
+# The start and end offsets of each token are computed from the length of
+# the field itself (not of the whole input), and the running offset then
+# skips over the field plus the "." that follows it.
+proc split_on_dot {flags txt} {
+  set iOff 0
+  foreach t [split $txt "."] {
+    set n [string length $t]
+    sqlite3_fts5_token $t $iOff [expr {$iOff+$n}]
+    incr iOff [expr {$n+1}]
+  }
+  return ""
+}
+
+# Each case registers a "lowercase" tokenizer and a "split_on_dot" tokenizer
+# whose parent is "lowercase". The four cases cover every combination of
+# registering each of the two with or without the -v2 option.
+foreach {tn script} {
+ 1 {
+ sqlite3_fts5_create_tokenizer db lowercase get_lowercase
+ sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod
+ }
+ 2 {
+ sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase
+ sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod
+ }
+ 3 {
+ sqlite3_fts5_create_tokenizer db lowercase get_lowercase
+ sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod
+ }
+ 4 {
+ sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase
+ sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod
+ }
+} {
+ # Register the tokenizers for this case (reset_db presumably provides a
+ # fresh database handle "db" - confirm against the test harness).
+ reset_db
+ eval $script
+
+ # Index a mixed-case, dot-separated value using the layered tokenizer.
+ do_execsql_test 1.$tn.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=split_on_dot);
+ CREATE VIRTUAL TABLE t1vocab USING fts5vocab(t1, instance);
+ INSERT INTO t1 VALUES('ABC.Def.ghi');
+ }
+
+ # The indexed terms must be both lower-cased and split on ".".
+ do_execsql_test 1.$tn.1 {
+ SELECT term FROM t1vocab ORDER BY 1
+ } {abc def ghi}
+}
+
+
+finish_test
-C Change\sthings\sso\sthat\slocale=1\sis\srequired\sto\swrite\sfts5_locale()\svalues\sto\san\sfts5\stable,\sand\sso\sthat\sblobs\smay\snot\sbe\sstored\sin\sindexed\s(i.e.\snot\sUNINDEXED)\scolumns\sof\sthese\stables.
-D 2024-08-02T21:06:13.360
+C Ensure\stokenizers\sregistered\swith\sxCreateTokenizer_v2()\scan\sbe\saccessed\susing\sxFindTokenizer(),\sand\sthat\sthose\sregistered\swith\sxCreateTokenizer()\swork\swith\sxFindTokenizer_v2().
+D 2024-08-09T20:59:50.030
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
F ext/fts5/fts5_main.c 79b1d6099d8140afbd930a4e280759728099f584f91e39a8e81f2df0a0e0e839
F ext/fts5/fts5_storage.c 5bf88213ff5911625c142ac332ddba10dcd0869e757f91f2a3d27f27ba595992
-F ext/fts5/fts5_tcl.c 93b705cb87633574983161edc5234f9b91ba03f9fecfbd2c5d401a1da6f93aa5
+F ext/fts5/fts5_tcl.c 20bb08b43f6eeff34f12ba25988f46e3a2500bc441e9885a509d5f3932bc1cdb
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c fa5493075101540270f572038fc1723d44fcc97bfbf237c8530013b8a27860be
F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
F ext/fts5/test/fts5tokenizer.test 7937cec672b148223fff8746d21d3e7ed0965fd7caf35ccdc888a005bb452f98
F ext/fts5/test/fts5tokenizer2.test 9c1ad8ef0465076cbc9ff5c764782594329b3bce3e0f6a931a026902d006f495
+F ext/fts5/test/fts5tokenizer3.test eea778f7bb7024c3e904e28915f9d53286141671b138722148be22a9c758bdc3
F ext/fts5/test/fts5trigram.test 6c4e37864f3e7d90673db5563d9736d7e40080ab94d10ebdffa94c1b77941da0
F ext/fts5/test/fts5trigram2.test c91f0a94f7e1ff859682228646abeab4c0eba2effc46af2cbc8f0f48b05a0566
F ext/fts5/test/fts5ubsan.test 9a2dcf399dc8d0e0de661f0d93884d1d27e5b7f0693cfceb97dd24d818df5dd2
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P e8a61d5c48073fdd4d99d0b6fc70469b37af009f281336a44e3789e7eeed820d
-R 1b40b0a91a44b0bf6782847dce5b9e1d
+P c98ccc12169419b8b27ead89ef0665de40320277c5daa748b80869337419e43e
+R bfbea59aa50b425cf741e16058f3ea26
U dan
-Z 06efbaab9999ac759a0d226937b631dd
+Z 43db44ab1958e51c418293959d627cfb
# Remove this line to create a well-formed Fossil manifest.