** Each tokenizer module registered with the FTS5 module is represented
** by an object of the following type. All such objects are stored as part
** of the Fts5Global.pTok list.
+**
+** bV2Native:
+** True if the tokenizer was registered using xCreateTokenizer_v2(), false
+** for xCreateTokenizer(). If this variable is true, then x2 is populated
+** with the routines as supplied by the caller and x1 contains synthesized
+** wrapper routines. In this case the user-data pointer passed to
+** x1.xCreate should be a pointer to the Fts5TokenizerModule structure,
+** not a copy of pUserData.
+**
+** Of course, if bV2Native is false, then x1 contains the real routines and
+** x2 the synthesized ones. In this case a pointer to the Fts5TokenizerModule
+** object should be passed to x2.xCreate.
*/
struct Fts5TokenizerModule {
char *zName; /* Name of tokenizer */
void *pUserData; /* User pointer passed to xCreate() */
- fts5_tokenizer_v2 x; /* Tokenizer functions */
+ int bV2Native; /* True if v2 native tokenizer */
+ fts5_tokenizer x1; /* v1 methods: native if !bV2Native, else wrappers */
+ fts5_tokenizer_v2 x2; /* v2 methods: native if bV2Native, else wrappers */
void (*xDestroy)(void*); /* Destructor function */
Fts5TokenizerModule *pNext; /* Next registered tokenizer module */
};
const char *zLocale,
int nLocale
){
- Fts5TokenizerConfig *pT = &pConfig->t;
int rc = SQLITE_OK;
- if( pT->pTokApi->xSetLocale ){
- rc = pT->pTokApi->xSetLocale(pT->pTok, zLocale, nLocale);
- }
+ Fts5TokenizerConfig *pT = &pConfig->t;
+ pT->pLocale = zLocale;
+ pT->nLocale = nLocale;
return rc;
}
return rc;
}
+static int fts5NewTokenizerModule(
+ Fts5Global *pGlobal, /* Global context (one per db handle) */
+ const char *zName, /* Name of new tokenizer */
+ void *pUserData, /* User pointer later passed to the native xCreate() */
+ void(*xDestroy)(void*), /* Destructor for pUserData */
+ Fts5TokenizerModule **ppNew /* OUT: new module, or NULL if none allocated */
+){
+ int rc = SQLITE_OK;
+ Fts5TokenizerModule *pNew;
+ sqlite3_int64 nName; /* Size of zName and its \0 terminator */
+ sqlite3_int64 nByte; /* Bytes of space to allocate */
+ /* Allocate the module object and a copy of its name in one allocation
+ ** (the name is stored immediately after the structure), then link the
+ ** new module in at the head of the pGlobal->pTok list. The x1/x2 method
+ ** tables and bV2Native are not set here; they are left for the caller
+ ** to populate. */
+ nName = strlen(zName) + 1;
+ nByte = sizeof(Fts5TokenizerModule) + nName;
+ *ppNew = pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte);
+ if( pNew ){
+ pNew->zName = (char*)&pNew[1];
+ memcpy(pNew->zName, zName, nName);
+ pNew->pUserData = pUserData;
+ pNew->xDestroy = xDestroy;
+ pNew->pNext = pGlobal->pTok;
+ pGlobal->pTok = pNew;
+ if( pNew->pNext==0 ){ /* List was empty: first module becomes the default */
+ pGlobal->pDfltTok = pNew;
+ }
+ }
+
+ return rc; /* SQLITE_OK, or whatever sqlite3Fts5MallocZero() stored in rc */
+}
+
+typedef struct Fts5VtoVTokenizer Fts5VtoVTokenizer;
+struct Fts5VtoVTokenizer { /* Wrapper tokenizer instance; see fts5VtoVCreate() */
+ Fts5TokenizerModule *pMod; /* Module whose native methods are wrapped */
+ Fts5Tokenizer *pReal; /* Tokenizer returned by the native xCreate() */
+};
+
+static int fts5VtoVCreate(
+ void *pCtx, /* Fts5TokenizerModule of the tokenizer being wrapped */
+ const char **azArg, /* Arguments, forwarded to the native xCreate() */
+ int nArg, /* Argument count, forwarded along with azArg */
+ Fts5Tokenizer **ppOut /* OUT: new wrapper instance, or NULL on error */
+){
+ Fts5TokenizerModule *pMod = (Fts5TokenizerModule*)pCtx;
+ Fts5VtoVTokenizer *pNew = 0;
+ int rc = SQLITE_OK;
+ /* xCreate() for the synthesized method table. Allocate a wrapper object,
+ ** then have whichever implementation is native (x2 if bV2Native,
+ ** otherwise x1) create the real tokenizer. The native constructor is
+ ** passed pMod->pUserData, not the module pointer received in pCtx. */
+ pNew = (Fts5VtoVTokenizer*)sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
+ if( rc==SQLITE_OK ){
+ pNew->pMod = pMod;
+ if( pMod->bV2Native ){
+ rc = pMod->x2.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal);
+ }else{
+ rc = pMod->x1.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal);
+ }
+ if( rc!=SQLITE_OK ){ /* Native constructor failed: discard the wrapper */
+ sqlite3_free(pNew);
+ pNew = 0;
+ }
+ }
+
+ *ppOut = (Fts5Tokenizer*)pNew;
+ return rc;
+}
+static void fts5VtoVDelete(Fts5Tokenizer *pTok){ /* xDelete() for wrappers */
+ Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
+ if( p ){ /* A NULL pTok is a no-op */
+ Fts5TokenizerModule *pMod = p->pMod;
+ if( pMod->bV2Native ){ /* Destroy the real tokenizer via the native xDelete() */
+ pMod->x2.xDelete(p->pReal);
+ }else{
+ pMod->x1.xDelete(p->pReal);
+ }
+ sqlite3_free(p); /* Then release the wrapper object itself */
+ }
+}
+static int fts5V1toV2Tokenize(
+ Fts5Tokenizer *pTok, /* Wrapper instance (an Fts5VtoVTokenizer) */
+ void *pCtx, int flags,
+ const char *pText, int nText,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){ /* v1-signature xTokenize() wrapping a v2-native tokenizer. Forwards to
+ ** the native x2.xTokenize(); the v1 signature carries no locale, so
+ ** (0, 0) is passed for the locale arguments. */
+ Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
+ Fts5TokenizerModule *pMod = p->pMod;
+ assert( pMod->bV2Native );
+ return pMod->x2.xTokenize(p->pReal, pCtx, flags, pText, nText, 0, 0, xToken);
+}
+static int fts5V2toV1Tokenize(
+ Fts5Tokenizer *pTok, /* Wrapper instance (an Fts5VtoVTokenizer) */
+ void *pCtx, int flags,
+ const char *pText, int nText,
+ const char *pLocale, int nLocale, /* Unused: v1 xTokenize() has no locale */
+ int (*xToken)(void*, int, const char*, int, int, int)
+){ /* v2-signature xTokenize() wrapping a v1-native tokenizer. Forwards
+ ** everything except the locale to the native x1.xTokenize(). */
+ Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
+ Fts5TokenizerModule *pMod = p->pMod;
+ assert( pMod->bV2Native==0 );
+ return pMod->x1.xTokenize(p->pReal, pCtx, flags, pText, nText, xToken);
+}
+
/*
** Register a new tokenizer. This is the implementation of the
** fts5_api.xCreateTokenizer_v2() method.
if( pTokenizer->iVersion>2 ){
rc = SQLITE_ERROR;
}else{
- Fts5TokenizerModule *pNew;
- sqlite3_int64 nName; /* Size of zName and its \0 terminator */
- sqlite3_int64 nByte; /* Bytes of space to allocate */
-
- nName = strlen(zName) + 1;
- nByte = sizeof(Fts5TokenizerModule) + nName;
- pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte);
+ Fts5TokenizerModule *pNew = 0;
+ rc = fts5NewTokenizerModule(pGlobal, zName, pUserData, xDestroy, &pNew);
if( pNew ){
- pNew->zName = (char*)&pNew[1];
- memcpy(pNew->zName, zName, nName);
- pNew->pUserData = pUserData;
- pNew->x = *pTokenizer;
- pNew->xDestroy = xDestroy;
- pNew->pNext = pGlobal->pTok;
- pGlobal->pTok = pNew;
- if( pNew->pNext==0 ){
- pGlobal->pDfltTok = pNew;
- }
+ pNew->x2 = *pTokenizer;
+ pNew->bV2Native = 1;
+ pNew->x1.xCreate = fts5VtoVCreate;
+ pNew->x1.xTokenize = fts5V1toV2Tokenize;
+ pNew->x1.xDelete = fts5VtoVDelete;
}
}
fts5_tokenizer *pTokenizer, /* Tokenizer implementation */
void(*xDestroy)(void*) /* Destructor for pUserData */
){
- fts5_tokenizer_v2 tok;
-
- memset(&tok, 0, sizeof(tok));
- tok.iVersion = 2;
- tok.xCreate = pTokenizer->xCreate;
- tok.xTokenize = pTokenizer->xTokenize;
- tok.xDelete = pTokenizer->xDelete;
+ Fts5TokenizerModule *pNew = 0;
+ int rc = SQLITE_OK;
- return fts5CreateTokenizer_v2(pApi, zName, pUserData, &tok, xDestroy);
+ rc = fts5NewTokenizerModule(
+ (Fts5Global*)pApi, zName, pUserData, xDestroy, &pNew
+ );
+ if( pNew ){
+ pNew->x1 = *pTokenizer;
+ pNew->x2.xCreate = fts5VtoVCreate;
+ pNew->x2.xTokenize = fts5V2toV1Tokenize;
+ pNew->x2.xDelete = fts5VtoVDelete;
+ }
+ return rc;
}
static Fts5TokenizerModule *fts5LocateTokenizer(
pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
if( pMod ){
- *ppTokenizer = &pMod->x;
- *ppUserData = pMod->pUserData;
+ if( pMod->bV2Native ){
+ *ppUserData = pMod->pUserData;
+ }else{
+ *ppUserData = (void*)pMod;
+ }
+ *ppTokenizer = &pMod->x2;
}else{
*ppTokenizer = 0;
*ppUserData = 0;
void **ppUserData,
fts5_tokenizer *pTokenizer /* Populate this object */
){
- fts5_tokenizer_v2 *pV2 = 0;
int rc = SQLITE_OK;
+ Fts5TokenizerModule *pMod;
- rc = fts5FindTokenizer_v2(pApi, zName, ppUserData, &pV2);
- if( rc==SQLITE_OK ){
- pTokenizer->xCreate = pV2->xCreate;
- pTokenizer->xDelete = pV2->xDelete;
- pTokenizer->xTokenize = pV2->xTokenize;
+ pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
+ if( pMod ){
+ if( pMod->bV2Native==0 ){
+ *ppUserData = pMod->pUserData;
+ }else{
+ *ppUserData = (void*)pMod;
+ }
+ *pTokenizer = pMod->x1;
+ }else{
+ memset(pTokenizer, 0, sizeof(*pTokenizer));
+ *ppUserData = 0;
+ rc = SQLITE_ERROR;
}
return rc;
rc = SQLITE_ERROR;
if( pzErr ) *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
}else{
- rc = pMod->x.xCreate(
- pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok
+ int (*xCreate)(void*, const char**, int, Fts5Tokenizer**) = 0;
+ if( pMod->bV2Native ){
+ xCreate = pMod->x2.xCreate;
+ pConfig->t.pApi2 = &pMod->x2;
+ }else{
+ pConfig->t.pApi1 = &pMod->x1;
+ xCreate = pMod->x1.xCreate;
+ }
+
+ rc = xCreate(pMod->pUserData,
+ (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok
);
- pConfig->t.pTokApi = &pMod->x;
+
if( rc!=SQLITE_OK ){
if( pzErr && rc!=SQLITE_NOMEM ){
*pzErr = sqlite3_mprintf("error in tokenizer constructor");
}
- }else{
+ }else if( pMod->bV2Native==0 ){
pConfig->t.ePattern = sqlite3Fts5TokenizerPattern(
- pMod->x.xCreate, pConfig->t.pTok
+ pMod->x1.xCreate, pConfig->t.pTok
);
}
}
if( rc!=SQLITE_OK ){
- pConfig->t.pTokApi = 0;
+ pConfig->t.pApi1 = 0;
+ pConfig->t.pApi2 = 0;
pConfig->t.pTok = 0;
}
return f5tTokenizerReallyTokenize(p->p, p->pCtx, p->flags, z, n, p->xToken);
}
-static int f5tTokenizerTokenize(
+static int f5tTokenizerTokenize_v2(
Fts5Tokenizer *p,
void *pCtx,
int flags,
const char *pText, int nText,
+ const char *pLoc, int nLoc,
int (*xToken)(void*, int, const char*, int, int, int)
){
int rc = SQLITE_OK;
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
+
+ memset(pInst->zLocale, 0, sizeof(pInst->zLocale));
+ if( pLoc && nLoc<sizeof(pInst->zLocale) ){
+ memcpy(pInst->zLocale, pLoc, nLoc);
+ }
+
if( pInst->pParent ){
CallbackCtx ctx;
ctx.p = p;
ctx.xToken = xToken;
if( pInst->pModule->parent_v2.xTokenize ){
rc = pInst->pModule->parent_v2.xTokenize(
- pInst->pParent, (void*)&ctx, flags, pText, nText, f5tTokenizeCallback
+ pInst->pParent, (void*)&ctx, flags, pText, nText,
+ pLoc, nLoc, f5tTokenizeCallback
);
}else{
rc = pInst->pModule->parent.xTokenize(
}else{
rc = f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken);
}
+
+ memset(pInst->zLocale, 0, sizeof(pInst->zLocale));
return rc;
}
+static int f5tTokenizerTokenize(
+ Fts5Tokenizer *p,
+ void *pCtx,
+ int flags,
+ const char *pText, int nText,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){ /* v1-signature entry point: identical to f5tTokenizerTokenize_v2()
+ ** called with no locale, i.e. (0, 0) passed for pLoc/nLoc. */
+ return f5tTokenizerTokenize_v2(p, pCtx, flags, pText, nText, 0, 0, xToken);
+}
/*
** sqlite3_fts5_locale
memset(&t2, 0, sizeof(t2));
t2.iVersion = 2;
t2.xCreate = f5tTokenizerCreate;
- t2.xTokenize = f5tTokenizerTokenize;
+ t2.xTokenize = f5tTokenizerTokenize_v2;
t2.xDelete = f5tTokenizerDelete;
- t2.xSetLocale = f5tTokenizerSetLocale;
+ // t2.xSetLocale = f5tTokenizerSetLocale;
rc = pApi->xCreateTokenizer_v2(pApi, zName, pModCtx, &t2,f5tDelTokenizer);
}
}
-C Ensure\stokenizers\sregistered\swith\sxCreateTokenizer_v2()\scan\sbe\saccessed\susing\sxFindTokenizer(),\sand\sthat\sthose\sregistered\swith\sxCreateTokenizer()\swork\swith\sxFindTokenizer_v2().
-D 2024-08-09T20:59:50.030
+C Update\sthe\sfts5_tokenizer_v2\sAPI\sso\sthat\sthe\slocale\sis\spassed\sas\sparameter\sto\sxTokenize(),\sinstead\sof\svia\sa\sseparate\scall\sto\sxSetLocale().
+D 2024-08-10T18:59:36.297
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
-F ext/fts5/fts5.h f5451da088d0004c4be7314e2fdb41fda16ce682ce2aa3b54b9474ebe9a013d5
-F ext/fts5/fts5Int.h 64ccc862cbf3be08103ed9435b8670017019f5bd620db4e4317936170008b2b1
+F ext/fts5/fts5.h 1bdf3cde2b00e80f62925931471b6a00da38546eed6f2d9fe6a1d5999cbe3ba6
+F ext/fts5/fts5Int.h b40bb0bd54aaa4ac4712b6c5763b2167764614aaef204dbae81638b4548bca5d
F ext/fts5/fts5_aux.c 598c80fc0faabab91c833cdda99f8e36387bd907f4acb0480a19b612a4add93e
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
-F ext/fts5/fts5_config.c b7924194b5dc035d9f0bd82c74564e133ff38aaa2edf86d8a95c23c9c82ba17f
-F ext/fts5/fts5_expr.c c7336d5f9ecc0e2b014d700be2bec0ea383b0e82c494a7c5c4ac622327c2bfad
+F ext/fts5/fts5_config.c 187f7ffa5eddd6539ffa592de85e95b18be951728491390121bb215549a24a2a
+F ext/fts5/fts5_expr.c 16fdd35ebfd9c21d799ad6c7542a36bb3d25ba5fb97bc2e1092d3d86394267b8
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
-F ext/fts5/fts5_main.c 79b1d6099d8140afbd930a4e280759728099f584f91e39a8e81f2df0a0e0e839
+F ext/fts5/fts5_main.c 6248ba625b6a24c4c5f4706883aa3939b06e7f8e0266e0e8af24f5516e599879
F ext/fts5/fts5_storage.c 5bf88213ff5911625c142ac332ddba10dcd0869e757f91f2a3d27f27ba595992
-F ext/fts5/fts5_tcl.c 20bb08b43f6eeff34f12ba25988f46e3a2500bc441e9885a509d5f3932bc1cdb
+F ext/fts5/fts5_tcl.c f901f32a1be49dc76c6da63d08224e4d971657db0a0783ebc9796ef0757a57a9
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c fa5493075101540270f572038fc1723d44fcc97bfbf237c8530013b8a27860be
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P c98ccc12169419b8b27ead89ef0665de40320277c5daa748b80869337419e43e
-R bfbea59aa50b425cf741e16058f3ea26
+P a3ef7d47b582a66a85ebe802ab40114f77289d6b44a4b375ea7858c60f59b27f
+R c78b035ad007a6bbcb5c77edceb19899
U dan
-Z 43db44ab1958e51c418293959d627cfb
+Z 3bd05c86feeacf019b7ab5899d9e19c7
# Remove this line to create a well-formed Fossil manifest.