** SQLite error code is returned. The final value of the output parameters
** is undefined in this case.
**
-** xTokenizeSetLocale(pFts5, pLocale, nLocale)
-** This API function is used to invoke the fts5_tokenizer_v2.xSetLocale()
-** method of the fts5 table's tokenizer, in the same way as xTokenize() is
-** used to invoke fts5_tokenizer_v2.xTokenize().
-**
-** Parameters pLocale and nLocale may both be 0, in which case the tokenizer
-** is configured to use its default locale. Otherwise, pLocale should point
-** to a buffer containing the utf-8 encoded name of the locale to use.
-** It does not have to be nul-terminated. nLocale must be passed the size
-** of the text in bytes. The buffer indicated by pLocale must remain valid
-** for the duration of any calls made to xTokenize() by the auxiliary
-** function call up until the next invocation of xTokenizeSetLocale(), if
-** any.
-**
-** SQLITE_OK is returned on success, or an SQLite error code otherwise.
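+** xTokenize_v2:
+** Tokenize text using the tokenizer belonging to the FTS5 table. This
+** API is the same as the xTokenize() API, except that it allows a tokenizer
+** locale to be specified. The locale is passed as a pointer to its name
+** (pLoc) and the size of that name in bytes (nLoc). If pLoc and nLoc
+** are both 0, the call is equivalent to xTokenize().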
*/
struct Fts5ExtensionApi {
int iVersion; /* Currently always set to 4 */
/* Below this point are iVersion>=4 only */
int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn);
- int (*xTokenizeSetLocale)(Fts5Context*, const char *p, int n);
+ int (*xTokenize_v2)(Fts5Context*,
+ const char *pText, int nText, /* Text to tokenize */
+ const char *pLoc, int nLoc, /* Locale to pass to tokenizer */
+ void *pCtx, /* Context passed to xToken() */
+ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
+ );
};
/*
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
-** xSetLocale:
-** This function is invoked by FTS5 to configure the locale to use for
-** subsequent calls to xTokenize. The second argument is a pointer to
-** a nul-terminated buffer containing the utf-8 encoded name of the locale
-** to use. The third argument is the size of the buffer in bytes, not
-** including the nul-terminator character. This function may also be
-** invoked with the second and third parameters set to 0 - instructing
-** the tokenizer to use its default locale.
-**
-** FTS5 guarantees that any buffer passed to xSetLocale() will remain
-** valid until either the next call to xSetLocale() or xDelete() on the
-** same tokenizer object.
-**
-** This function should return SQLITE_OK if successful, or an SQLite
-** error code if an error occurs. If an error does occur and an error
-** code is returned, execution of the current statement is abandoned
-** and FTS5 returns the error code to the caller.
-**
-** Often, this function is not required and is never invoked. It is only
-** ever invoked when processing a value that has had a locale associated
-** with it using SQL function fts5_locale().
-**
-** It is not necessary to supply an implementation of this method when
-** registering a tokenizer. If fts5_tokenizer_v2.xSetLocale is set to NULL,
-** then no attempt is made to pass locale information through to the
-** tokenizer.
+** If the tokenizer is registered using an fts5_tokenizer_v2 object,
+** then the xTokenize() method has two additional arguments - pLocale
+** and nLocale. These specify the locale that the tokenizer should use
+** for the current request. If pLocale and nLocale are both 0, then the
+** tokenizer should use its default locale. Otherwise, pLocale points to
+** a buffer containing the name of the locale to use as utf-8 text. nLocale
+** contains the number of bytes in pLocale. pLocale is not nul-terminated.
**
** SYNONYM SUPPORT
**
return rc;
}
-/*
-** Use xTokenizeSetLocale() to configure the tokenizer to use the locale
-** associated with column iCol of the current row.
-*/
-static int fts5ConfigureTokenizer(
- const Fts5ExtensionApi *pApi,
- Fts5Context *pFts,
- int iCol
-){
- int rc = SQLITE_OK;
- const char *zLocale = 0;
- int nLocale = 0;
-
- assert( pApi->iVersion>=4 ); /* Ensure xColumnLocale() is available */
-
- rc = pApi->xColumnLocale(pFts, iCol, &zLocale, &nLocale);
- if( rc==SQLITE_OK ){
- rc = pApi->xTokenizeSetLocale(pFts, zLocale, nLocale);
- }
- return rc;
-}
-
/*
** Implementation of highlight() function.
sqlite3_result_text(pCtx, "", -1, SQLITE_STATIC);
rc = SQLITE_OK;
}else if( ctx.zIn ){
+ const char *pLoc = 0;
+ int nLoc = 0;
if( rc==SQLITE_OK ){
rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
}
if( rc==SQLITE_OK ){
- rc = fts5ConfigureTokenizer(pApi, pFts, iCol);
+ rc = pApi->xColumnLocale(pFts, iCol, &pLoc, &nLoc);
}
if( rc==SQLITE_OK ){
- rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
+ rc = pApi->xTokenize_v2(
+ pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx, fts5HighlightCb
+ );
}
if( ctx.bOpen ){
fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1);
memset(&sFinder, 0, sizeof(Fts5SFinder));
for(i=0; i<nCol; i++){
if( iCol<0 || iCol==i ){
+ const char *pLoc = 0;
+ int nLoc = 0;
int nDoc;
int nDocsize;
int ii;
sFinder.nFirst = 0;
rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
if( rc!=SQLITE_OK ) break;
- rc = fts5ConfigureTokenizer(pApi, pFts, i);
+ rc = pApi->xColumnLocale(pFts, i, &pLoc, &nLoc);
if( rc!=SQLITE_OK ) break;
- rc = pApi->xTokenize(pFts,
- sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
+ rc = pApi->xTokenize_v2(pFts,
+ sFinder.zDoc, nDoc, pLoc, nLoc, (void*)&sFinder, fts5SentenceFinderCb
);
if( rc!=SQLITE_OK ) break;
rc = pApi->xColumnSize(pFts, i, &nDocsize);
rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
}
if( ctx.zIn ){
+ const char *pLoc = 0; /* Locale to tokenize in */
+ int nLoc = 0; /* Bytes in pLoc */
+
if( rc==SQLITE_OK ){
rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
}
}
if( rc==SQLITE_OK ){
- rc = fts5ConfigureTokenizer(pApi, pFts, iBestCol);
+ rc = pApi->xColumnLocale(pFts, iBestCol, &pLoc, &nLoc);
}
if( rc==SQLITE_OK ){
- rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
+ rc = pApi->xTokenize_v2(
+ pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx,fts5HighlightCb
+ );
}
if( ctx.bOpen ){
fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1);
int nInstAlloc; /* Size of aInst[] array (entries / 3) */
int nInstCount; /* Number of phrase instances */
int *aInst; /* 3 integers per phrase instance */
-
- /* Values set by xTokenizeSetLocale() */
- const char *pLocale;
- int nLocale;
};
/*
return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
}
-static int fts5ApiTokenize(
+static int fts5ApiTokenize_v2(
Fts5Context *pCtx,
const char *pText, int nText,
+ const char *pLoc, int nLoc,
void *pUserData,
int (*xToken)(void*, int, const char*, int, int, int)
){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
int rc = SQLITE_OK;
- const char *pLocale = pCsr->pLocale;
- if( pLocale ){
- rc = fts5SetLocale(pTab->pConfig, pLocale, pCsr->nLocale);
- }
- if( rc==SQLITE_OK ){
- rc = sqlite3Fts5Tokenize(
- pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
- );
- }
- if( pLocale ){
- sqlite3Fts5ClearLocale(pTab->pConfig);
- }
+
+ fts5SetLocale(pTab->pConfig, pLoc, nLoc);
+ rc = sqlite3Fts5Tokenize(pTab->pConfig,
+ FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
+ );
+ fts5SetLocale(pTab->pConfig, 0, 0);
+
return rc;
}
+static int fts5ApiTokenize(
+ Fts5Context *pCtx,
+ const char *pText, int nText,
+ void *pUserData,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){
+ return fts5ApiTokenize_v2(pCtx, pText, nText, 0, 0, pUserData, xToken);
+}
static int fts5ApiPhraseCount(Fts5Context *pCtx){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
return rc;
}
-/*
-** The xTokenizeSetLocale() API.
-*/
-static int fts5ApiTokenizeSetLocale(
- Fts5Context *pCtx,
- const char *pLocale,
- int nLocale
-){
- int rc = SQLITE_OK;
- Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
- pCsr->pLocale = pLocale;
- pCsr->nLocale = nLocale;
- return rc;
-}
-
static const Fts5ExtensionApi sFts5Api = {
4, /* iVersion */
fts5ApiUserData,
fts5ApiQueryToken,
fts5ApiInstToken,
fts5ApiColumnLocale,
- fts5ApiTokenizeSetLocale
+ fts5ApiTokenize_v2
};
/*
pCsr->pAux = pAux;
pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
pCsr->pAux = 0;
- pCsr->pLocale = 0;
- pCsr->nLocale = 0;
}
static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
/*
** pLocale, nLocale:
-** Buffer zLocale contains the current locale, as configured by the most
-** recent call to xSetLocale(). A NULL (default) locale is represented as
-** a 0 byte string - "\0".
-**
-** This can be retrieved by a Tcl tokenize script using [sqlite3_fts5_locale].
+** Within a call to xTokenize_v2(), pLocale/nLocale store the locale
+** passed to the call by fts5. This can be retrieved by a Tcl tokenize
+** script using [sqlite3_fts5_locale].
*/
struct F5tTokenizerInstance {
Tcl_Interp *interp;
F5tTokenizerModule *pModule;
Fts5Tokenizer *pParent;
F5tTokenizerContext *pContext;
- char zLocale[128];
+ const char *pLocale;
+ int nLocale;
};
-static int f5tTokenizerSetLocale(
- Fts5Tokenizer *pTokenizer,
- const char *pLocale,
- int nLocale
-){
- F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)pTokenizer;
- if( nLocale>=sizeof(pInst->zLocale) ){
- return SQLITE_ERROR;
- }
-
- memset(pInst->zLocale, 0, sizeof(pInst->zLocale));
- if( nLocale>0 ){
- memcpy(pInst->zLocale, pLocale, nLocale);
- }
-
- return SQLITE_OK;
-}
-
static int f5tTokenizerCreate(
void *pCtx,
const char **azArg,
int rc = SQLITE_OK;
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
- memset(pInst->zLocale, 0, sizeof(pInst->zLocale));
- if( pLoc && nLoc<sizeof(pInst->zLocale) ){
- memcpy(pInst->zLocale, pLoc, nLoc);
- }
+ pInst->pLocale = pLoc;
+ pInst->nLocale = nLoc;
if( pInst->pParent ){
CallbackCtx ctx;
rc = f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken);
}
- memset(pInst->zLocale, 0, sizeof(pInst->zLocale));
+ pInst->pLocale = 0;
+ pInst->nLocale = 0;
return rc;
}
static int f5tTokenizerTokenize(
return TCL_ERROR;
}
- Tcl_SetObjResult(interp, Tcl_NewStringObj(p->pInst->zLocale, -1));
+ Tcl_SetObjResult(interp,
+ Tcl_NewStringObj(p->pInst->pLocale, p->pInst->nLocale)
+ );
return TCL_OK;
}
-C Merge\strunk\schanges\sinto\sthis\sbranch.
-D 2024-08-10T20:03:01.148
+C Update\sthe\sauxiliary\sfunction\sAPI\sto\sinclude\sxTokenize_x2()\sinstead\sof\sxSetLocale().
+D 2024-08-12T11:13:56.109
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
-F ext/fts5/fts5.h 1bdf3cde2b00e80f62925931471b6a00da38546eed6f2d9fe6a1d5999cbe3ba6
+F ext/fts5/fts5.h 7f1197009fc0e9822a8a584aa1f90591bdbf04f4503ecfe06949f3afe7a1fe06
F ext/fts5/fts5Int.h b40bb0bd54aaa4ac4712b6c5763b2167764614aaef204dbae81638b4548bca5d
-F ext/fts5/fts5_aux.c 598c80fc0faabab91c833cdda99f8e36387bd907f4acb0480a19b612a4add93e
+F ext/fts5/fts5_aux.c 0d0ee62dfebe93ccf6b293edb0b21ebe5c8bdc85e962a001745f2d13ea3e79d2
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
F ext/fts5/fts5_config.c 187f7ffa5eddd6539ffa592de85e95b18be951728491390121bb215549a24a2a
F ext/fts5/fts5_expr.c ee1949c5c20901cbaca0885902f1d0c136679262dee71b457a34a92e1d16ddac
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
-F ext/fts5/fts5_main.c d6afe9ddaa995d52009d6a3a06b1f4d7481610d7d34b1af6f05bcd4756f7b99a
+F ext/fts5/fts5_main.c cd61abbfd02f0f22e3c124ae2ad10c2a51cdf8acf38177410d44e134c1d1364b
F ext/fts5/fts5_storage.c 5bf88213ff5911625c142ac332ddba10dcd0869e757f91f2a3d27f27ba595992
-F ext/fts5/fts5_tcl.c 7bd4bcd4557814df4fe8412edac34a309da83e90780cdf1d0b6bf5da18e127dd
+F ext/fts5/fts5_tcl.c 50c7e16753fde0c4d80d8abd00a4ed2b0e998d5d3899a484510d01923c5da43b
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c 63ebe9057ed3f4dfc49944bc4aee3d3b745cc2faff73bc152ed3554ed3bf9cf4
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 924d3467cebd71ac8dfef8225965d71eda82ef1f0b43508af5b7ea78e2099cf3 d4014c87ba9b011a6a04c2bf85879b668dc762ebcbbfb50a2f8a417ce594ef88
-R f5c4cc25ad182e75985989cfb0d3dcb3
+P b243007525a825b3daf8aa9bb2d3088efb853bd8b7c9ea3c0924fde193eb5c44
+R ca9e80b1fa88da85d78ec3898ab25337
U dan
-Z c7a930832d2066471c172888c15e20f1
+Z 8ccf549b58c37ea212fa9c3ab517ca5f
# Remove this line to create a well-formed Fossil manifest.
-b243007525a825b3daf8aa9bb2d3088efb853bd8b7c9ea3c0924fde193eb5c44
+f7d56a1f2149f0da117167db62e2c28ec337e8da3403873b64cdfc6a951e2e8e