From: dan Date: Mon, 12 Aug 2024 11:13:56 +0000 (+0000) Subject: Update the auxiliary function API to include xTokenize_v2() instead of xSetLocale(). X-Git-Tag: version-3.47.0~220^2~17 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=12866777738884e44d561e27f0d3e17ecae04a4e;p=thirdparty%2Fsqlite.git Update the auxiliary function API to include xTokenize_v2() instead of xSetLocale(). FossilOrigin-Name: f7d56a1f2149f0da117167db62e2c28ec337e8da3403873b64cdfc6a951e2e8e --- diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index f87b03a5c7..784511de26 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -327,21 +327,10 @@ struct Fts5PhraseIter { ** SQLite error code is returned. The final value of the output parameters ** is undefined in this case. ** -** xTokenizeSetLocale(pFts5, pLocale, nLocale) -** This API function is used to invoke the fts5_tokenizer_v2.xSetLocale() -** method of the fts5 table's tokenizer, in the same way as xTokenize() is -** used to invoke fts5_tokenizer_v2.xTokenize(). -** -** Parameters pLocale and nLocale may both be 0, in which case the tokenizer -** is configured to use its default locale. Otherwise, pLocale should point -** to a buffer containing the utf-8 encoded name of the locale to use. -** It does not have to be nul-terminated. nLocale must be passed the size -** of the text in bytes. The buffer indicated by pLocale must remain valid -** for the duration of any calls made to xTokenize() by the auxiliary -** function call up until the next invocation of xTokenizeSetLocale(), if -** any. -** -** SQLITE_OK is returned on success, or an SQLite error code otherwise. +** xTokenize_v2: +** Tokenize text using the tokenizer belonging to the FTS5 table. This +** API is the same as the xTokenize() API, except that it allows a tokenizer +** locale to be specified. 
*/ struct Fts5ExtensionApi { int iVersion; /* Currently always set to 4 */ @@ -389,7 +378,12 @@ struct Fts5ExtensionApi { /* Below this point are iVersion>=4 only */ int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn); - int (*xTokenizeSetLocale)(Fts5Context*, const char *p, int n); + int (*xTokenize_v2)(Fts5Context*, + const char *pText, int nText, /* Text to tokenize */ + const char *pLoc, int nLoc, /* Locale to pass to tokenizer */ + void *pCtx, /* Context passed to xToken() */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ + ); }; /* @@ -482,32 +476,13 @@ struct Fts5ExtensionApi { ** may abandon the tokenization and return any error code other than ** SQLITE_OK or SQLITE_DONE. ** -** xSetLocale: -** This function is invoked by FTS5 to configure the locale to use for -** subsequent calls to xTokenize. The second argument is a pointer to -** a nul-terminated buffer containing the utf-8 encoded name of the locale -** to use. The third argument is the size of the buffer in bytes, not -** including the nul-terminator character. This function may also be -** invoked with the second and third parameters set to 0 - instructing -** the tokenizer to use its default locale. -** -** FTS5 guarantees that any buffer passed to xSetLocale() will remain -** valid until either the next call to xSetLocale() or xDelete() on the -** same tokenizer object. -** -** This function should return SQLITE_OK if successful, or an SQLite -** error code if an error occurs. If an error does occur and an error -** code is returned, execution of the current statement is abandoned -** and FTS5 returns the error code to the caller. -** -** Often, this function is not required and is never invoked. It is only -** ever invoked when processing a value that has had a locale associated -** with it using SQL function fts5_locale(). -** -** It is not necessary to supply an implementation of this method when -** registering a tokenizer. 
If fts5_tokenizer_v2.xSetLocale is set to NULL, -** then no attempt is made to pass locale information through to the -** tokenizer. +** If the tokenizer is registered using an fts5_tokenizer_v2 object, +** then the xTokenize() method has two additional arguments - pLocale +** and nLocale. These specify the locale that the tokenizer should use +** for the current request. If pLocale and nLocale are both 0, then the +** tokenizer should use its default locale. Otherwise, pLocale points to +** a buffer containing the name of the locale to use as utf-8 text. nLocale +** contains the number of bytes in pLocale. pLocale is not nul-terminated. ** ** SYNONYM SUPPORT ** diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 8695369f5e..fb177d561e 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -226,28 +226,6 @@ static int fts5HighlightCb( return rc; } -/* -** Use xTokenizeSetLocale() to configure the tokenizer to use the locale -** associated with column iCol of the current row. -*/ -static int fts5ConfigureTokenizer( - const Fts5ExtensionApi *pApi, - Fts5Context *pFts, - int iCol -){ - int rc = SQLITE_OK; - const char *zLocale = 0; - int nLocale = 0; - - assert( pApi->iVersion>=4 ); /* Ensure xColumnLocale() is available */ - - rc = pApi->xColumnLocale(pFts, iCol, &zLocale, &nLocale); - if( rc==SQLITE_OK ){ - rc = pApi->xTokenizeSetLocale(pFts, zLocale, nLocale); - } - return rc; -} - /* ** Implementation of highlight() function. 
@@ -279,15 +257,19 @@ static void fts5HighlightFunction( sqlite3_result_text(pCtx, "", -1, SQLITE_STATIC); rc = SQLITE_OK; }else if( ctx.zIn ){ + const char *pLoc = 0; + int nLoc = 0; if( rc==SQLITE_OK ){ rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); } if( rc==SQLITE_OK ){ - rc = fts5ConfigureTokenizer(pApi, pFts, iCol); + rc = pApi->xColumnLocale(pFts, iCol, &pLoc, &nLoc); } if( rc==SQLITE_OK ){ - rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); + rc = pApi->xTokenize_v2( + pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx, fts5HighlightCb + ); } if( ctx.bOpen ){ fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); @@ -484,6 +466,8 @@ static void fts5SnippetFunction( memset(&sFinder, 0, sizeof(Fts5SFinder)); for(i=0; ixColumnText(pFts, i, &sFinder.zDoc, &nDoc); if( rc!=SQLITE_OK ) break; - rc = fts5ConfigureTokenizer(pApi, pFts, i); + rc = pApi->xColumnLocale(pFts, i, &pLoc, &nLoc); if( rc!=SQLITE_OK ) break; - rc = pApi->xTokenize(pFts, - sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb + rc = pApi->xTokenize_v2(pFts, + sFinder.zDoc, nDoc, pLoc, nLoc, (void*)&sFinder, fts5SentenceFinderCb ); if( rc!=SQLITE_OK ) break; rc = pApi->xColumnSize(pFts, i, &nDocsize); @@ -552,6 +536,9 @@ static void fts5SnippetFunction( rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); } if( ctx.zIn ){ + const char *pLoc = 0; /* Locale to tokenize in */ + int nLoc = 0; /* Bytes in pLoc */ + if( rc==SQLITE_OK ){ rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); } @@ -570,10 +557,12 @@ static void fts5SnippetFunction( } if( rc==SQLITE_OK ){ - rc = fts5ConfigureTokenizer(pApi, pFts, iBestCol); + rc = pApi->xColumnLocale(pFts, iBestCol, &pLoc, &nLoc); } if( rc==SQLITE_OK ){ - rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); + rc = pApi->xTokenize_v2( + pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx,fts5HighlightCb + ); } if( ctx.bOpen ){ fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); diff --git a/ext/fts5/fts5_main.c 
b/ext/fts5/fts5_main.c index 0aa80093bc..9cc7b5cbf3 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -220,10 +220,6 @@ struct Fts5Cursor { int nInstAlloc; /* Size of aInst[] array (entries / 3) */ int nInstCount; /* Number of phrase instances */ int *aInst; /* 3 integers per phrase instance */ - - /* Values set by xTokenizeSetLocale() */ - const char *pLocale; - int nLocale; }; /* @@ -2128,29 +2124,33 @@ static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } -static int fts5ApiTokenize( +static int fts5ApiTokenize_v2( Fts5Context *pCtx, const char *pText, int nText, + const char *pLoc, int nLoc, void *pUserData, int (*xToken)(void*, int, const char*, int, int, int) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); int rc = SQLITE_OK; - const char *pLocale = pCsr->pLocale; - if( pLocale ){ - rc = fts5SetLocale(pTab->pConfig, pLocale, pCsr->nLocale); - } - if( rc==SQLITE_OK ){ - rc = sqlite3Fts5Tokenize( - pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken - ); - } - if( pLocale ){ - sqlite3Fts5ClearLocale(pTab->pConfig); - } + + fts5SetLocale(pTab->pConfig, pLoc, nLoc); + rc = sqlite3Fts5Tokenize(pTab->pConfig, + FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken + ); + fts5SetLocale(pTab->pConfig, 0, 0); + return rc; } +static int fts5ApiTokenize( + Fts5Context *pCtx, + const char *pText, int nText, + void *pUserData, + int (*xToken)(void*, int, const char*, int, int, int) +){ + return fts5ApiTokenize_v2(pCtx, pText, nText, 0, 0, pUserData, xToken); +} static int fts5ApiPhraseCount(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; @@ -2720,21 +2720,6 @@ static int fts5ApiColumnLocale( return rc; } -/* -** The xTokenizeSetLocale() API. 
-*/ -static int fts5ApiTokenizeSetLocale( - Fts5Context *pCtx, - const char *pLocale, - int nLocale -){ - int rc = SQLITE_OK; - Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; - pCsr->pLocale = pLocale; - pCsr->nLocale = nLocale; - return rc; -} - static const Fts5ExtensionApi sFts5Api = { 4, /* iVersion */ fts5ApiUserData, @@ -2759,7 +2744,7 @@ static const Fts5ExtensionApi sFts5Api = { fts5ApiQueryToken, fts5ApiInstToken, fts5ApiColumnLocale, - fts5ApiTokenizeSetLocale + fts5ApiTokenize_v2 }; /* @@ -2813,8 +2798,6 @@ static void fts5ApiInvoke( pCsr->pAux = pAux; pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); pCsr->pAux = 0; - pCsr->pLocale = 0; - pCsr->nLocale = 0; } static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 2fcc3815fd..60b3f0e9ef 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -825,11 +825,9 @@ struct F5tTokenizerModule { /* ** zLocale: -** Buffer zLocale contains the current locale, as configured by the most -** recent call to xSetLocale(). A NULL (default) locale is represented as -** a 0 byte string - "\0". -** -** This can be retrieved by a Tcl tokenize script using [sqlite3_fts5_locale]. +** Within a call to xTokenize_v2(), pLocale/nLocale store the locale +** passed to the call by fts5. This can be retrieved by a Tcl tokenize +** script using [sqlite3_fts5_locale]. 
*/ struct F5tTokenizerInstance { Tcl_Interp *interp; @@ -837,27 +835,10 @@ struct F5tTokenizerInstance { F5tTokenizerModule *pModule; Fts5Tokenizer *pParent; F5tTokenizerContext *pContext; - char zLocale[128]; + const char *pLocale; + int nLocale; }; -static int f5tTokenizerSetLocale( - Fts5Tokenizer *pTokenizer, - const char *pLocale, - int nLocale -){ - F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)pTokenizer; - if( nLocale>=sizeof(pInst->zLocale) ){ - return SQLITE_ERROR; - } - - memset(pInst->zLocale, 0, sizeof(pInst->zLocale)); - if( nLocale>0 ){ - memcpy(pInst->zLocale, pLocale, nLocale); - } - - return SQLITE_OK; -} - static int f5tTokenizerCreate( void *pCtx, const char **azArg, @@ -1011,10 +992,8 @@ static int f5tTokenizerTokenize_v2( int rc = SQLITE_OK; F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; - memset(pInst->zLocale, 0, sizeof(pInst->zLocale)); - if( pLoc && nLoczLocale) ){ - memcpy(pInst->zLocale, pLoc, nLoc); - } + pInst->pLocale = pLoc; + pInst->nLocale = nLoc; if( pInst->pParent ){ CallbackCtx ctx; @@ -1036,7 +1015,8 @@ static int f5tTokenizerTokenize_v2( rc = f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken); } - memset(pInst->zLocale, 0, sizeof(pInst->zLocale)); + pInst->pLocale = 0; + pInst->nLocale = 0; return rc; } static int f5tTokenizerTokenize( @@ -1072,7 +1052,9 @@ static int SQLITE_TCLAPI f5tTokenizerLocale( return TCL_ERROR; } - Tcl_SetObjResult(interp, Tcl_NewStringObj(p->pInst->zLocale, -1)); + Tcl_SetObjResult(interp, + Tcl_NewStringObj(p->pInst->pLocale, p->pInst->nLocale) + ); return TCL_OK; } diff --git a/manifest b/manifest index a8ab8d86ed..d3ebedd919 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\strunk\schanges\sinto\sthis\sbranch. -D 2024-08-10T20:03:01.148 +C Update\sthe\sauxiliary\sfunction\sAPI\sto\sinclude\sxTokenize_x2()\sinstead\sof\sxSetLocale(). 
+D 2024-08-12T11:13:56.109 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -92,17 +92,17 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7 F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e -F ext/fts5/fts5.h 1bdf3cde2b00e80f62925931471b6a00da38546eed6f2d9fe6a1d5999cbe3ba6 +F ext/fts5/fts5.h 7f1197009fc0e9822a8a584aa1f90591bdbf04f4503ecfe06949f3afe7a1fe06 F ext/fts5/fts5Int.h b40bb0bd54aaa4ac4712b6c5763b2167764614aaef204dbae81638b4548bca5d -F ext/fts5/fts5_aux.c 598c80fc0faabab91c833cdda99f8e36387bd907f4acb0480a19b612a4add93e +F ext/fts5/fts5_aux.c 0d0ee62dfebe93ccf6b293edb0b21ebe5c8bdc85e962a001745f2d13ea3e79d2 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 F ext/fts5/fts5_config.c 187f7ffa5eddd6539ffa592de85e95b18be951728491390121bb215549a24a2a F ext/fts5/fts5_expr.c ee1949c5c20901cbaca0885902f1d0c136679262dee71b457a34a92e1d16ddac F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe -F ext/fts5/fts5_main.c d6afe9ddaa995d52009d6a3a06b1f4d7481610d7d34b1af6f05bcd4756f7b99a +F ext/fts5/fts5_main.c cd61abbfd02f0f22e3c124ae2ad10c2a51cdf8acf38177410d44e134c1d1364b F ext/fts5/fts5_storage.c 5bf88213ff5911625c142ac332ddba10dcd0869e757f91f2a3d27f27ba595992 -F ext/fts5/fts5_tcl.c 7bd4bcd4557814df4fe8412edac34a309da83e90780cdf1d0b6bf5da18e127dd +F ext/fts5/fts5_tcl.c 
50c7e16753fde0c4d80d8abd00a4ed2b0e998d5d3899a484510d01923c5da43b F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b F ext/fts5/fts5_tokenize.c 63ebe9057ed3f4dfc49944bc4aee3d3b745cc2faff73bc152ed3554ed3bf9cf4 @@ -2207,8 +2207,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 924d3467cebd71ac8dfef8225965d71eda82ef1f0b43508af5b7ea78e2099cf3 d4014c87ba9b011a6a04c2bf85879b668dc762ebcbbfb50a2f8a417ce594ef88 -R f5c4cc25ad182e75985989cfb0d3dcb3 +P b243007525a825b3daf8aa9bb2d3088efb853bd8b7c9ea3c0924fde193eb5c44 +R ca9e80b1fa88da85d78ec3898ab25337 U dan -Z c7a930832d2066471c172888c15e20f1 +Z 8ccf549b58c37ea212fa9c3ab517ca5f # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 2a2534e013..68e7d56fa7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b243007525a825b3daf8aa9bb2d3088efb853bd8b7c9ea3c0924fde193eb5c44 +f7d56a1f2149f0da117167db62e2c28ec337e8da3403873b64cdfc6a951e2e8e