** Of course, if bV2Native is false, then x1 contains the real routines and
** x2 the synthesized ones. In this case a pointer to the Fts5TokenizerModule
** object should be passed to x2.xCreate.
+**
+** The synthesized wrapper routines are necessary for xFindTokenizer(_v2)
+** calls.
*/
struct Fts5TokenizerModule {
char *zName; /* Name of tokenizer */
void *pUserData; /* User pointer passed to xCreate() */
int bV2Native; /* True if v2 native tokenizer */
fts5_tokenizer x1; /* Tokenizer functions */
- fts5_tokenizer_v2 x2; /* Tokenizer functions */
+ fts5_tokenizer_v2 x2; /* V2 tokenizer functions */
void (*xDestroy)(void*); /* Destructor function */
Fts5TokenizerModule *pNext; /* Next registered tokenizer module */
};
** If the cursor iterates in descending order of rowid, iFirstRowid
** is the upper limit (i.e. the "first" rowid visited) and iLastRowid
** the lower.
-**
-** pLocale, nLocale:
-** These are set by API method xTokenizeSetLocale(). xTokenizeSetLocale()
-** does not actually configure the tokenizer, it just stores the values
-** it is passed in these variables. The fts5_tokenizer_v2.xSetLocale()
-** method is called from within the xTokenize() API method if required.
*/
struct Fts5Cursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
#define BitFlagTest(x,y) (((x) & (y))!=0)
/*
-** The subtype values returned by fts5_locale() are tagged with.
+** The subtype value and header bytes used by fts5_locale().
*/
#define FTS5_LOCALE_SUBTYPE ((unsigned int)'L')
#define FTS5_LOCALE_HEADER "\x00\xE0\xB2\xEB"
}
/*
-** Configure the tokenizer to use the locale specified by nLocale byte
-** buffer zLocale. Return SQLITE_OK if successful, or an SQLite error
-** code otherwise.
+** Arrange for subsequent calls to sqlite3Fts5Tokenize() to use the locale
+** specified by pLocale/nLocale. The buffer indicated by pLocale must remain
+** valid until after the final call to sqlite3Fts5Tokenize() that will use
+** the locale.
*/
-static int fts5SetLocale(
+static void fts5SetLocale(
Fts5Config *pConfig,
const char *zLocale,
int nLocale
){
- int rc = SQLITE_OK;
Fts5TokenizerConfig *pT = &pConfig->t;
pT->pLocale = zLocale;
pT->nLocale = nLocale;
- return rc;
}
/*
-** Reset the locale of the tokenizer to its default.
+** Clear any locale configured by an earlier call to fts5SetLocale() or
+** sqlite3Fts5ExtractText().
*/
void sqlite3Fts5ClearLocale(Fts5Config *pConfig){
fts5SetLocale(pConfig, 0, 0);
** 1) Ordinary values. The text can be extracted from these using
** sqlite3_value_text().
**
-** 2) Blobs tagged with sub-type FTS5_LOCALE_SUBTYPE, or those read from
-** the content table of a normal content or external-conten table
-** with locale=1 set.
+** 2) Combination text/locale blobs created by fts5_locale(). There
+** are several cases for these:
+**
+** * Blobs tagged with FTS5_LOCALE_SUBTYPE,
+** * Blobs read from the content table of a locale=1 external-content
+** table, and
+** * Blobs read from the content table of a locale=1 regular
+** content table.
+**
+** The first two cases above should have the 4 byte FTS5_LOCALE_HEADER
+** header. It is an error if a blob with the subtype or a blob read
+** from the content table of an external content table does not have
+** the required header. A blob read from the content table of a regular
+** locale=1 table does not have the header. This is to save space.
**
** If successful, SQLITE_OK is returned and output parameters (*ppText)
** and (*pnText) are set to point to a buffer containing the extracted utf-8
** Parameter bContent must be true if the value was read from an indexed
** column (i.e. not UNINDEXED) of the on disk content.
**
-** If pbResetTokenizer is not NULL and if case (2) is used, then the
-** tokenizer is configured to use the locale. In this case (*pbResetTokenizer)
-** is set to true before returning, to indicate that the caller must
-** call sqlite3Fts5ClearLocale() to reset the tokenizer after tokenizing
-** the text.
+** If pbResetTokenizer is not NULL and if case (2) is used, then
+** fts5SetLocale() is called to ensure subsequent sqlite3Fts5Tokenize() calls
+** use the locale. In this case (*pbResetTokenizer) is set to true before
+** returning, to indicate that the caller must call sqlite3Fts5ClearLocale()
+** to clear the locale after tokenizing the text.
*/
int sqlite3Fts5ExtractText(
Fts5Config *pConfig,
nText = nBlob-nLocale-1;
if( pbResetTokenizer ){
- rc = fts5SetLocale(pConfig, (const char*)pBlob, nLocale);
+ fts5SetLocale(pConfig, (const char*)pBlob, nLocale);
*pbResetTokenizer = 1;
}
}
** the text of the expression, and sets output variable (*pzText) to
** point to a nul-terminated buffer containing the expression.
**
-** If pVal was an fts5_locale() value, then the tokenizer has been
-** configured to us the required locale.
+** If pVal was an fts5_locale() value, then fts5SetLocale() is called to
+** set the tokenizer to use the specified locale.
**
** If output variable (*pbFreeAndReset) is set to true, then the caller
** is required to (a) call sqlite3Fts5ClearLocale() to reset the tokenizer
** locale, and (b) call sqlite3_free() to free (*pzText).
*/
static int fts5ExtractExprText(
- Fts5FullTable *pTab,
- sqlite3_value *pVal,
- char **pzText,
- int *pbFreeAndReset
+ Fts5Config *pConfig, /* Fts5 configuration */
+ sqlite3_value *pVal, /* Value to extract expression text from */
+ char **pzText, /* OUT: nul-terminated buffer of text */
+ int *pbFreeAndReset /* OUT: Free (*pzText) and clear locale */
){
const char *zText = 0;
int nText = 0;
int bReset = 0;
*pbFreeAndReset = 0;
- rc = sqlite3Fts5ExtractText(pTab->p.pConfig, pVal, 0, &bReset, &zText,&nText);
+ rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, &bReset, &zText, &nText);
if( rc==SQLITE_OK ){
if( bReset ){
*pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, zText);
if( rc!=SQLITE_OK ){
- sqlite3Fts5ClearLocale(pTab->p.pConfig);
+ sqlite3Fts5ClearLocale(pConfig);
}else{
*pbFreeAndReset = 1;
}
int bFreeAndReset = 0;
int bInternal = 0;
- rc = fts5ExtractExprText(pTab, apVal[i], &zText, &bFreeAndReset);
+ rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset);
if( rc!=SQLITE_OK ) goto filter_out;
if( zText==0 ) zText = "";
return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
}
+/*
+** Implementation of xTokenize_v2() API.
+*/
static int fts5ApiTokenize_v2(
Fts5Context *pCtx,
const char *pText, int nText,
return rc;
}
+
+/*
+** Implementation of xTokenize() API. This is just xTokenize_v2() with NULL/0
+** passed as the locale.
+*/
static int fts5ApiTokenize(
Fts5Context *pCtx,
const char *pText, int nText,
return rc;
}
+/*
+** This is called by various API functions - xInst, xPhraseFirst,
+** xPhraseFirstColumn etc. - to obtain the position list for phrase iPhrase
+** of the current row. This function works both for detail=full tables (in
+** which case the position-list was read from the fts index) and for other
+** detail= modes if the row content is available.
+*/
static int fts5CsrPoslist(
- Fts5Cursor *pCsr,
- int iPhrase,
- const u8 **pa,
- int *pn
+ Fts5Cursor *pCsr, /* Fts5 cursor object */
+ int iPhrase, /* Phrase to find position list for */
+ const u8 **pa, /* OUT: Pointer to position list buffer */
+ int *pn /* OUT: Size of (*pa) in bytes */
){
Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
int rc = SQLITE_OK;
*pn = 0;
}
-
return rc;
}
return pCsr;
}
+/*
+** Parameter zFmt is a printf() style formatting string. This function
+** formats it using the trailing arguments and returns the result as
+** an error message to the context passed as the first argument.
+*/
static void fts5ResultError(sqlite3_context *pCtx, const char *zFmt, ...){
char *zErr = 0;
va_list ap;
/*
** Value pVal was read from column iCol of the FTS5 table. This function
** returns it to the owner of pCtx via a call to an sqlite3_result_xxx()
-** function. This function deals with the same 3 cases as
+** function. This function deals with the same cases as
** sqlite3Fts5ExtractText():
**
** 1) Ordinary values. These can be returned using sqlite3_result_value().
**
-** 2) Blobs from fts5_locale().
+** 2) Blobs from fts5_locale(). The text is extracted from these and
+** returned via sqlite3_result_text(). The locale is discarded.
*/
static void fts5ExtractValueFromColumn(
sqlite3_context *pCtx,
return rc;
}
+/*
+** This function is used by xCreateTokenizer_v2() and xCreateTokenizer().
+** It allocates and partially populates a new Fts5TokenizerModule object.
+** The new object is already linked into the Fts5Global context before
+** returning.
+**
+** If successful, SQLITE_OK is returned and a pointer to the new
+** Fts5TokenizerModule object is returned via output parameter (*ppNew). All
+** that is required is for the caller to fill in the methods in
+** Fts5TokenizerModule.x1 and x2, and to set Fts5TokenizerModule.bV2Native
+** as appropriate.
+**
+** If an error occurs, an SQLite error code is returned and the final value
+** of (*ppNew) is undefined.
+*/
static int fts5NewTokenizerModule(
Fts5Global *pGlobal, /* Global context (one per db handle) */
const char *zName, /* Name of new function */
-C Update\sthe\sporter\stokenizer\sto\suse\slocales.
-D 2024-08-12T11:46:09.154
+C Fix\sfurther\sissues\sto\sdo\swith\sfts5\slocale\ssupport.
+D 2024-08-12T17:03:37.726
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
-F ext/fts5/fts5.h 7f1197009fc0e9822a8a584aa1f90591bdbf04f4503ecfe06949f3afe7a1fe06
-F ext/fts5/fts5Int.h b40bb0bd54aaa4ac4712b6c5763b2167764614aaef204dbae81638b4548bca5d
-F ext/fts5/fts5_aux.c 0d0ee62dfebe93ccf6b293edb0b21ebe5c8bdc85e962a001745f2d13ea3e79d2
+F ext/fts5/fts5.h 4c6998c6186268b4dbe9baef2c0d2ab974bd90996d61d4dbe801367249be6de4
+F ext/fts5/fts5Int.h 776b21159eef8d30379e5bc4627eae9e841d36e43f19dc8908c786e62aaf9e38
+F ext/fts5/fts5_aux.c 12cd2512f869217c38b70c31de5b5f741812734fafa80f55b32ea9bbd96e2152
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
F ext/fts5/fts5_config.c 187f7ffa5eddd6539ffa592de85e95b18be951728491390121bb215549a24a2a
-F ext/fts5/fts5_expr.c ee1949c5c20901cbaca0885902f1d0c136679262dee71b457a34a92e1d16ddac
+F ext/fts5/fts5_expr.c 3a24c6ab5b7545312a5ec03085ae705ede820a08f9a63f1d72829ed4a35da6f6
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
-F ext/fts5/fts5_main.c cd61abbfd02f0f22e3c124ae2ad10c2a51cdf8acf38177410d44e134c1d1364b
+F ext/fts5/fts5_main.c 4fe8349b812a9fde8e44ac5568f19d713ccc4790eb3ecb692f6551729c481b2b
F ext/fts5/fts5_storage.c 5bf88213ff5911625c142ac332ddba10dcd0869e757f91f2a3d27f27ba595992
F ext/fts5/fts5_tcl.c 50c7e16753fde0c4d80d8abd00a4ed2b0e998d5d3899a484510d01923c5da43b
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P f7d56a1f2149f0da117167db62e2c28ec337e8da3403873b64cdfc6a951e2e8e
-R 7151af5ed6816182b47b60322cc8dcba
+P 3291ce3a3359a80e51e4546a3d7a187cbe4c7530fca6632f0bb2728525efe212
+R 5bdde041363e74c68796cadade4d8480
U dan
-Z a17240af0068f64d6da9a8176108962c
+Z 098e7ed7a851f6658bf54618988e8ebd
# Remove this line to create a well-formed Fossil manifest.