** with column iCol of the current row. Usually, there is no associated
** locale, and output parameters (*pzLocale) and (*pnLocale) are set
** to NULL and 0, respectively. However, if the fts5_locale() function
-** was used to associated a locale with the value when it was inserted
-** into the fts5 table, then (*pzLocale) is set to point to a buffer
-** containing the name of the locale in utf-8 encoding. (*pnLocale) is
-** set to the size in bytes of the buffer.
+** was used to associate a locale with the value when it was inserted
+** into the fts5 table, then (*pzLocale) is set to point to a nul-terminated
+** buffer containing the name of the locale in utf-8 encoding. (*pnLocale)
+** is set to the size in bytes of the buffer, not including the
+** nul-terminator.
**
** If successful, SQLITE_OK is returned. Or, if an error occurs, an
** SQLite error code is returned. The final value of the output parameters
**
** Parameters pLocale and nLocale may both be 0, in which case the tokenizer
** is configured to use its default locale. Otherwise, pLocale should point
-** to a buffer containing the name of the locale to use encoded as utf-8.
+** to a buffer containing the utf-8 encoded name of the locale to use.
** It does not have to be nul-terminated. nLocale must be passed the size
** of the text in bytes. The buffer indicated by pLocale must remain valid
** for the duration of any calls made to xTokenize() by the auxiliary
/* Below this point are iVersion>=4 only */
int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn);
-
- int (*xTokenizeSetLocale)(Fts5Context*, const char *z, int n);
+ int (*xTokenizeSetLocale)(Fts5Context*, const char *p, int n);
};
/*
**
** Applications may also register custom tokenizer types. A tokenizer
** is registered by providing fts5 with a populated instance of the
-** following structure. All structure methods must be defined, setting
-** any member of the fts5_tokenizer struct to NULL leads to undefined
-** behaviour. The structure methods are expected to function as follows:
+** following structure. Of the structure methods, xCreate, xDelete and
+** xTokenize must be supplied; setting any of these three members of the
+** fts5_tokenizer_v2 struct to NULL leads to undefined behaviour. The
+** structure methods are expected to function as follows:
**
** xCreate:
** This function is used to allocate and initialize a tokenizer instance.
** A tokenizer instance is required to actually tokenize text.
**
** The first argument passed to this function is a copy of the (void*)
-** pointer provided by the application when the fts5_tokenizer object
+** pointer provided by the application when the fts5_tokenizer_v2 object
** was registered with FTS5 (the third argument to xCreateTokenizer()).
** The second and third arguments are an array of nul-terminated strings
** containing the tokenizer arguments, if any, specified following the
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
+** xSetLocale:
+** This function is invoked by FTS5 to configure the locale to use for
+** subsequent calls to xTokenize. The second argument is a pointer to
+** a nul-terminated buffer containing the utf-8 encoded name of the locale
+** to use. The third argument is the size of the buffer in bytes, not
+** including the nul-terminator character. This function may also be
+** invoked with the second and third parameters set to 0 - instructing
+** the tokenizer to use its default locale.
+**
+** FTS5 guarantees that any buffer passed to xSetLocale() will remain
+** valid until either the next call to xSetLocale() or xDelete() on the
+** same tokenizer object.
+**
+** This function should return SQLITE_OK if successful, or an SQLite
+** error code if an error occurs. If an error does occur and an error
+** code is returned, execution of the current statement is abandoned
+** and FTS5 returns the error code to the caller.
+**
+** Often, this function is not required and is never invoked. It is only
+** ever invoked when processing a value that has had a locale associated
+** with it using SQL function fts5_locale().
+**
+** It is not necessary to supply an implementation of this method when
+** registering a tokenizer. If fts5_tokenizer_v2.xSetLocale is set to NULL,
+** then no attempt is made to pass locale information through to the
+** tokenizer.
+**
** SYNONYM SUPPORT
**
** Custom tokenizers may also support synonyms. Consider a case in which a
** inefficient.
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
-typedef struct fts5_tokenizer fts5_tokenizer;
-struct fts5_tokenizer {
+typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2;
+struct fts5_tokenizer_v2 {
+ int iVersion; /* Currently always 2 */
+
int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
void (*xDelete)(Fts5Tokenizer*);
int (*xTokenize)(Fts5Tokenizer*,
int iEnd /* Byte offset of end of token within input text */
)
);
-};
-typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2;
-struct fts5_tokenizer_v2 {
- int iVersion; /* Currently always 2 */
+ int (*xSetLocale)(Fts5Tokenizer*, const char *pLocale, int nLocale);
+};
+/*
+** New code should use the fts5_tokenizer_v2 type to define tokenizer
+** implementations. The following type is included for legacy applications
+** that still use it.
+*/
+typedef struct fts5_tokenizer fts5_tokenizer;
+struct fts5_tokenizer {
int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
void (*xDelete)(Fts5Tokenizer*);
int (*xTokenize)(Fts5Tokenizer*,
int iEnd /* Byte offset of end of token within input text */
)
);
-
- int (*xSetLocale)(Fts5Tokenizer*, const char *pLocale, int nLocale);
};
+
/* Flags that may be passed as the third argument to xTokenize() */
#define FTS5_TOKENIZE_QUERY 0x0001
#define FTS5_TOKENIZE_PREFIX 0x0002
-C Merge\slatest\schanges\sfrom\strunk\sinto\sthis\sbranch.
-D 2024-07-31T10:52:38.878
+C Add\smissing\sdocumentation\sfor\snew\sfeatures\sto\sfts5.h.
+D 2024-07-31T15:46:41.319
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
-F ext/fts5/fts5.h 7cd47e5dfe68d798e667caa76722374f0c909f2db05bb4d42b1ec5300d18e658
+F ext/fts5/fts5.h f5451da088d0004c4be7314e2fdb41fda16ce682ce2aa3b54b9474ebe9a013d5
F ext/fts5/fts5Int.h 833a2fe729f926ebcde47e21e495d141b99ede9a188fc577873f24bea0f0bfa2
F ext/fts5/fts5_aux.c 652f839dc0c77431295f10b08f268631560bb5630e65fd701de7a58744428a82
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
F ext/wasm/SQLTester/index.html 3f8a016df0776be76605abf20e815ecaafbe055abac0e1fe5ea080e7846b760d
F ext/wasm/SQLTester/touint8array.c 2d5ece04ec1393a6a60c4bf96385bda5e1a10ad49f3038b96460fc5e5aa7e536
F ext/wasm/api/EXPORTED_FUNCTIONS.sqlite3-auth 7ac80cc3b6a6d52e041bb295e85555ce797be78c15ef2008a64ae58815014080
-F ext/wasm/api/EXPORTED_FUNCTIONS.sqlite3-core 400213eb52a7e5ad5f448053d375cacf4dac2cf45d134f3edfe485ae4a49a183 w ext/wasm/api/EXPORTED_FUNCTIONS.sqlite3-api
+F ext/wasm/api/EXPORTED_FUNCTIONS.sqlite3-core 400213eb52a7e5ad5f448053d375cacf4dac2cf45d134f3edfe485ae4a49a183
F ext/wasm/api/EXPORTED_FUNCTIONS.sqlite3-preupdate d1d62a2212099f2c0782d730beb8cb84a7a52d99c15ead2cb9b1411fff5fd6b1
F ext/wasm/api/EXPORTED_FUNCTIONS.sqlite3-see fb29e62082a658f0d81102488414d422c393c4b20cc2f685b216bc566237957b
F ext/wasm/api/EXPORTED_FUNCTIONS.sqlite3-session 213b6c04267cb9bd760172db011eb1650732805fb3d01f9395478a8ceec18eb0
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P ae435aff1785d5832821c19dc88ccf6c496a7ff55be80276b31e5c9abee723db 86de4e755e37dc1cbcbd59018927aa87ff49fc15f706a36187631d8f14075c12
-R 854c24bb1cc78c16c421b7985fead9f3
+P ff64939ec8399949393f1029fa6d514892fbf2bf1498545300cc5e120b9622a5
+R f7dbcf0c98ed562d42b896f7c143383b
U dan
-Z 7ee036ea576e2778dfe1b42f432458d4
+Z 344ab71a9372cfa91d638c7c0c606714
# Remove this line to create a well-formed Fossil manifest.