**
** This API can be quite slow if used with an FTS5 table created with the
** "detail=none" or "detail=column" option.
+**
+** xColumnLocale(pFts5, iCol, pzLocale, pnLocale)
+** If parameter iCol is less than zero, or greater than or equal to the
+** number of columns in the table, SQLITE_RANGE is returned.
+**
+** Otherwise, this function attempts to retrieve the locale associated
+** with column iCol of the current row. Usually, there is no associated
+** locale, and output parameters (*pzLocale) and (*pnLocale) are set
+** to NULL and 0, respectively. However, if the fts5_locale() function
+** was used to associate a locale with the value when it was inserted
+** into the fts5 table, then (*pzLocale) is set to point to a buffer
+** containing the name of the locale in utf-8 encoding. (*pnLocale) is
+** set to the size in bytes of the buffer.
+**
+** If successful, SQLITE_OK is returned. Or, if an error occurs, an
+** SQLite error code is returned. The final value of the output parameters
+** is undefined in this case.
+**
+** xTokenizeSetLocale(pFts5, pLocale, nLocale)
+** This API function is used to invoke the fts5_tokenizer_v2.xSetLocale()
+** method of the fts5 table's tokenizer, in the same way as xTokenize() is
+** used to invoke fts5_tokenizer_v2.xTokenize().
+**
+** Parameters pLocale and nLocale may both be 0, in which case the tokenizer
+** is configured to use its default locale. Otherwise, pLocale should point
+** to a buffer containing the name of the locale to use encoded as utf-8.
+** It does not have to be nul-terminated. nLocale must be passed the size
+** of the text in bytes. The buffer indicated by pLocale must remain valid
+** for the duration of any calls made to xTokenize() by the auxiliary
+** function call up until the next invocation of xTokenizeSetLocale(), if
+** any.
+**
+** SQLITE_OK is returned on success, or an SQLite error code otherwise.
*/
struct Fts5ExtensionApi {
- int iVersion; /* Currently always set to 3 */
+ int iVersion; /* Currently always set to 4 */
void *(*xUserData)(Fts5Context*);
const char **ppToken, int *pnToken
);
int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*);
+
+ /* Below this point are iVersion>=4 only */
+ int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn);
+
+ int (*xTokenizeSetLocale)(Fts5Context*, const char *z, int n);
};
/*
Fts5Auxiliary *pAux; /* Currently executing extension function */
Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */
- /* Cache used by auxiliary functions xInst() and xInstCount() */
+ /* Cache used by auxiliary API functions xInst() and xInstCount() */
Fts5PoslistReader *aInstIter; /* One for each phrase */
int nInstAlloc; /* Size of aInst[] array (entries / 3) */
int nInstCount; /* Number of phrase instances */
int *aInst; /* 3 integers per phrase instance */
+
+ /* Values set by xTokenizeSetLocale() */
+ const char *pLocale;
+ int nLocale;
};
/*
){
const char *pText = 0;
int nText = 0;
- int bResetTokenizer = 0;
int rc = SQLITE_OK;
-
int bDecodeBlob = 0;
+
+ assert( pbResetTokenizer==0 || *pbResetTokenizer==0 );
+
if( sqlite3_value_type(pVal)==SQLITE_BLOB ){
- if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE
- || (bContent && pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL)
- ){
- bDecodeBlob = 1;
- }
+ if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE
+ || (bContent && pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL)
+ ){
+ bDecodeBlob = 1;
+ }
}
if( bDecodeBlob ){
pText = (const char*)&pBlob[nLocale+1];
nText = nBlob-nLocale-1;
- rc = fts5SetLocale(pConfig, (const char*)pBlob, nLocale);
- bResetTokenizer = 1;
+ if( pbResetTokenizer ){
+ rc = fts5SetLocale(pConfig, (const char*)pBlob, nLocale);
+ *pbResetTokenizer = 1;
+ }
}else{
pText = (const char*)sqlite3_value_text(pVal);
*ppText = pText;
*pnText = nText;
- *pbResetTokenizer = bResetTokenizer;
return rc;
}
){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
- return sqlite3Fts5Tokenize(
- pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
- );
+ int rc = SQLITE_OK;
+ const char *pLocale = pCsr->pLocale;
+ if( pLocale ){
+ rc = fts5SetLocale(pTab->pConfig, pLocale, pCsr->nLocale);
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5Tokenize(
+ pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
+ );
+ }
+ if( pLocale ){
+ sqlite3Fts5ClearLocale(pTab->pConfig);
+ }
+ return rc;
}
static int fts5ApiPhraseCount(Fts5Context *pCtx){
}else{
rc = fts5SeekCursor(pCsr, 0);
if( rc==SQLITE_OK ){
- *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1);
- *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
+ Fts5Config *pConfig = pTab->pConfig;
+ int bContent = (
+ pConfig->bLocale && pConfig->abUnindexed[iCol]==0 &&
+ pConfig->eContent==FTS5_CONTENT_NORMAL
+ );
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1);
+ sqlite3Fts5ExtractText(pConfig, bContent, pVal, 0, pz, pn);
}
}
return rc;
}
}else{
int i;
+ rc = fts5SeekCursor(pCsr, 0);
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
if( pConfig->abUnindexed[i]==0 ){
- const char *z; int n;
- void *p = (void*)(&pCsr->aColumnSize[i]);
+ const int bContent = (pConfig->eContent==FTS5_CONTENT_NORMAL);
+ const char *z = 0;
+ int n = 0;
+ int bReset = 0;
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1);
+
pCsr->aColumnSize[i] = 0;
- rc = fts5ApiColumnText(pCtx, i, &z, &n);
+ rc = sqlite3Fts5ExtractText(pConfig, bContent, pVal, &bReset, &z, &n);
if( rc==SQLITE_OK ){
- rc = sqlite3Fts5Tokenize(
- pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
+ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX,
+ z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb
);
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
}
}
}
int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);
+/*
+** Implementation of the xColumnLocale() extension API method.
+**
+** Output parameters (*pzLocale) and (*pnLocale) are first set to NULL and
+** 0. If iCol is negative, or greater than or equal to the number of
+** columns in the table, SQLITE_RANGE is returned.
+**
+** Otherwise, the stored value is only inspected if the column is indexed
+** (not UNINDEXED), the cursor is not using FTS5_PLAN_SPECIAL, the table is
+** not contentless, and either the table is not a normal-content table or
+** it was created with the locale option enabled. In all other cases the
+** outputs are left as NULL/0 and SQLITE_OK is returned.
+**
+** When the value is inspected and turns out to be a blob that begins with
+** a non-empty locale name followed by a 0x00 byte, (*pzLocale) is set to
+** point at the start of the blob and (*pnLocale) to the number of bytes
+** before the 0x00 byte. The locale name is not copied.
+** NOTE(review): the returned pointer refers to the buffer obtained from
+** sqlite3_value_blob() on the cursor's statement - confirm how long the
+** caller may rely on it.
+*/
+static int fts5ApiColumnLocale(
+ Fts5Context *pCtx,
+ int iCol,
+ const char **pzLocale,
+ int *pnLocale
+){
+ int rc = SQLITE_OK;
+ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+ Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
+
+ /* Default result: no locale associated with the value. */
+ *pzLocale = 0;
+ *pnLocale = 0;
+
+ if( iCol<0 || iCol>=pConfig->nCol ){
+ rc = SQLITE_RANGE;
+ }else{
+ int bNormal = (pConfig->eContent==FTS5_CONTENT_NORMAL);
+ if( pConfig->abUnindexed[iCol]==0
+ && pCsr->ePlan!=FTS5_PLAN_SPECIAL
+ && pConfig->eContent!=FTS5_CONTENT_NONE
+ && (bNormal==0 || pConfig->bLocale)
+ ){
+ rc = fts5SeekCursor(pCsr, 0);
+ if( rc==SQLITE_OK ){
+ /* Column 0 of pStmt is not a user column, hence the (iCol+1). */
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1);
+ /* For a normal-content table any blob is examined. Otherwise, the
+ ** blob must also carry the FTS5_LOCALE_SUBTYPE subtype. */
+ if( sqlite3_value_type(pVal)==SQLITE_BLOB
+ && (bNormal || sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE)
+ ){
+ const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal);
+ int nBlob = sqlite3_value_bytes(pVal);
+ int nLocale = 0;
+ /* Find the offset of the first 0x00 byte, or nBlob if there is none */
+ for(nLocale=0; nLocale<nBlob && pBlob[nLocale]!=0x00; nLocale++);
+ /* Only report a locale if the name is non-empty and a 0x00
+ ** terminator was actually found inside the blob. */
+ if( nLocale!=0 && nLocale!=nBlob ){
+ *pzLocale = (const char*)pBlob;
+ *pnLocale = nLocale;
+ }
+ }
+ }
+ }
+ }
+
+ return rc;
+}
+
+/*
+** Implementation of the xTokenizeSetLocale() extension API method.
+**
+** This function only records the locale (pLocale/nLocale) in the cursor
+** object. The buffer is not copied - just the pointer is stored - so the
+** caller must keep it valid for as long as the cursor may use it.
+** Passing pLocale==0 stores a NULL pointer.
+**
+** As written, this function always returns SQLITE_OK.
+*/
+static int fts5ApiTokenizeSetLocale(
+ Fts5Context *pCtx,
+ const char *pLocale,
+ int nLocale
+){
+ int rc = SQLITE_OK;
+ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+ /* Store the pointer and size as is; no copy of the locale is made. */
+ pCsr->pLocale = pLocale;
+ pCsr->nLocale = nLocale;
+ return rc;
+}
+
static const Fts5ExtensionApi sFts5Api = {
- 3, /* iVersion */
+ 4, /* iVersion */
fts5ApiUserData,
fts5ApiColumnCount,
fts5ApiRowCount,
fts5ApiPhraseFirstColumn,
fts5ApiPhraseNextColumn,
fts5ApiQueryToken,
- fts5ApiInstToken
+ fts5ApiInstToken,
+ fts5ApiColumnLocale,
+ fts5ApiTokenizeSetLocale
};
/*
pCsr->pAux = pAux;
pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
pCsr->pAux = 0;
+ pCsr->pLocale = 0;
+ pCsr->nLocale = 0;
}
static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
sqlite3_value *pVal = apVal[i];
if( sqlite3_value_nochange(pVal) && p->pSavedRow ){
pVal = sqlite3_column_value(p->pSavedRow, i-1);
+ }else if( i>1 && pConfig->abUnindexed[i-2]
+ && pConfig->bLocale
+ && sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE
+ ){
+ /* An attempt to insert an fts5_locale() value into an UNINDEXED
+ ** column. Strip the locale away and just bind the text. */
+ const char *pText = 0;
+ int nText = 0;
+ rc = sqlite3Fts5ExtractText(pConfig, 0, pVal, 0, &pText, &nText);
+ sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT);
+ continue;
}
+
rc = sqlite3_bind_value(pInsert, i, pVal);
}
if( rc==SQLITE_OK ){
{ "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */
{ "xInstToken", 2, "IDX ITERM" }, /* 19 */
+ { "xColumnLocale", 1, "COL" }, /* 20 */
{ 0, 0, 0}
};
break;
}
+ CASE(20, "xColumnLocale") {
+ const char *z = 0;
+ int n = 0;
+ int iCol;
+ if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){
+ return TCL_ERROR;
+ }
+ rc = p->pApi->xColumnLocale(p->pFts, iCol, &z, &n);
+ if( rc==SQLITE_OK && z ){
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n));
+ }
+ break;
+ }
+
default:
assert( 0 );
break;
proc fts5_columntext {cmd iCol} {
$cmd xColumnText $iCol
}
+# Auxiliary function body: return the result of invoking the xColumnLocale
+# method of API command $cmd for column $iCol.
+proc fts5_columnlocale {cmd iCol} {
+ $cmd xColumnLocale $iCol
+}
proc fts5_test_columntext {cmd} {
set res [list]
set res
}
+# Auxiliary function body: return a list containing one element for each
+# column reported by [$cmd xColumnCount], in column order. Each element
+# is the result of [$cmd xColumnLocale] for that column.
+proc fts5_test_columnlocale {cmd} {
+ set res [list]
+ for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
+ lappend res [$cmd xColumnLocale $i]
+ }
+ set res
+}
+
proc fts5_test_columntotalsize {cmd} {
set res [list]
for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
foreach f {
fts5_test_columnsize
fts5_test_columntext
+ fts5_test_columnlocale
fts5_test_columntotalsize
fts5_test_poslist
fts5_test_poslist2
fts5_test_queryphrase
fts5_test_phrasecount
fts5_columntext
+ fts5_columnlocale
fts5_queryphrase
fts5_collist
} {
proc tcl_create {args} { return "tcl_tokenize" }
proc tcl_tokenize {tflags text} {
+ set iToken 1
+ set bSkip 0
+ if {[sqlite3_fts5_locale]=="second"} { set bSkip 1 }
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
+ incr iToken
+ if {(($iToken) % ($bSkip + 1))} continue
+
set w [transform_token [sqlite3_fts5_locale] $w]
sqlite3_fts5_token $w $iStart $iEnd
}
4 jkl hello
}
+fts5_aux_test_functions db
+
+do_execsql_test 4.8.1 {
+ SELECT fts5_test_columntext(d1) FROM d1('jkl')
+} {{jkl hello}}
+do_execsql_test 4.8.2 {
+ SELECT fts5_test_columntext(d1) FROM d1(fts5_locale('reverse', 'hello'))
+} {{jkl hello}}
+
+do_execsql_test 4.9 {
+ SELECT fts5_test_columnlocale(d1) FROM d1(fts5_locale('reverse', 'hello'))
+} {{{} reverse}}
+
+do_execsql_test 4.10 {
+ SELECT fts5_test_columnlocale(d1) FROM d1
+} {
+ {{} {}}
+ {{} reverse}
+}
+
+#-------------------------------------------------------------------------
+# Test that if an fts5_locale() value is written to an UNINDEXED
+# column it is stored as text. This is so that blobs and other values
+# can also be stored as is.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 5.1 {
+ CREATE VIRTUAL TABLE t1 USING fts5(
+ x, y UNINDEXED, locale=1, tokenize=tcl
+ );
+
+ INSERT INTO t1(rowid, x, y) VALUES(111,
+ fts5_locale('reverse', 'one two three'),
+ fts5_locale('reverse', 'four five six')
+ );
+}
+
+do_execsql_test 5.2 {
+ SELECT rowid, x, y FROM t1
+} {
+ 111 {one two three} {four five six}
+}
+
+do_execsql_test 5.3 {
+ SELECT typeof(c0), typeof(c1) FROM t1_content
+} {
+ blob text
+}
+
+#-------------------------------------------------------------------------
+
+# Test fts5_test_columnsize() on a table that uses locale=1 and the "tcl"
+# tokenizer, both with the default options and with columnsize=0. Every
+# row is inserted using locale 'second', for which tcl_tokenize emits
+# only every second token. The four documents contain 4, 5, 3 and 7
+# words, so the expected column sizes are 2, 3, 2 and 4.
+#
+# All tests in this group are numbered 6.$tn.*.
+foreach {tn opt} {
+ 1 {}
+ 2 {, columnsize=0}
+} {
+ reset_db
+ sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+ do_execsql_test 6.$tn.1 "
+ CREATE VIRTUAL TABLE y1 USING fts5(t, locale=1, tokenize=tcl $opt);
+ "
+
+ do_execsql_test 6.$tn.2 {
+ INSERT INTO y1(rowid, t) VALUES
+ (1, fts5_locale('second', 'the city of London')),
+ (2, fts5_locale('second', 'shall have all the old')),
+ (3, fts5_locale('second', 'Liberties and Customs')),
+ (4, fts5_locale('second', 'which it hath been used to have'));
+ }
+
+ fts5_aux_test_functions db
+
+ do_execsql_test 6.$tn.3 {
+ SELECT fts5_test_columnsize(y1) FROM y1
+ } {
+ 2 3 2 4
+ }
+}
+
+
finish_test
-C Fix\sa\scouple\sof\sasan\sproblems\son\sthis\sbranch.
-D 2024-07-29T15:00:11.833
+C Fix\sxColumnText()\sand\sxColumnSize()\sAPIs.\sAdd\sxColumnLocale()\sand\sxTokenizeSetLocale().
+D 2024-07-29T20:31:17.644
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
-F ext/fts5/fts5.h 38a9553791828b3cf677b9347735fc531d54015ce4f5229d5cf1e2a5c1d3955a
+F ext/fts5/fts5.h 7cd47e5dfe68d798e667caa76722374f0c909f2db05bb4d42b1ec5300d18e658
F ext/fts5/fts5Int.h 330b1e2dad9ea9cccc9fa93817062fa21e89f00e7eac9a84be440f7e93bf7c3c
F ext/fts5/fts5_aux.c 4584e88878e54828bf7d4d0d83deedd232ec60628b7731be02bad6adb62304b1
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
F ext/fts5/fts5_expr.c c7336d5f9ecc0e2b014d700be2bec0ea383b0e82c494a7c5c4ac622327c2bfad
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
-F ext/fts5/fts5_main.c 71ea9fe3aba423ad325a7ac600b83bb79e2a640f62094f6df356f978245c78db
-F ext/fts5/fts5_storage.c 6beefaa9229193e85fe9b920aa62f46a79cac867feb8a5af8ce6df28dec90e8d
-F ext/fts5/fts5_tcl.c e22e5791076fdfcbcd2dd5e45aff9d3615f7840bae5d984088ce2c24cfdf77b4
+F ext/fts5/fts5_main.c ee4bba42d8d093cd870c55a342ab85a647ec39dc79985e2b1084514d990de82d
+F ext/fts5/fts5_storage.c 2118a5bd2ae617367259f6cf2412b6ff26bc1cd1cc29bf64a6249edcc1572f19
+F ext/fts5/fts5_tcl.c 93b705cb87633574983161edc5234f9b91ba03f9fecfbd2c5d401a1da6f93aa5
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c fa5493075101540270f572038fc1723d44fcc97bfbf237c8530013b8a27860be
F ext/fts5/fts5_vocab.c e4830b00809e5da53bc10f93adc59e321407b0f801c7f4167c0e47f5552267e0
F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
-F ext/fts5/test/fts5_common.tcl bc33c6cc65e5d390f28a68aeeb3a526dadd2c3a947d2466ee1986c1a4002df56
+F ext/fts5/test/fts5_common.tcl 48c0855d235a2f141e5519d3170bad7cddcc53ad3a16ac8ef952ba61048c2958
F ext/fts5/test/fts5aa.test 4db81519863244a3cab35795fe65ab6b592e7970c7409eba098b23ebbfc08d95
F ext/fts5/test/fts5ab.test 4bdb619fee409e11417e8827e320b857e42e926a01a0408fc9f143ec870a6ced
F ext/fts5/test/fts5ac.test 4a73626de86f3d17c95738034880c4f0de8d54741fb943d819b528373657e59b
F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1
F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc2782680740513c4d1fc114b43d4ad
F ext/fts5/test/fts5limits.test 8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c
-F ext/fts5/test/fts5locale.test 60217c6f67331e2b3218b4da3f96f54b856a341b23e7328c9f30efe9ca343130
+F ext/fts5/test/fts5locale.test 14e46de0fbab3a01e16d8f61143260a7865cbe9d1759a564927635a1b08e9310
F ext/fts5/test/fts5matchinfo.test 877520582feb86bbfd95ab780099bcba4526f18ac75ee34979144cf86ba3a5a3
F ext/fts5/test/fts5merge.test 2654df0bcdb2d117c2d38b6aeb0168061be01c643f9e9194b36c43a2970e8082
F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 69205264debd829573b1c777a5a493cfeb6083c4cdec106b1f819989f859ac75
-R c565bdc34845dfb3802fedc964dd8edd
+P 4e3e529313c86b42d5f1ba113d0714d1545bc6641e09e4537d622057f0352989
+R 76149d45da5064496ccc1cbc5f33d6bf
U dan
-Z 92c34b492aef5c04b9af8f99ed4665b1
+Z 4addb1a2e7bbe8797df686b1934d0dc8
# Remove this line to create a well-formed Fossil manifest.
-4e3e529313c86b42d5f1ba113d0714d1545bc6641e09e4537d622057f0352989
+14006711d83d098e665c540b978b0e29aa8f458da1c2c8e9c2baf2ad5ebd502c