char *zContentExprlist;
Fts5TokenizerConfig t;
int bLock; /* True when table is preparing statement */
+ int eEnc; /* An FTS5_ENCODING_XXX constant */
+
/* Values loaded from the %_config table */
int iVersion; /* fts5 file format 'version' */
Fts5Global *pGlobal; /* Global (connection wide) data */
Fts5Cursor *pSortCsr; /* Sort data from this cursor */
int iSavepoint; /* Successful xSavepoint()+1 */
-
+
#ifdef SQLITE_DEBUG
struct Fts5TransactionState ts;
#endif
};
+#define FTS5_ENCODING_UNKNOWN 0 /* Not determined yet; fts5IsUtf16() queries pragma_encoding */
+#define FTS5_ENCODING_UTF8 1 /* Set when the pragma_encoding value does not contain "16" */
+#define FTS5_ENCODING_UTF16 2 /* Set when the pragma_encoding value contains "16" */
+
struct Fts5MatchPhrase {
Fts5Buffer *pPoslist; /* Pointer to current poslist */
int nTerm; /* Size of phrase in terms */
fts5SetLocale(pConfig, 0, 0);
}
+/*
+** Set (*pbIs) to 1 if the database text encoding is a UTF-16 variant, or
+** to 0 otherwise. Return SQLITE_OK if successful.
+**
+** The answer is cached in pConfig->eEnc. While that field is still
+** FTS5_ENCODING_UNKNOWN, the encoding is read from "pragma_encoding" and
+** the result stored as FTS5_ENCODING_UTF8 or FTS5_ENCODING_UTF16, so the
+** query runs at most once per successfully resolved Fts5Config.
+**
+** If the query cannot be prepared or fails while running, the SQLite
+** error code is returned, (*pbIs) is left unmodified and pConfig->eEnc
+** remains FTS5_ENCODING_UNKNOWN so that a later call tries again.
+*/
+static int fts5IsUtf16(Fts5Config *pConfig, int *pbIs){
+  if( pConfig->eEnc==FTS5_ENCODING_UNKNOWN ){
+    sqlite3_stmt *pPragma = 0;
+    int val;
+    int rc = fts5PrepareStatement(&pPragma, pConfig,
+        "SELECT (encoding LIKE '%%16%%') FROM pragma_encoding"
+    );
+
+    /* Do not report an encoding (or SQLITE_OK) if the statement could
+    ** not even be prepared - pass the error up to the caller. */
+    if( rc!=SQLITE_OK ) return rc;
+
+    /* The return value of sqlite3_step() is deliberately not inspected.
+    ** Any error it encounters is returned by sqlite3_finalize() below. */
+    sqlite3_step(pPragma);
+    val = sqlite3_column_int(pPragma, 0);
+    rc = sqlite3_finalize(pPragma);
+    if( rc!=SQLITE_OK ) return rc;
+
+    if( val ){
+      pConfig->eEnc = FTS5_ENCODING_UTF16;
+    }else{
+      pConfig->eEnc = FTS5_ENCODING_UTF8;
+    }
+  }
+
+  *pbIs = (pConfig->eEnc==FTS5_ENCODING_UTF16);
+  return SQLITE_OK;
+}
+
int sqlite3Fts5ExtractText(
Fts5Config *pConfig,
int bContent,
int nBlob = sqlite3_value_bytes(pVal);
int nLocale = 0;
- for(nLocale=0; nLocale<nBlob; nLocale++){
- if( pBlob[nLocale]==0x00 ) break;
- }
+ if( nBlob>=4 && memcmp(pBlob, "\0\0\0\0", 4)==0 ){
+ int bIs16 = 0;
+ pText = (const char*)sqlite3_value_text(pVal);
+ nText = sqlite3_value_bytes(pVal);
+ rc = fts5IsUtf16(pConfig, &bIs16);
+
+ if( bIs16 ){
+ pText += 2;
+ nText -= 2;
+ }else{
+ pText += 4;
+ nText -= 4;
+ }
+
+ }else{
+ for(nLocale=0; nLocale<nBlob; nLocale++){
+ if( pBlob[nLocale]==0x00 ) break;
+ }
- if( nLocale==nBlob ) return SQLITE_ERROR;
- pText = (const char*)&pBlob[nLocale+1];
- nText = nBlob-nLocale-1;
+ if( nLocale==nBlob || nLocale==0 ) return SQLITE_ERROR;
+ pText = (const char*)&pBlob[nLocale+1];
+ nText = nBlob-nLocale-1;
- if( pbResetTokenizer ){
- rc = fts5SetLocale(pConfig, (const char*)pBlob, nLocale);
- *pbResetTokenizer = 1;
+ if( pbResetTokenizer ){
+ rc = fts5SetLocale(pConfig, (const char*)pBlob, nLocale);
+ *pbResetTokenizer = 1;
+ }
}
}else{
zText = (const char*)sqlite3_value_text(apArg[1]);
nText = sqlite3_value_bytes(apArg[1]);
- nBlob = nLocale + 1 + nText;
- pBlob = (u8*)sqlite3_malloc(nBlob);
- if( pBlob==0 ){
- sqlite3_result_error_nomem(pCtx);
- return;
- }
+ if( zLocale==0 || zLocale[0]=='\0' ){
+ sqlite3_result_text(pCtx, zText, nText, SQLITE_TRANSIENT);
+ }else{
+ nBlob = nLocale + 1 + nText;
+ pBlob = (u8*)sqlite3_malloc(nBlob);
+ if( pBlob==0 ){
+ sqlite3_result_error_nomem(pCtx);
+ return;
+ }
- if( zLocale ) memcpy(pBlob, zLocale, nLocale);
- pBlob[nLocale] = 0x00;
- if( zText ) memcpy(&pBlob[nLocale+1], zText, nText);
+ memcpy(pBlob, zLocale, nLocale);
+ pBlob[nLocale] = 0x00;
+ if( zText ) memcpy(&pBlob[nLocale+1], zText, nText);
- sqlite3_result_blob(pCtx, pBlob, nBlob, sqlite3_free);
- sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE);
+ sqlite3_result_blob(pCtx, pBlob, nBlob, sqlite3_free);
+ sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE);
+ }
}
/*
sqlite3_value *pVal = apVal[i];
if( sqlite3_value_nochange(pVal) && p->pSavedRow ){
pVal = sqlite3_column_value(p->pSavedRow, i-1);
- }else if( i>1 && pConfig->abUnindexed[i-2]
- && pConfig->bLocale
- && sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE
- ){
- /* At attempt to insert an fts5_locale() value into an UNINDEXED
- ** column. Strip the locale away and just bind the text. */
- const char *pText = 0;
- int nText = 0;
- rc = sqlite3Fts5ExtractText(pConfig, 0, pVal, 0, &pText, &nText);
- sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT);
+ }else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){
+ if( pConfig->bLocale==0 ){
+ sqlite3Fts5ConfigErrmsg(pConfig,
+ "fts5_locale() may not be used without locale=1"
+ );
+ rc = SQLITE_ERROR;
+ break;
+ }else if( i>1 && pConfig->abUnindexed[i-2] ){
+ /* An attempt to insert an fts5_locale() value into an UNINDEXED
+ ** column. Strip the locale away and just bind the text. */
+ const char *pText = 0;
+ int nText = 0;
+ rc = sqlite3Fts5ExtractText(pConfig, 0, pVal, 0, &pText, &nText);
+ sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT);
+ continue;
+ }
+ }else if( pConfig->bLocale && sqlite3_value_type(pVal)==SQLITE_BLOB ){
+ /* Inserting a blob into a normal content table with locale=1. */
+ int n = sqlite3_value_bytes(pVal);
+ u8 *pBlob = sqlite3Fts5MallocZero(&rc, n+4);
+ if( pBlob ){
+ memcpy(&pBlob[4], sqlite3_value_blob(pVal), n);
+ rc = sqlite3_bind_blob(pInsert, i, pBlob, n+4, SQLITE_TRANSIENT);
+ sqlite3_free(pBlob);
+ }
continue;
}
--- /dev/null
+# 2024 July 30
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5blob
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+# Run the same scenario once for each combination of database text
+# encoding ($enc) and fts5 "locale" option ($locale). The fts5vocab
+# contents and the fts5_test_columntext() output of each run are saved in
+# the arrays T and U, keyed by "$enc,$locale".
+foreach {tn enc locale} {
+  1 utf8 0
+  2 utf8 1
+  3 utf16 0
+  4 utf16 1
+} {
+  reset_db
+  fts5_aux_test_functions db
+
+  # The encoding must be set before the first table is created.
+  execsql "PRAGMA encoding = $enc"
+
+  execsql "
+    CREATE VIRTUAL TABLE t1 USING fts5(x, y, locale=$locale);
+  "
+
+  # Row 1 stores a blob that begins with four 0x00 bytes, row 2 a blob
+  # with a single embedded 0x00 byte and row 3 an ordinary text value.
+  do_execsql_test 1.$tn.0 {
+    CREATE VIRTUAL TABLE tt USING fts5vocab('t1', 'instance');
+    INSERT INTO t1(rowid, x, y) VALUES(1, 555, X'0000000041424320444546');
+    INSERT INTO t1(rowid, x, y) VALUES(2, 666, X'41424300444546');
+    INSERT INTO t1(rowid, x, y) VALUES(3, 777, 'xyz');
+  }
+
+  # Every value must be read back exactly as it was inserted.
+  do_execsql_test 1.$tn.1 {
+    SELECT rowid, quote(x), quote(y) FROM t1
+  } {
+    1 555 X'0000000041424320444546'
+    2 666 X'41424300444546'
+    3 777 'xyz'
+  }
+
+  set T($enc,$locale) [execsql { SELECT * FROM tt }]
+  set U($enc,$locale) [execsql { SELECT fts5_test_columntext(t1) FROM t1 }]
+
+  # Delete the two blob rows, then check the database is still consistent.
+  do_execsql_test 1.$tn.2 {
+    DELETE FROM t1 WHERE rowid=2;
+    DELETE FROM t1 WHERE rowid=1;
+  }
+
+  do_execsql_test 1.$tn.3 {
+    PRAGMA integrity_check;
+  } {ok}
+}
+
+do_test 1.5.1 { set T(utf8,1) } $T(utf8,0)
+do_test 1.5.2 { set T(utf16,1) } $T(utf16,0)
+
+do_test 1.6.1 { set U(utf8,1) } $U(utf8,0)
+do_test 1.6.2 { set U(utf16,1) } $U(utf16,0)
+
+
+
+finish_test
+
+
}
}
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 6.0 {
+ CREATE VIRTUAL TABLE x1 USING fts5(x);
+}
+do_catchsql_test 6.1 {
+ INSERT INTO x1(rowid, x) VALUES(123, fts5_locale('en_AU', 'hello world'));
+} {1 {fts5_locale() may not be used without locale=1}}
+
+do_execsql_test 6.2 {
+ SELECT typeof( fts5_locale(NULL, 'xyz') ), typeof( fts5_locale('', 'abc') );
+} {text text}
+
finish_test
-C Update\sthe\shighlight()\sand\ssnippet()\sfunctions\sto\suse\slocales\swhen\savailable.
-D 2024-07-30T15:55:51.884
+C Fix\scases\ssurrounding\sblob\svalues\sbeing\sstored\sin\slocale=1\sfts5\stables.
+D 2024-07-30T20:39:58.636
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
F ext/fts5/fts5.h 7cd47e5dfe68d798e667caa76722374f0c909f2db05bb4d42b1ec5300d18e658
-F ext/fts5/fts5Int.h 330b1e2dad9ea9cccc9fa93817062fa21e89f00e7eac9a84be440f7e93bf7c3c
+F ext/fts5/fts5Int.h 833a2fe729f926ebcde47e21e495d141b99ede9a188fc577873f24bea0f0bfa2
F ext/fts5/fts5_aux.c 652f839dc0c77431295f10b08f268631560bb5630e65fd701de7a58744428a82
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
F ext/fts5/fts5_config.c 0c96490fbad746b3780174f38b2ee5e3d719f2f81ee6b58ca828772871e0f680
F ext/fts5/fts5_expr.c c7336d5f9ecc0e2b014d700be2bec0ea383b0e82c494a7c5c4ac622327c2bfad
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
-F ext/fts5/fts5_main.c ee4bba42d8d093cd870c55a342ab85a647ec39dc79985e2b1084514d990de82d
-F ext/fts5/fts5_storage.c 2118a5bd2ae617367259f6cf2412b6ff26bc1cd1cc29bf64a6249edcc1572f19
+F ext/fts5/fts5_main.c d9a3fef86887e373027d48ab8216ab8caca95f2316e05d953d8885633162a9c5
+F ext/fts5/fts5_storage.c cc6173bb755d668573169c038034a9ec8deadd10b3c10c145adbdf04ab5f889c
F ext/fts5/fts5_tcl.c 93b705cb87633574983161edc5234f9b91ba03f9fecfbd2c5d401a1da6f93aa5
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/test/fts5bigid.test 2860854c2561a57594192b00c33a29f91cb85e25f3d6c03b5c2b8f62708f39dd
F ext/fts5/test/fts5bigpl.test 8f09858aab866c33593560e6480b2b6975ae7ff29ca32ad7b77e2da61402f8ef
F ext/fts5/test/fts5bigtok.test 541119e616c637caea925a8c028c37c2c29e94383e00aa2f9198d530724b6e36
+F ext/fts5/test/fts5blob.test a16160688e181a212bcb4968325c57ef2864a0bcae0794d6a1e16185007f00b9
F ext/fts5/test/fts5cat.test daba0b80659460b0cb60bd1f40b402478a761fe7ea414c3c94c2be25568cc33a
F ext/fts5/test/fts5circref.test f880dfd0d99f6fb73b88ccacb0927d18e833672fd906cc47d6b4e529419eaa62
F ext/fts5/test/fts5colset.test 544f4998cdbfe06a3123887fc0221612e8aa8192cdaff152872f1aadb10e6897
F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1
F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc2782680740513c4d1fc114b43d4ad
F ext/fts5/test/fts5limits.test 8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c
-F ext/fts5/test/fts5locale.test 7c332f882080a69ed4be9790d99ee49747ca44cf241d45f762298e17fea6e7a7
+F ext/fts5/test/fts5locale.test bfd8704f9bea963314fcbcf810f08a357ac8035bcb80a2d6170c1e57fa6ad52a
F ext/fts5/test/fts5matchinfo.test 877520582feb86bbfd95ab780099bcba4526f18ac75ee34979144cf86ba3a5a3
F ext/fts5/test/fts5merge.test 2654df0bcdb2d117c2d38b6aeb0168061be01c643f9e9194b36c43a2970e8082
F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 14006711d83d098e665c540b978b0e29aa8f458da1c2c8e9c2baf2ad5ebd502c
-R 8ced4a5b525d31957945a9ad2a3de4d9
+P 569ae56a673a2e101fac7003a1cb41c7a02a515adf55bd1e1f2a03c19eb2b085
+R 937929d817b727d57bac9fb974f93247
U dan
-Z 395467bf290b92744f082b81bcb8dffe
+Z 807f8f93a084e88be44a4f6a2009b39e
# Remove this line to create a well-formed Fossil manifest.
-569ae56a673a2e101fac7003a1cb41c7a02a515adf55bd1e1f2a03c19eb2b085
+ae435aff1785d5832821c19dc88ccf6c496a7ff55be80276b31e5c9abee723db