** This is necessary - using sqlite3_value_nochange() instead of just having
** SQLite pass the original value back via xUpdate() - so as not to discard
** any locale information associated with such values.
-**
*/
struct Fts5Storage {
Fts5Config *pConfig;
Fts5Index *pIndex;
+ int db_enc; /* Database encoding: 1=UTF-8, 2=UTF-16le, 3=UTF-16be */
int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */
i64 nTotalRow; /* Total number of rows in FTS table */
i64 *aTotalSize; /* Total sizes of each column */
sqlite3_stmt *pSavedRow;
- sqlite3_stmt *aStmt[12];
+ sqlite3_stmt *aStmt[13]; /* Presumably indexed by FTS5_STMT_* - confirm */
};
#define FTS5_STMT_LOOKUP_DOCSIZE 9
#define FTS5_STMT_REPLACE_CONFIG 10
#define FTS5_STMT_SCAN 11
+#define FTS5_STMT_ENC_CONVERT 12 /* "SELECT substr(?, 1)" */
/*
** Prepare the two insert statements - Fts5Storage.pInsertContent and
"REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */
"SELECT %s FROM %s AS T", /* SCAN */
+ "SELECT substr(?, 1)", /* ENC_CONVERT */
};
Fts5Config *pC = p->pConfig;
char *zSql = 0;
return rc;
}
+/*
+** Run "PRAGMA encoding" against the database handle of storage object p
+** and record the answer in Fts5Storage.db_enc: 1 for "UTF-8", 2 for
+** "UTF-16le" and 3 for "UTF-16be" (names are compared case-insensitively).
+** If the pragma returns no row, or a name that is not one of these three,
+** db_enc is left unmodified.
+**
+** Return the error code from preparing the statement if that fails, or
+** otherwise whatever sqlite3_finalize() returns for it.
+*/
+static int fts5StorageFindDbEnc(Fts5Storage *p){
+  /* Ordered so that (array index + 1) is the value stored in db_enc. */
+  static const char *azName[] = { "UTF-8", "UTF-16le", "UTF-16be" };
+  sqlite3_stmt *pPragma = 0;
+  int rc;
+
+  rc = sqlite3_prepare(p->pConfig->db, "PRAGMA encoding", -1, &pPragma, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
+  if( sqlite3_step(pPragma)==SQLITE_ROW ){
+    const char *zName = (const char*)sqlite3_column_text(pPragma, 0);
+    int i = 0;
+
+    /* Advance to the first matching name, or off the end if none match. */
+    while( i<ArraySize(azName) && sqlite3_stricmp(azName[i], zName)!=0 ){
+      i++;
+    }
+    if( i<ArraySize(azName) ){
+      p->db_enc = i+1;
+    }
+  }
+
+  return sqlite3_finalize(pPragma);
+}
+
+
/*
** Open a new Fts5Index handle. If the bCreate argument is true, create
** and initialize the underlying tables
p->pConfig = pConfig;
p->pIndex = pIndex;
- if( bCreate ){
+ rc = fts5StorageFindDbEnc(p);
+
+ if( rc==SQLITE_OK && bCreate ){
if( pConfig->eContent==FTS5_CONTENT_NORMAL
|| pConfig->eContent==FTS5_CONTENT_UNINDEXED
){
return rc;
}
+/*
+** Argument pVal is a blob value for which the internal encoding does not
+** match the database encoding. This happens when using sqlite3_bind_blob()
+** (which always sets encoding=utf8) with a utf-16 database. The problem
+** is that fts5 is about to call sqlite3_column_text() on the value to
+** obtain text for tokenization. And the conversion between text and blob
+** must take place assuming the blob is encoded in database encoding -
+** otherwise it won't match the text extracted from the same blob if it
+** is read from the db later on.
+**
+** This function attempts to create a new value containing a copy of
+** the blob in pVal, but with the encoding set to the database encoding.
+** It does so by passing the blob through the FTS5_STMT_ENC_CONVERT
+** statement and duplicating the result column.
+**
+** If successful, it sets (*ppOut) to point to the new value and returns
+** SQLITE_OK. It is the responsibility of the caller to eventually free
+** this value using sqlite3_value_free(). Or, if an error occurs - including
+** an out-of-memory condition while duplicating the value - (*ppOut) is
+** set to NULL and an SQLite error code returned.
+*/
+static int fts5EncodingFix(
+  Fts5Storage *p,
+  sqlite3_value *pVal,
+  sqlite3_value **ppOut
+){
+  sqlite3_stmt *pStmt = 0;
+  int rc;
+
+  /* Guarantee the documented "NULL on error" contract on every path. */
+  *ppOut = 0;
+
+  rc = fts5StorageGetStmt(
+      p, FTS5_STMT_ENC_CONVERT, &pStmt, p->pConfig->pzErrmsg
+  );
+  if( rc==SQLITE_OK ){
+    sqlite3_value *pDup = 0;
+    const char *pBlob = sqlite3_value_blob(pVal);
+    int nBlob = sqlite3_value_bytes(pVal);
+    int rc2;
+
+    /* SQLITE_STATIC: the blob belongs to pVal and is not copied here. */
+    rc = sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
+
+    if( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
+      /* The column value is only usable until the statement is reset, so
+      ** take a private copy before resetting. */
+      sqlite3_value *pCol = sqlite3_column_value(pStmt, 0);
+      pDup = sqlite3_value_dup(pCol);
+      if( pDup==0 ){
+        rc = SQLITE_NOMEM;
+      }
+    }
+
+    /* Always reset the statement, but do not let a successful reset mask
+    ** an error (e.g. SQLITE_NOMEM) that has already been recorded. */
+    rc2 = sqlite3_reset(pStmt);
+    if( rc==SQLITE_OK ) rc = rc2;
+
+    if( rc!=SQLITE_OK ){
+      sqlite3_value_free(pDup);
+    }else{
+      *ppOut = pDup;
+    }
+  }
+
+  return rc;
+}
+
+
/*
** Insert new entries into the FTS index and %_docsize table.
*/
const char *pText = 0; /* Pointer to buffer containing text value */
int nLoc = 0; /* Size of pText in bytes */
const char *pLoc = 0; /* Pointer to buffer containing text value */
+ sqlite3_value *pFree = 0;
sqlite3_value *pVal = apVal[ctx.iCol+2];
if( p->pSavedRow && sqlite3_value_nochange(pVal) ){
if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
}else{
+ if( sqlite3_value_type(pVal)==SQLITE_BLOB
+ && sqlite3_value_encoding(pVal)!=p->db_enc
+ ){
+ rc = fts5EncodingFix(p, pVal, &pFree);
+ if( pFree ){
+ assert( rc==SQLITE_OK );
+ pVal = pFree;
+ }
+ }
pText = (const char*)sqlite3_value_text(pVal);
nText = sqlite3_value_bytes(pVal);
}
);
sqlite3Fts5ClearLocale(pConfig);
}
+ if( pFree ){
+ sqlite3_value_free(pFree);
+ }
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
-C In\sthe\sCLI,\slimit\scolumn\swidths\sspecified\sby\sthe\s".width"\scommand\sto\nhave\san\sabsolute\svalue\snot\smore\sthan\s30,000.
-D 2025-10-09T14:01:52.855
+C Avoid\scorrupting\sthe\sfts5\sindex\sif\sa\svalue\sthat\swas\sinserted\svia\ssqlite3_bind_blob()\sin\sa\snon-utf8\sdb\sis\sdeleted.
+D 2025-10-09T14:50:27.036
F .fossil-settings/binary-glob 61195414528fb3ea9693577e1980230d78a1f8b0a54c78cf1b9b24d0a409ed6a x
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F ext/fts5/fts5_hash.c a6266cedd801ab7964fa9e74ebcdda6d30ec6a96107fa24148ec6b7b5b80f6e0
F ext/fts5/fts5_index.c 1e5009261966215b61bbe3b46d79916346efac775b57c1487a478f684c971111
F ext/fts5/fts5_main.c 42025174a556257287071e90516d3ab8115daf1dd525a301883544469a260014
-F ext/fts5/fts5_storage.c 19bc7c4cbe1e6a2dd9849ef7d84b5ca1fcbf194cefc3e386b901e00e08bf05c2
+F ext/fts5/fts5_storage.c f6c646106ce4e34bf3de6370fb9df953e891462a8e2cd4ed6e4dc472e37069cd
F ext/fts5/fts5_tcl.c 7fb5a3d3404099075aaa2457307cb459bbc257c0de3dbd52b1e80a5b503e0329
F ext/fts5/fts5_test_mi.c 4308d5658cb1f5eee5998dcbaac7d5bdf7a2ef43c8192ca6e0c843f856ccee26
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl c5aa7cf7148b6dcffb5b61520ae18212baf169936af734ab265143f59db328fe
F ext/fts5/test/fts5aa.test cf4ff6180873bbc131666ba846ddd90148fcb61c20aad089711d3511cce24300
-F ext/fts5/test/fts5ab.test 4bdb619fee409e11417e8827e320b857e42e926a01a0408fc9f143ec870a6ced
+F ext/fts5/test/fts5ab.test c7e5c1519afb20366cb40d0179897a3c39d9fc06ba6b9c286d79df0ccd97e2ee
F ext/fts5/test/fts5ac.test 4a73626de86f3d17c95738034880c4f0de8d54741fb943d819b528373657e59b
F ext/fts5/test/fts5ad.test 058e616612964e61d19f70295f0e6eaedceb4b29b1fbf4f859615ef7e779dc22
F ext/fts5/test/fts5ae.test 3d49edbd50bb0684199a2e7568aeb30d1d29718f5c0f61751983740fa836d15f
F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7
F tool/warnings.sh 1ad0169b022b280bcaaf94a7fa231591be96b514230ab5c98fbf15cd7df842dd
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 2d40dc2173ac394024ae267419bd8d3621f15d4cd2449c9c2f7310acc3b190f0
-R eee9d1c206b49ec3ae26a46c172e3b61
-U drh
-Z 04b05decf94b84100216295e717c7c17
+P b3741f1101d25eded57a62b4967ec1bcce532dc9937c4b7b74cb689861efb442
+R 412e48a669e146967bbdc02f8b0696ba
+U dan
+Z 30d36738aec1726fc6292939ee2c4886
# Remove this line to create a well-formed Fossil manifest.