From: dan Date: Thu, 9 Oct 2025 14:50:27 +0000 (+0000) Subject: Avoid corrupting the fts5 index if a value that was inserted via sqlite3_bind_blob... X-Git-Tag: major-release~97 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=40d299ae32e4b15126b22d939f04738e18ae02c4;p=thirdparty%2Fsqlite.git Avoid corrupting the fts5 index if a value that was inserted via sqlite3_bind_blob() in a non-utf8 db is deleted. FossilOrigin-Name: 8bf26c956e199762d55c159392c2e3813a9e12b914d3ca33000bf332cd946cb0 --- diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 76820e85b3..70d5244ecc 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -40,16 +40,16 @@ ** This is necessary - using sqlite3_value_nochange() instead of just having ** SQLite pass the original value back via xUpdate() - so as not to discard ** any locale information associated with such values. -** */ struct Fts5Storage { Fts5Config *pConfig; Fts5Index *pIndex; + int db_enc; /* Database encoding */ int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ i64 nTotalRow; /* Total number of rows in FTS table */ i64 *aTotalSize; /* Total sizes of each column */ sqlite3_stmt *pSavedRow; - sqlite3_stmt *aStmt[12]; + sqlite3_stmt *aStmt[13]; }; @@ -72,6 +72,7 @@ struct Fts5Storage { #define FTS5_STMT_LOOKUP_DOCSIZE 9 #define FTS5_STMT_REPLACE_CONFIG 10 #define FTS5_STMT_SCAN 11 +#define FTS5_STMT_ENC_CONVERT 12 /* ** Prepare the two insert statements - Fts5Storage.pInsertContent and @@ -113,6 +114,7 @@ static int fts5StorageGetStmt( "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ "SELECT %s FROM %s AS T", /* SCAN */ + "SELECT substr(?, 1)", /* ENC_CONVERT */ }; Fts5Config *pC = p->pConfig; char *zSql = 0; @@ -333,6 +335,36 @@ int sqlite3Fts5CreateTable( return rc; } +/* +** Set the value of Fts5Storage.db_enc to the db encoding. Return SQLITE_OK +** if successful, or an SQLite error code otherwise. 
+*/ +static int fts5StorageFindDbEnc(Fts5Storage *p){ + const char *zSql = "PRAGMA encoding"; + sqlite3_stmt *pStmt = 0; + int rc = SQLITE_OK; + + rc = sqlite3_prepare(p->pConfig->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + static const char *aEnc[] = { + "UTF-8", "UTF-16le", "UTF-16be" + }; + const char *zEnc = (const char*)sqlite3_column_text(pStmt, 0); + int ii; + for(ii=0; ii<ArraySize(aEnc); ii++){ + if( sqlite3_stricmp(aEnc[ii], zEnc)==0 ){ + p->db_enc = ii+1; + break; + } + } + } + rc = sqlite3_finalize(pStmt); + } + + return rc; +} + /* ** Open a new Fts5Index handle. If the bCreate argument is true, create ** and initialize the underlying tables @@ -361,7 +393,9 @@ int sqlite3Fts5StorageOpen( p->pConfig = pConfig; p->pIndex = pIndex; - if( bCreate ){ + rc = fts5StorageFindDbEnc(p); + + if( rc==SQLITE_OK && bCreate ){ if( pConfig->eContent==FTS5_CONTENT_NORMAL || pConfig->eContent==FTS5_CONTENT_UNINDEXED ){ @@ -1031,6 +1065,59 @@ int sqlite3Fts5StorageContentInsert( return rc; } +/* +** Argument pVal is a blob value for which the internal encoding does not +** match the database encoding. This happens when using sqlite3_bind_blob() +** (which always sets encoding=utf8) with a utf-16 database. The problem +** is that fts5 is about to call sqlite3_column_text() on the value to +** obtain text for tokenization. And the conversion between text and blob +** must take place assuming the blob is encoded in database encoding - +** otherwise it won't match the text extracted from the same blob if it +** is read from the db later on. +** +** This function attempts to create a new value containing a copy of +** the blob in pVal, but with the encoding set to the database encoding. +** If successful, it sets (*ppOut) to point to the new value and returns +** SQLITE_OK. It is the responsibility of the caller to eventually free +** this value using sqlite3_value_free(). Or, if an error occurs, (*ppOut) +** is set to NULL and an SQLite error code returned. 
+*/ +static int fts5EncodingFix( + Fts5Storage *p, + sqlite3_value *pVal, + sqlite3_value **ppOut +){ + sqlite3_stmt *pStmt = 0; + int rc = fts5StorageGetStmt( + p, FTS5_STMT_ENC_CONVERT, &pStmt, p->pConfig->pzErrmsg + ); + if( rc==SQLITE_OK ){ + sqlite3_value *pDup = 0; + const char *pBlob = sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + + sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC); + + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + sqlite3_value *p = sqlite3_column_value(pStmt, 0); + pDup = sqlite3_value_dup(p); + if( pDup==0 ){ + rc = SQLITE_NOMEM; + }else{ + *ppOut = p; + } + } + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ){ + sqlite3_value_free(pDup); + }else{ + *ppOut = pDup; + } + } + + return rc; +} + /* ** Insert new entries into the FTS index and %_docsize table. */ @@ -1058,6 +1145,7 @@ int sqlite3Fts5StorageIndexInsert( const char *pText = 0; /* Pointer to buffer containing text value */ int nLoc = 0; /* Size of pText in bytes */ const char *pLoc = 0; /* Pointer to buffer containing text value */ + sqlite3_value *pFree = 0; sqlite3_value *pVal = apVal[ctx.iCol+2]; if( p->pSavedRow && sqlite3_value_nochange(pVal) ){ @@ -1074,6 +1162,15 @@ int sqlite3Fts5StorageIndexInsert( if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); }else{ + if( sqlite3_value_type(pVal)==SQLITE_BLOB + && sqlite3_value_encoding(pVal)!=p->db_enc + ){ + rc = fts5EncodingFix(p, pVal, &pFree); + if( pFree ){ + assert( rc==SQLITE_OK ); + pVal = pFree; + } + } pText = (const char*)sqlite3_value_text(pVal); nText = sqlite3_value_bytes(pVal); } @@ -1086,6 +1183,9 @@ int sqlite3Fts5StorageIndexInsert( ); sqlite3Fts5ClearLocale(pConfig); } + if( pFree ){ + sqlite3_value_free(pFree); + } } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; diff --git a/ext/fts5/test/fts5ab.test b/ext/fts5/test/fts5ab.test index 
7e312286f3..a74c0f8884 100644 --- a/ext/fts5/test/fts5ab.test +++ b/ext/fts5/test/fts5ab.test @@ -294,6 +294,39 @@ do_execsql_test 7.0 { INSERT INTO x1 VALUES($doc); } +#------------------------------------------------------------------------- +# Forum post: https://sqlite.org/forum/forumpost/ea4d8c9acb +# +reset_db +do_execsql_test 8.0 { + PRAGMA encoding = 'UTF-16le'; + CREATE VIRTUAL TABLE vt0 USING fts5(c0); +} +set v [db one {SELECT x'2a12'}] +do_execsql_test 8.1 { + INSERT INTO vt0 VALUES ($v); +} +do_execsql_test 8.2 { + SELECT quote(c0) FROM vt0 +} {X'2A12'} +do_execsql_test 8.3 { + INSERT INTO vt0(vt0) VALUES('integrity-check'); +} {} +reset_db +do_execsql_test 8.4 { + PRAGMA encoding = 'UTF-16le'; + CREATE VIRTUAL TABLE vt0 USING fts5(c0); +} +do_execsql_test 8.5 { + INSERT INTO vt0 VALUES (x'2a12'); +} +do_execsql_test 8.6 { + SELECT quote(c0) FROM vt0 +} {X'2A12'} +do_execsql_test 8.7 { + INSERT INTO vt0(vt0) VALUES('integrity-check'); +} {} + } ;# foreach_detail_mode... diff --git a/manifest b/manifest index 51244c99e7..2ac6bf55fa 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C In\sthe\sCLI,\slimit\scolumn\swidths\sspecified\sby\sthe\s".width"\scommand\sto\nhave\san\sabsolute\svalue\snot\smore\sthan\s30,000. -D 2025-10-09T14:01:52.855 +C Avoid\scorrupting\sthe\sfts5\sindex\sif\sa\svalue\sthat\swas\sinserted\svia\ssqlite3_bind_blob()\sin\sa\snon-utf8\sdb\sis\sdeleted. 
+D 2025-10-09T14:50:27.036 F .fossil-settings/binary-glob 61195414528fb3ea9693577e1980230d78a1f8b0a54c78cf1b9b24d0a409ed6a x F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea @@ -115,7 +115,7 @@ F ext/fts5/fts5_expr.c b8c32da1127bafaf10d6b4768b0dcb92285798524bed2d87a8686f99a F ext/fts5/fts5_hash.c a6266cedd801ab7964fa9e74ebcdda6d30ec6a96107fa24148ec6b7b5b80f6e0 F ext/fts5/fts5_index.c 1e5009261966215b61bbe3b46d79916346efac775b57c1487a478f684c971111 F ext/fts5/fts5_main.c 42025174a556257287071e90516d3ab8115daf1dd525a301883544469a260014 -F ext/fts5/fts5_storage.c 19bc7c4cbe1e6a2dd9849ef7d84b5ca1fcbf194cefc3e386b901e00e08bf05c2 +F ext/fts5/fts5_storage.c f6c646106ce4e34bf3de6370fb9df953e891462a8e2cd4ed6e4dc472e37069cd F ext/fts5/fts5_tcl.c 7fb5a3d3404099075aaa2457307cb459bbc257c0de3dbd52b1e80a5b503e0329 F ext/fts5/fts5_test_mi.c 4308d5658cb1f5eee5998dcbaac7d5bdf7a2ef43c8192ca6e0c843f856ccee26 F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b @@ -127,7 +127,7 @@ F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl c5aa7cf7148b6dcffb5b61520ae18212baf169936af734ab265143f59db328fe F ext/fts5/test/fts5aa.test cf4ff6180873bbc131666ba846ddd90148fcb61c20aad089711d3511cce24300 -F ext/fts5/test/fts5ab.test 4bdb619fee409e11417e8827e320b857e42e926a01a0408fc9f143ec870a6ced +F ext/fts5/test/fts5ab.test c7e5c1519afb20366cb40d0179897a3c39d9fc06ba6b9c286d79df0ccd97e2ee F ext/fts5/test/fts5ac.test 4a73626de86f3d17c95738034880c4f0de8d54741fb943d819b528373657e59b F ext/fts5/test/fts5ad.test 058e616612964e61d19f70295f0e6eaedceb4b29b1fbf4f859615ef7e779dc22 F ext/fts5/test/fts5ae.test 3d49edbd50bb0684199a2e7568aeb30d1d29718f5c0f61751983740fa836d15f @@ -2169,8 +2169,8 @@ F 
tool/version-info.c 33d0390ef484b3b1cb685d59362be891ea162123cea181cb8e6d2cf6dd F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 1ad0169b022b280bcaaf94a7fa231591be96b514230ab5c98fbf15cd7df842dd F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2d40dc2173ac394024ae267419bd8d3621f15d4cd2449c9c2f7310acc3b190f0 -R eee9d1c206b49ec3ae26a46c172e3b61 -U drh -Z 04b05decf94b84100216295e717c7c17 +P b3741f1101d25eded57a62b4967ec1bcce532dc9937c4b7b74cb689861efb442 +R 412e48a669e146967bbdc02f8b0696ba +U dan +Z 30d36738aec1726fc6292939ee2c4886 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 380326b0c2..9c10c755fe 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b3741f1101d25eded57a62b4967ec1bcce532dc9937c4b7b74cb689861efb442 +8bf26c956e199762d55c159392c2e3813a9e12b914d3ca33000bf332cd946cb0