return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
}
+/*
+** Locate a tokenizer instance with a specification matching the second
+** argument. Create a new tokenizer if one cannot be found. Return SQLITE_OK
+** if successful, or an SQLite error code otherwise.
+*/
int sqlite3Fts5ConfigFindTokenizer(
- Fts5Config *pConfig,
- const char *z,
- Fts5TokenizerInst **ppOut
+ Fts5Config *pConfig, /* Table configuration */
+ const char *z, /* Requested tokenizer specification */
+ Fts5TokenizerInst **ppOut /* OUT: Tokenizer instance */
){
Fts5TokenizerInst *pRet = 0;
int rc = SQLITE_OK;
return rc;
}
+/*
+** Free all tokenizer instances in the list starting at Fts5Config.pTokList.
+*/
static void fts5ConfigFreeTokenizers(Fts5Config *pConfig){
Fts5TokenizerInst *p = pConfig->pTokList;
while( p ){
return SQLITE_OK;
}
+/*
+** Like sqlite3Fts5Tokenize(), but using the tokenizer defined by
+** specification zSpec.
+*/
int sqlite3Fts5SpecTokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
const char *zSpec, /* Tokenizer specification */
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
}
-#define IS_TOKENIZE_BLOB(pVal) ( \
- sqlite3_value_subtype(pVal)==SQLITE_FTS5_TOKENIZE_SUBTYPE \
- && sqlite3_value_type(pVal)==SQLITE_BLOB \
-)
+/*
+** If the value passed as the third argument is a tokenizer blob, and the
+** Fts5Config object indicates that the table is a contentless table,
+** return non-zero.
+**
+** Or, if the value passed as the third argument is a tokenizer blob but
+** the table is not a contentless table, set *pRc to SQLITE_ERROR and leave
+** an error message in the Fts5Config object. Return 0 in this case.
+**
+** Finally, if the value is not a tokenizer blob, return 0.
+*/
+static int fts5IsTokenizeBlob(
+ int *pRc, /* IN/OUT: Error code. Must be SQLITE_OK on entry */
+ Fts5Config *pConfig, /* Table configuration (eContent read, errmsg written) */
+ sqlite3_value *pVal /* Value to test */
+){
+ assert( *pRc==SQLITE_OK );
+ if( sqlite3_value_subtype(pVal)==SQLITE_FTS5_TOKENIZE_SUBTYPE
+ && sqlite3_value_type(pVal)==SQLITE_BLOB /* subtype tag AND blob type both required */
+ ){
+ if( pConfig->eContent==FTS5_CONTENT_NONE ) return 1; /* contentless: blob accepted */
+
+ *pRc = SQLITE_ERROR; /* tokenize-blob passed to a table with content: reject */
+ *pConfig->pzErrmsg = sqlite3_mprintf( /* NOTE(review): presumes pzErrmsg!=0 and *pzErrmsg==0 -- confirm */
+ "table does not support alternative tokenizers"
+ );
+ }
+ return 0; /* not a tokenize-blob, or error already flagged via *pRc */
+}
+/*
+** Value pVal is guaranteed to be a tokenize-blob. This function unpacks
+** the blob and returns a pointer to the nul-terminated tokenizer
+** specification. It also sets output parameter (*pzT) to point to the
+** start of the utf-8 text value (not nul-terminated) and (*pnT) to the
+** number of valid bytes in this buffer.
+*/
static const char *fts5UnpackTokenizeBlob(
sqlite3_value *pVal,
const char **pzT,
nText = sqlite3_column_bytes(pSeek, iCol);
}else if( ALWAYS(apVal) ){
sqlite3_value *pVal = apVal[iCol-1];
- if( IS_TOKENIZE_BLOB(pVal) ){
+ if( fts5IsTokenizeBlob(&rc, pConfig, pVal) ){
zTok = fts5UnpackTokenizeBlob(pVal, &zText, &nText);
}else{
zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
continue;
}
ctx.szCol = 0;
- rc = sqlite3Fts5SpecTokenize(pConfig, zTok, FTS5_TOKENIZE_DOCUMENT,
- zText, nText, (void*)&ctx, fts5StorageInsertCallback
- );
- p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
- if( p->aTotalSize[iCol-1]<0 ){
- rc = FTS5_CORRUPT;
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5SpecTokenize(pConfig, zTok, FTS5_TOKENIZE_DOCUMENT,
+ zText, nText, (void*)&ctx, fts5StorageInsertCallback
+ );
+ p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
+ if( p->aTotalSize[iCol-1]<0 ){
+ rc = FTS5_CORRUPT;
+ }
}
}
}
return rc;
}
+/*
+** This function is used to extract text from an sqlite3_value to use
+** as an fts5 query string. It also finds the required tokenizer to use
+** for tokenizing query terms.
+**
+** If successful, SQLITE_OK is returned, output variable (*ppTok) is set
+** to point to the required tokenizer instance, (*pzText) points to a
+** nul-terminated buffer containing the query string as utf-8 text, and
+** (*pbDel) is set to true if the caller must sqlite3_free(*pzText) at
+** some point in the future. Or, if an error occurs, an SQLite error
+** code is returned.
+*/
int sqlite3Fts5UnpackTokenizeBlob(
Fts5Config *pConfig,
sqlite3_value *pVal,
Fts5TokenizerInst **ppTok,
char **pzText,
- int *pbDel
+ int *pbDel /* OUT: Set to true if sqlite3_free() req. */
){
int rc = SQLITE_OK;
- if( IS_TOKENIZE_BLOB(pVal) ){
+ if( fts5IsTokenizeBlob(&rc, pConfig, pVal) ){
const char *zTok = 0;
const char *zText = 0;
int nText = 0;
rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
sqlite3_value *pVal = apVal[i];
- if( IS_TOKENIZE_BLOB(pVal) ){
+ if( fts5IsTokenizeBlob(&rc, pConfig, pVal) ){
const char *zT = 0;
int nT = 0;
fts5UnpackTokenizeBlob(pVal, &zT, &nT);
rc = sqlite3_bind_text(pInsert, i, zT, nT, SQLITE_STATIC);
- }else{
+ }else if( rc==SQLITE_OK ){
rc = sqlite3_bind_value(pInsert, i, apVal[i]);
}
}
const char *zTok = 0;
int nText = 0;
- if( IS_TOKENIZE_BLOB(pVal) ){
+ if( fts5IsTokenizeBlob(&rc, pConfig, pVal) ){
zTok = fts5UnpackTokenizeBlob(pVal, &zText, &nText);
}else{
zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]);
nText = sqlite3_value_bytes(apVal[ctx.iCol+2]);
}
- rc = sqlite3Fts5SpecTokenize(pConfig,
- zTok, FTS5_TOKENIZE_DOCUMENT, zText, nText,
- (void*)&ctx, fts5StorageInsertCallback
- );
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5SpecTokenize(pConfig,
+ zTok, FTS5_TOKENIZE_DOCUMENT, zText, nText,
+ (void*)&ctx, fts5StorageInsertCallback
+ );
+ }
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
-C Add\ssome\stests\sfor\sthe\sfts5\stokenize-blob\sfunctionality\son\sthis\sbranch.
-D 2024-04-16T14:23:56.964
+C Prevent\stokenize-blobs\sfrom\sbeing\sused\swith\snon-contentless\stables.\sFix\ssome\sother\sissues\swith\sthe\snew\scode\son\sthis\sbranch.
+D 2024-04-17T19:48:41.847
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
F ext/fts5/fts5.h e701ea20480be693f2b50ab314ec4d002bd9b97cd89636427ed1528c690107ae
-F ext/fts5/fts5Int.h 655147fa7eaba54753b9642c52d2476965be77d0da31e651989dfeaf351f6e8e
+F ext/fts5/fts5Int.h 098b3fd928d10035e9b52756affe6315fe337abfbc19e80ea33d1db07d4f5f7a
F ext/fts5/fts5_aux.c 4584e88878e54828bf7d4d0d83deedd232ec60628b7731be02bad6adb62304b1
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
-F ext/fts5/fts5_config.c 777bfe8e7131a07f5074e7fcaec91ef88580a7bde400e4561a89495e7d9bae99
+F ext/fts5/fts5_config.c fe565c6a12d6897053a5ab7b0cc6a0691c668103e3c3f1de8dec5491a72316fb
F ext/fts5/fts5_expr.c f1e9110062a9ff63007431d0af1b1506cca3e5f79e1b2f2dc47795b9e98d4b13
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c ee0f4d50bc0c58a7c5ef7d645e7e38e1e59315b8ea9d722ae00c5f949ee65379
-F ext/fts5/fts5_main.c b2dfe719a003337c159e1bfb97fa885d0dc1b9921de2e96953e188481663ae5e
-F ext/fts5/fts5_storage.c 768fafc623dd2d9974cc9816f5ab1006baaa105ba055d3c51578d11d73d76d24
+F ext/fts5/fts5_main.c 86b2c807711fc6eef3c1cf3e558093669093bf91a2014bfe5b71be8ad1ea41cb
+F ext/fts5/fts5_storage.c 19fc854c3fad12e3f79ed3608b944a07fb41ffd50af493a1f521cee3a35af192
F ext/fts5/fts5_tcl.c fd485d0fb56f2c42885e68c74dd53c594a4761af6088617ce120804a6a5aca82
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
F ext/fts5/test/fts5tokenizer.test ac3c9112b263a639fb0508ae73a3ee886bf4866d2153771a8e8a20c721305a43
F ext/fts5/test/fts5tokenizer2.test cb5428c7cfb3b6a74b7adfcde65506e329112003e8dffa7501d01c2d18d02569
-F ext/fts5/test/fts5tokenizer3.test c96e232d51d21a4deb59d797070df9087121a7f5e3dc5d1cea60c6b3d9e76e69
+F ext/fts5/test/fts5tokenizer3.test 507d50608b61031f72f8cf3c752ea8db51d3d67ae99ebe6f0d191e58455dc19c
F ext/fts5/test/fts5trigram.test 6c4e37864f3e7d90673db5563d9736d7e40080ab94d10ebdffa94c1b77941da0
F ext/fts5/test/fts5trigram2.test 9fe4207f8a4241747aff1005258b564958588d21bfd240d6cd4c2e955d31c156
F ext/fts5/test/fts5ubsan.test 783d5a8d13ebfa169e634940228db54540780e3ba7a87ad1e4510e61440bf64b
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 6c51c9c6a8a6a730c1d9e0119bc39edeefbbcb3b30476347a51d2e08eb91fe36
-R e266f84378ceb47ebfd337fc2f715b86
+P c2f9d1259cc094ad1d3e5e0a50b262a248915743fed3b1a730a1d9f0f845f48b
+R 1ef80b3031ba5f310c6dd6728c192f80
U dan
-Z 7dd42f59f6272cbb7143aeed0598944f
+Z 1752c8f42f1640cfd39a63e89bc136e9
# Remove this line to create a well-formed Fossil manifest.