/* Parse a MATCH expression. */
int sqlite3Fts5ExprNew(
Fts5Config *pConfig,
+ Fts5TokenizerInst*,
int bPhraseToAnd,
int iCol, /* Column on LHS of MATCH operator */
const char *zExpr,
Fts5ExprPhrase **apPhrase; /* Array of all phrases */
Fts5ExprNode *pExpr; /* Result of a successful parse */
int bPhraseToAnd; /* Convert "a+b" to "a AND b" */
+ Fts5TokenizerInst *pTok;
};
/*
int sqlite3Fts5ExprNew(
Fts5Config *pConfig, /* FTS5 Configuration */
+ Fts5TokenizerInst *pTok, /* Tokenizer to use, or NULL */
int bPhraseToAnd,
int iCol,
const char *zExpr, /* Expression text */
*pzErr = 0;
memset(&sParse, 0, sizeof(sParse));
sParse.bPhraseToAnd = bPhraseToAnd;
+ sParse.pTok = pTok ? pTok : pConfig->pTokList;
pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
if( pEngine==0 ){ return SQLITE_NOMEM; }
sParse.pConfig = pConfig;
}
}
zExpr[iOut] = '\0';
- rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg);
+ rc = sqlite3Fts5ExprNew(
+ pConfig, 0, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg
+ );
}else{
*pp = 0;
}
rc = fts5ParseStringFromToken(pToken, &z);
if( rc==SQLITE_OK ){
+ Fts5TokenizerInst *p = pParse->pTok;
int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
int n;
sqlite3Fts5Dequote(z);
n = (int)strlen(z);
- rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
+ rc = p->pTokApi->xTokenize(p->pTok, &sCtx, flags, z, n, fts5ParseTokenize);
}
sqlite3_free(z);
if( rc || (rc = sCtx.rc) ){
rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
if( rc==SQLITE_OK ){
- rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr);
+ rc = sqlite3Fts5ExprNew(pConfig, 0, 0, pConfig->nCol, zExpr, &pExpr, &zErr);
}
if( rc==SQLITE_OK ){
char *zText;
rc = sqlite3Fts5UnpackTokenizeBlob(
pConfig, apVal[i], &pInst, &zText, &bDel
);
+ if( rc!=SQLITE_OK ) goto filter_out;
if( zText==0 ) zText = "";
iCol = 0;
goto filter_out;
}else{
char **pzErr = &pTab->p.base.zErrMsg;
- rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
+ rc = sqlite3Fts5ExprNew(pConfig, pInst, 0, iCol, zText, &pExpr,pzErr);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
pExpr = 0;
rc = sqlite3Fts5TestRegisterTok(db, pApi);
if( rc==SQLITE_OK ){
- rc = sqlite3_create_function(db, "fts5tokenize", 2, SQLITE_UTF8, 0,
- f5tScalarFunc, 0, 0
+ rc = sqlite3_create_function(db, "fts5tokenize", 2,
+ SQLITE_UTF8 | SQLITE_RESULT_SUBTYPE, 0, f5tScalarFunc, 0, 0
);
}
if( rc!=SQLITE_OK ){
--- /dev/null
+# 2024 Apr 16
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focusing on the tokenize-blob functionality.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5tokenizer3
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+set ::constructor_count 0
+# Constructor script registered for tokenizer "tcl1". Each invocation
+# increments the global ::constructor_count (inspected by tests 1.16-1.20)
+# and returns the string "tcl_tokenize1" -- presumably the name of the
+# script the test harness then invokes to tokenize text; confirm against
+# sqlite3_fts5_create_tokenizer. All arguments are ignored.
+proc tcl_create1 {args} { incr ::constructor_count ; return "tcl_tokenize1" }
+# Constructor script registered for tokenizer "tcl2". Each invocation
+# increments the global ::constructor_count (inspected by tests 1.16-1.20)
+# and returns the string "tcl_tokenize2" -- presumably the name of the
+# script the test harness then invokes to tokenize text; confirm against
+# sqlite3_fts5_create_tokenizer. All arguments are ignored.
+proc tcl_create2 {args} { incr ::constructor_count ; return "tcl_tokenize2" }
+sqlite3_fts5_create_tokenizer db tcl1 tcl_create1
+sqlite3_fts5_create_tokenizer db tcl2 tcl_create2
+
+# Tokenize script whose name is returned by tcl_create1. Breaks $text apart
+# with [split] (default whitespace separators) and reports every piece,
+# folded to UPPER case, through sqlite3_fts5_token. The two trailing 0
+# arguments are presumably the token's start/end offsets -- confirm against
+# the sqlite3_fts5_token implementation. $tflags is not used. Returns 0.
+proc tcl_tokenize1 {tflags text} {
+ foreach t [split $text] {
+ sqlite3_fts5_token [string toupper $t] 0 0
+ }
+ return 0
+}
+# Tokenize script whose name is returned by tcl_create2 and tcl_create4.
+# Breaks $text apart with [split] (default whitespace separators) and
+# reports every piece, folded to lower case, through sqlite3_fts5_token.
+# The two trailing 0 arguments are presumably the token's start/end offsets
+# -- confirm against the sqlite3_fts5_token implementation. $tflags is not
+# used. Returns 0.
+proc tcl_tokenize2 {tflags text} {
+ foreach t [split $text] {
+ sqlite3_fts5_token [string tolower $t] 0 0
+ }
+ return 0
+}
+
+sqlite3_fts5_register_fts5tokenize db
+
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize = tcl1, content=);
+ CREATE VIRTUAL TABLE v1 USING fts5vocab(x1, instance);
+ INSERT INTO x1 VALUES('Abc Def');
+}
+
+do_execsql_test 1.1 {
+ SELECT hex( fts5tokenize('Ghi Jkl', 'tcl2') );
+} {74636C3200476869204A6B6C}
+
+do_execsql_test 1.2 {
+ INSERT INTO x1 VALUES(fts5tokenize('Ghi Jkl', 'tcl2'));
+}
+
+do_execsql_test 1.3 {
+ SELECT DISTINCT term FROM v1 ORDER BY 1
+} {ABC DEF ghi jkl}
+
+do_execsql_test 1.4 {
+ INSERT INTO x1(x1, rowid, x) VALUES('delete', 2, 'Ghi Jkl');
+ SELECT DISTINCT term FROM v1 ORDER BY 1
+} {ABC DEF ghi jkl}
+
+do_execsql_test 1.5 {
+ INSERT INTO x1(x1, rowid, x)
+ VALUES('delete', 2, fts5tokenize('Ghi Jkl', 'tcl2'));
+ SELECT DISTINCT term FROM v1 ORDER BY 1
+} {ABC DEF}
+
+do_execsql_test 1.6 {
+ INSERT INTO x1(x1) VALUES('delete-all');
+ INSERT INTO x1 VALUES('Abc Def');
+ INSERT INTO x1 VALUES(fts5tokenize('Ghi Jkl', 'tcl2'));
+} {}
+
+do_execsql_test 1.7 {
+ SELECT rowid FROM x1('Ghi Jkl');
+} {}
+do_execsql_test 1.8 {
+ SELECT rowid FROM x1(fts5tokenize('Abc Def', 'tcl1'));
+} {1}
+do_execsql_test 1.9 {
+ SELECT rowid FROM x1(fts5tokenize('Ghi Jkl', 'tcl2'));
+} {2}
+
+#-------------------------------------------------------------------------
+# Error conditions.
+#
+do_catchsql_test 1.10 {
+ INSERT INTO x1 VALUES(fts5tokenize('Mno Pqr', 'tcl3'));
+} {1 {no such tokenizer: tcl3}}
+do_catchsql_test 1.11 {
+ INSERT INTO x1(x1, rowid, x)
+ VALUES('delete', 2, fts5tokenize('Mno Pqr', 'tcl3'));
+} {1 {no such tokenizer: tcl3}}
+do_catchsql_test 1.12 {
+ SELECT rowid FROM x1(fts5tokenize('Mno Pqr', 'tcl3'));
+} {1 {no such tokenizer: tcl3}}
+
+do_catchsql_test 1.13 {
+ INSERT INTO x1 VALUES(fts5tokenize('Mno Pqr', 'unicode61 option'));
+} {1 {error in tokenizer constructor}}
+do_catchsql_test 1.14 {
+ INSERT INTO x1(x1, rowid, x)
+ VALUES('delete', 2, fts5tokenize('Mno Pqr', 'unicode61 option'));
+} {1 {error in tokenizer constructor}}
+do_catchsql_test 1.15 {
+ SELECT rowid FROM x1(fts5tokenize('Mno Pqr', 'unicode61 option'));
+} {1 {error in tokenizer constructor}}
+
+# Check the tokenizer cache has been working.
+#
+do_test 1.16 {
+ set ::constructor_count
+} 2
+# Constructor script for a further tokenizer, "tcl4", which hands back the
+# same "tcl_tokenize2" string as tcl_create2. It exists so that tests
+# 1.17-1.20 can observe ::constructor_count rising from 2 to 3 the first time
+# "tcl4" is used, and staying at 3 when "tcl2" is used again afterwards.
+proc tcl_create4 {args} { incr ::constructor_count ; return "tcl_tokenize2" }
+sqlite3_fts5_create_tokenizer db tcl4 tcl_create4
+do_execsql_test 1.17 {
+ SELECT rowid FROM x1(fts5tokenize('Mno Pqr', 'tcl4'));
+}
+do_test 1.18 {
+ set ::constructor_count
+} 3
+do_execsql_test 1.19 {
+ SELECT rowid FROM x1(fts5tokenize('Mno Pqr', 'tcl2'));
+}
+do_test 1.20 {
+ set ::constructor_count
+} 3
+
+
+finish_test
+
-C Add\sexperimental\sway\sto\sspecify\san\salternative\stokenizer\swhen\swriting\sto\sor\squerying\san\sfts5\stable.
-D 2024-04-15T20:24:50.588
+C Add\ssome\stests\sfor\sthe\sfts5\stokenize-blob\sfunctionality\son\sthis\sbranch.
+D 2024-04-16T14:23:56.964
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
F ext/fts5/fts5.h e701ea20480be693f2b50ab314ec4d002bd9b97cd89636427ed1528c690107ae
-F ext/fts5/fts5Int.h 5fdd75e46cbaabf84c072907f0f3c5da8dbab76c226355a8bdf528e18a530ba8
+F ext/fts5/fts5Int.h 655147fa7eaba54753b9642c52d2476965be77d0da31e651989dfeaf351f6e8e
F ext/fts5/fts5_aux.c 4584e88878e54828bf7d4d0d83deedd232ec60628b7731be02bad6adb62304b1
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
F ext/fts5/fts5_config.c 777bfe8e7131a07f5074e7fcaec91ef88580a7bde400e4561a89495e7d9bae99
-F ext/fts5/fts5_expr.c e91156ebdcc08d837f4f324168f69f3c0d7fdef0e521fd561efb48ef3297b696
+F ext/fts5/fts5_expr.c f1e9110062a9ff63007431d0af1b1506cca3e5f79e1b2f2dc47795b9e98d4b13
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c ee0f4d50bc0c58a7c5ef7d645e7e38e1e59315b8ea9d722ae00c5f949ee65379
-F ext/fts5/fts5_main.c 49111d5d88bd35fa3d052ec8a3047c02fed8e9a3bf3bcd2048cb3ae78dcefe65
+F ext/fts5/fts5_main.c b2dfe719a003337c159e1bfb97fa885d0dc1b9921de2e96953e188481663ae5e
F ext/fts5/fts5_storage.c 768fafc623dd2d9974cc9816f5ab1006baaa105ba055d3c51578d11d73d76d24
-F ext/fts5/fts5_tcl.c 97e5e14f7d0447979f918ecfd7bcadb0e15ce15f79d007c7400190cafd265beb
+F ext/fts5/fts5_tcl.c fd485d0fb56f2c42885e68c74dd53c594a4761af6088617ce120804a6a5aca82
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c 83cfcede3898001cab84432a36ce1503e3080cf9b1c682b022ec82e267ea4c13
F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
F ext/fts5/test/fts5tokenizer.test ac3c9112b263a639fb0508ae73a3ee886bf4866d2153771a8e8a20c721305a43
F ext/fts5/test/fts5tokenizer2.test cb5428c7cfb3b6a74b7adfcde65506e329112003e8dffa7501d01c2d18d02569
+F ext/fts5/test/fts5tokenizer3.test c96e232d51d21a4deb59d797070df9087121a7f5e3dc5d1cea60c6b3d9e76e69
F ext/fts5/test/fts5trigram.test 6c4e37864f3e7d90673db5563d9736d7e40080ab94d10ebdffa94c1b77941da0
F ext/fts5/test/fts5trigram2.test 9fe4207f8a4241747aff1005258b564958588d21bfd240d6cd4c2e955d31c156
F ext/fts5/test/fts5ubsan.test 783d5a8d13ebfa169e634940228db54540780e3ba7a87ad1e4510e61440bf64b
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P b40580be719a129ecd1aa3c69d1086c967d063920fdd48617c864e73c059abc1
-R 3c57328f56827a22357c3ccf2b463e50
-T *branch * fts5-tokenize-blob
-T *sym-fts5-tokenize-blob *
-T -sym-trunk *
+P 6c51c9c6a8a6a730c1d9e0119bc39edeefbbcb3b30476347a51d2e08eb91fe36
+R e266f84378ceb47ebfd337fc2f715b86
U dan
-Z 169f93ab8cae82ba28e335410f939065
+Z 7dd42f59f6272cbb7143aeed0598944f
# Remove this line to create a well-formed Fossil manifest.
-6c51c9c6a8a6a730c1d9e0119bc39edeefbbcb3b30476347a51d2e08eb91fe36
\ No newline at end of file
+c2f9d1259cc094ad1d3e5e0a50b262a248915743fed3b1a730a1d9f0f845f48b
\ No newline at end of file