From: dan Date: Wed, 11 Oct 2023 21:08:12 +0000 (+0000) Subject: Add the tokendata=1 option to ignore trailing token-data when querying an fts5 table. X-Git-Tag: version-3.45.0~114^2~25 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=03204e910680096d982139bda04c6f8a1b0f8f67;p=thirdparty%2Fsqlite.git Add the tokendata=1 option to ignore trailing token-data when querying an fts5 table. FossilOrigin-Name: 122935182ad5869ce3a4c6d796c38a0509f6f3384dd1b3e60a3f2f0f366cc5f5 --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 1687168d5f..4aa578559b 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -196,6 +196,7 @@ struct Fts5Config { char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ + int bTokendata; /* "tokendata=" option value (dflt==0) */ int eDetail; /* FTS5_DETAIL_XXX value */ char *zContentExprlist; Fts5Tokenizer *pTok; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 5d0770502e..d2e8309cd2 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -398,6 +398,16 @@ static int fts5ConfigParseSpecial( return rc; } + if( sqlite3_strnicmp("tokendata", zCmd, nCmd)==0 ){ + if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ + *pzErr = sqlite3_mprintf("malformed tokendata=... directive"); + rc = SQLITE_ERROR; + }else{ + pConfig->bTokendata = (zArg[0]=='1'); + } + return rc; + } + *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 745a5d9fa6..a2c6320719 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -101,7 +101,8 @@ struct Fts5ExprTerm { u8 bPrefix; /* True for a prefix term */ u8 bFirst; /* True if token must be first in column */ char *pTerm; /* Term data */ - int nTerm; /* Size of term in bytes */ + int nQueryTerm; /* Effective size of term in bytes */ + int nFullTerm; /* Size of term in bytes incl. tokendata */ Fts5IndexIter *pIter; /* Iterator for this term */ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; @@ -968,7 +969,7 @@ static int fts5ExprNearInitAll( p->pIter = 0; } rc = sqlite3Fts5IndexQuery( - pExpr->pIndex, p->pTerm, p->nTerm, + pExpr->pIndex, p->pTerm, p->nQueryTerm, (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), pNear->pColset, @@ -1703,6 +1704,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; + Fts5Config *pConfig; int rc; }; @@ -1737,7 +1739,8 @@ static int fts5ParseTokenize( }else{ memset(pSyn, 0, (size_t)nByte); pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); - pSyn->nTerm = nToken; + pSyn->nFullTerm = pSyn->nQueryTerm = nToken; + if( pCtx->pConfig->bTokendata ) pSyn->nQueryTerm = strlen(pSyn->pTerm); memcpy(pSyn->pTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; @@ -1764,7 +1767,8 @@ static int fts5ParseTokenize( pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); - pTerm->nTerm = nToken; + pTerm->nFullTerm = pTerm->nQueryTerm = nToken; + if( pCtx->pConfig->bTokendata ) pTerm->nQueryTerm = strlen(pTerm->pTerm); } } @@ -1831,6 +1835,7 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; + sCtx.pConfig = pConfig; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ @@ -1880,8 +1885,7 @@ int sqlite3Fts5ExprClonePhrase( int rc = SQLITE_OK; /* Return code */ Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ - TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ - + TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */ pOrig = pExpr->apExprPhrase[iPhrase]; pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); if( rc==SQLITE_OK ){ @@ -1912,11 +1916,12 @@ int sqlite3Fts5ExprClonePhrase( if( pOrig->nTerm ){ int i; /* Used to iterate through phrase terms */ + sCtx.pConfig = pExpr->pConfig; for(i=0; rc==SQLITE_OK && inTerm; i++){ int tflags = 0; Fts5ExprTerm *p; for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ - rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm, p->nTerm, 0, 0); + rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm,p->nFullTerm,0,0); tflags = FTS5_TOKEN_COLOCATED; } if( rc==SQLITE_OK ){ @@ -2298,12 +2303,12 @@ static Fts5ExprNode *fts5ParsePhraseToAnd( fts5ExprPhraseFree(pPhrase); }else{ Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii]; + Fts5ExprTerm *pTo = &pPhrase->aTerm[0]; pParse->apPhrase[pParse->nPhrase++] = pPhrase; pPhrase->nTerm = 1; - pPhrase->aTerm[0].pTerm = sqlite3Fts5Strndup( - &pParse->rc, p->pTerm, p->nTerm - ); - pPhrase->aTerm[0].nTerm = p->nTerm; + pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm); + pTo->nQueryTerm = p->nQueryTerm; + pTo->nFullTerm = p->nFullTerm; pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) ); @@ -2488,7 +2493,7 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ /* Determine the maximum amount of space required. */ for(p=pTerm; p; p=p->pSynonym){ - nByte += pTerm->nTerm * 2 + 3 + 2; + nByte += pTerm->nQueryTerm * 2 + 3 + 2; } zQuoted = sqlite3_malloc64(nByte); @@ -2496,7 +2501,7 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ int i = 0; for(p=pTerm; p; p=p->pSynonym){ char *zIn = p->pTerm; - char *zEnd = &zIn[p->nTerm]; + char *zEnd = &zIn[p->nQueryTerm]; zQuoted[i++] = '"'; while( zInnTerm; iTerm++){ Fts5ExprTerm *p = &pPhrase->aTerm[iTerm]; zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ", - p->nTerm, p->pTerm + p->nQueryTerm, p->pTerm ); if( pPhrase->aTerm[iTerm].bPrefix ){ zRet = fts5PrintfAppend(zRet, "*"); @@ -2997,11 +3002,11 @@ static int fts5ExprPopulatePoslistsCb( if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; for(i=0; inPhrase; i++){ - Fts5ExprTerm *pTerm; + Fts5ExprTerm *pT; if( p->aPopulator[i].bOk==0 ) continue; - for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ - if( (pTerm->nTerm==nToken || (pTerm->nTermbPrefix)) - && memcmp(pTerm->pTerm, pToken, pTerm->nTerm)==0 + for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ + if( (pT->nFullTerm==nToken || (pT->nFullTermbPrefix)) + && memcmp(pT->pTerm, pToken, pT->nFullTerm)==0 ){ int rc = sqlite3Fts5PoslistWriterAppend( &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 6cf30b5a00..b90a66b88f 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6037,11 +6037,12 @@ static void fts5MergePrefixLists( static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ + int bTokenscan, int iIdx, /* Index to scan for data */ u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ - Fts5Iter **ppIter /* OUT: New iterator */ + Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; @@ -6060,6 +6061,8 @@ static void fts5SetupPrefixIter( xAppend = fts5AppendPoslist; } + assert( bTokenscan==0 || iIdx==0 ); + aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); @@ -6075,6 +6078,12 @@ static void fts5SetupPrefixIter( int bNewTerm = 1; memset(&doclist, 0, sizeof(doclist)); + + /* If iIdx is non-zero, then it is the number of a prefix-index for + ** prefixes 1 character longer than the prefix being queried for. That + ** index contains all the doclists required, except for the one + ** corresponding to the prefix itself. That one is extracted from the + ** main term index here. */ if( iIdx!=0 ){ int dummy = 0; const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; @@ -6110,6 +6119,7 @@ static void fts5SetupPrefixIter( assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( bNewTerm ){ if( nTermnToken && pTerm[nToken]!=0x00 ) break; } if( p1->base.nData==0 ) continue; @@ -6438,7 +6448,7 @@ int sqlite3Fts5IndexQuery( } } - if( iIdx<=pConfig->nPrefix ){ + if( iIdx<=pConfig->nPrefix && (pConfig->bTokendata==0 || iIdx!=0) ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); @@ -6451,7 +6461,10 @@ int sqlite3Fts5IndexQuery( }else{ /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; - fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); + int bTokenscan = (iIdx==0); + fts5SetupPrefixIter( + p, bDesc, bTokenscan, iPrefixIdx, buf.p, nToken+1, pColset, &pRet + ); if( pRet==0 ){ assert( p->rc!=SQLITE_OK ); }else{ diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 9c012932da..001cad1de2 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -438,6 +438,20 @@ proc detail_is_none {} { detail_check ; expr {$::detail == "none"} } proc detail_is_col {} { detail_check ; expr {$::detail == "col" } } proc detail_is_full {} { detail_check ; expr {$::detail == "full"} } +proc foreach_tokenizer_mode {prefix script} { + set saved $::testprefix + foreach {d mapping} { + "" {} + "-origintext" {, tokenize="origintext unicode61", tokendata=1} + } { + set s [string map [list %TOKENIZER% $mapping] $script] + set ::testprefix "$prefix$d" + reset_db + sqlite3_fts5_register_origintext db + uplevel $s + } + set ::testprefix $saved +} #------------------------------------------------------------------------- # Convert a poslist of the type returned by fts5_test_poslist() to a diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index e1551fc516..a80a307a49 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -22,6 +22,7 @@ ifcapable !fts5 { } foreach_detail_mode $::testprefix { +foreach_tokenizer_mode $::testprefix { do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); @@ -44,7 +45,7 @@ do_execsql_test 1.1 { # do_execsql_test 2.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); @@ -73,8 +74,9 @@ do_execsql_test 2.4 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 3.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } foreach {i x y} { 1 {g f d b f} {h h e i a} @@ -97,8 +99,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 4.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { @@ -121,8 +124,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 5.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { @@ -145,8 +149,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 6.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } @@ -181,6 +186,7 @@ do_execsql_test 6.6 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db expr srand(0) do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); @@ -222,6 +228,7 @@ for {set i 1} {$i <= 10} {incr i} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 8.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); @@ -236,6 +243,7 @@ do_execsql_test 8.1 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db expr srand(0) @@ -280,8 +288,9 @@ for {set i 1} {$i <= 10} {incr i} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 10.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } set d10 { 1 {g f d b f} {h h e i a} @@ -314,19 +323,19 @@ do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') } #------------------------------------------------------------------------- # do_catchsql_test 11.1 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 column name: rank}} do_catchsql_test 11.2 { - CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL%); + CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 table name: rank}} do_catchsql_test 11.3 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 column name: rowid}} #------------------------------------------------------------------------- # do_execsql_test 12.1 { - CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } {} do_catchsql_test 12.2 { @@ -341,8 +350,9 @@ do_test 12.3 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 13.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o'); } {} @@ -365,8 +375,9 @@ do_execsql_test 13.6 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 14.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); WITH d(x,y) AS ( SELECT NULL, 'xyz xyz xyz xyz xyz xyz' @@ -449,8 +460,9 @@ do_catchsql_test 16.2 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 17.1 { - CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO b2 VALUES('a'); INSERT INTO b2 VALUES('b'); INSERT INTO b2 VALUES('c'); @@ -466,8 +478,9 @@ do_test 17.2 { if {[string match n* %DETAIL%]==0} { reset_db + sqlite3_fts5_register_origintext db do_execsql_test 17.3 { - CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); INSERT INTO c2 VALUES('x x x', 'x x x'); SELECT rowid FROM c2 WHERE c2 MATCH 'y:x'; } {1} @@ -476,8 +489,9 @@ if {[string match n* %DETAIL%]==0} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 17.1 { - CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL%); + CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL% %TOKENIZER%); INSERT INTO uio VALUES(NULL); INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; @@ -524,8 +538,8 @@ do_execsql_test 17.9 { #-------------------------------------------------------------------- # do_execsql_test 18.1 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); - CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL% %TOKENIZER%); + CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1 VALUES('abc*', NULL); INSERT INTO t2 VALUES(1, 'abcdefg'); } @@ -540,8 +554,9 @@ do_execsql_test 18.3 { # fts5 table in the temp schema. # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 19.0 { - CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1 VALUES('x y z'); INSERT INTO t1 VALUES('w x 1'); SELECT rowid FROM t1 WHERE t1 MATCH 'x'; @@ -551,8 +566,9 @@ do_execsql_test 19.0 { # Test that 6 and 7 byte varints can be read. # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 20.0 { - CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL% %TOKENIZER%); } set ::ids [list \ 0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43] @@ -570,7 +586,7 @@ do_test 20.1 { # do_execsql_test 21.0 { CREATE TEMP TABLE t8(a, b); - CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 21.1 { @@ -581,7 +597,7 @@ do_execsql_test 21.1 { } do_execsql_test 22.0 { - CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t9(rowid, x) VALUES(2, 'bbb'); BEGIN; INSERT INTO t9(rowid, x) VALUES(1, 'aaa'); @@ -596,7 +612,7 @@ do_execsql_test 22.1 { #------------------------------------------------------------------------- do_execsql_test 23.0 { - CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL% %TOKENIZER%); CREATE TABLE t11(x); } do_execsql_test 23.1 { @@ -608,7 +624,7 @@ do_execsql_test 23.2 { #------------------------------------------------------------------------- do_execsql_test 24.0 { - CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t12 VALUES('aaaa'); } do_execsql_test 24.1 { @@ -618,6 +634,9 @@ do_execsql_test 24.1 { INSERT INTO t12 VALUES('aaaa'); END; } +execsql_pp { + SELECT rowid, hex(block) FROM t12_data +} do_execsql_test 24.2 { INSERT INTO t12(t12) VALUES('integrity-check'); } @@ -627,7 +646,7 @@ do_execsql_test 24.3 { #------------------------------------------------------------------------- do_execsql_test 25.0 { - CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 25.1 { BEGIN; @@ -638,6 +657,7 @@ SELECT * FROM t13('BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB } +} } expand_all_sql db diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 155d74c025..3fd5f17e7b 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -112,9 +112,50 @@ do_execsql_test 2.5 { INSERT INTO ft(ft) VALUES('optimize'); } -proc b {x} { string map [list "\0" "."] $x } -db func b b +#------------------------------------------------------------------------- +reset_db + +sqlite3_fts5_register_origintext db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize="origintext unicode61"); + CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); + + INSERT INTO ft(rowid, x) VALUES(1, 'hello'); + INSERT INTO ft(rowid, x) VALUES(2, 'Hello'); + INSERT INTO ft(rowid, x) VALUES(3, 'HELLO'); +} + +#proc b {x} { string map [list "\0" "."] $x } +#db func b b +#execsql_pp { SELECT b(term) FROM vocab } + +do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1 +do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2 +do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3 + +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE ft2 USING fts5(x, + tokenize="origintext unicode61", + tokendata=1 + ); + CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance); + + INSERT INTO ft2(rowid, x) VALUES(1, 'hello'); + INSERT INTO ft2(rowid, x) VALUES(2, 'Hello'); + INSERT INTO ft2(rowid, x) VALUES(3, 'HELLO'); + + INSERT INTO ft2(rowid, x) VALUES(10, 'helloooo'); +} + +#proc b {x} { string map [list "\0" "."] $x } +#db func b b #execsql_pp { SELECT b(term) FROM vocab } +do_execsql_test 3.1.1 { SELECT rowid FROM ft2('hello') } {1 2 3} +do_execsql_test 3.1.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} +do_execsql_test 3.1.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} + +do_execsql_test 3.1.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} + finish_test diff --git a/manifest b/manifest index e0c37bf1e2..92f3bfcf68 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixes\sfor\sfts5\sexpression\sparser\smodule\sto\sallow\sembedded\s0x00\sbytes\sin\stokens. -D 2023-10-03T19:06:52.966 +C Add\sthe\stokendata=1\soption\sto\signore\strailing\stoken-data\swhen\squerying\san\sfts5\stable. +D 2023-10-11T21:08:12.656 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -88,13 +88,13 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h 05501612cc655504c5dce8ba765ab621d50fc478490089beaa0d75e00b23e520 -F ext/fts5/fts5Int.h 66a38b285e2b860baa29745d8eff27f5b0809268e7820498494d9acfaccf8a5c +F ext/fts5/fts5Int.h a21eb1cf036ac9eb943e45ed307762901ea86f0159bf0848baa2079a112ddc2f F ext/fts5/fts5_aux.c 572d5ec92ba7301df2fea3258576332f2f4d2dfd66d8263afd157d9deceac480 F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 -F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081 -F ext/fts5/fts5_expr.c cc215d39714b428523d2f2ef42b713c83095a28a67bc7f6f2dc4ac036a29f460 +F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf +F ext/fts5/fts5_expr.c fd091d0558fda2517602ed5886ec615ce3e1bd76fb0bb0e5d1aa85ba8db287a8 F ext/fts5/fts5_hash.c 76765856397eff56f526b0640b23a1677d737d35e07bc00e4b4b2e0fc5fda60d -F ext/fts5/fts5_index.c e472083d371f420d52ec80445b9d2a99b16b23548205cb4064ddcd41bd79f63e +F ext/fts5/fts5_index.c 79a8e45771d0be24f0399b12268299f132ce0970ade941ba8a2d40b1d1aee4d7 F ext/fts5/fts5_main.c 799ec88d2309055f6406bddb0bd6ed80148c5da5eb14594c3c5309a6e944d489 F ext/fts5/fts5_storage.c 3c9b41fce41b6410f2e8f82eb035c6a29b2560483f773e6dc98cf3cb2e4ddbb5 F ext/fts5/fts5_tcl.c 0d2bb0ff7bf6ee136015be118167f0bd956ddd05a8f02c68bd34299b50648f9f @@ -106,8 +106,8 @@ F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d0988 F ext/fts5/fts5_vocab.c 12138e84616b56218532e3e8feb1d3e0e7ae845e33408dbe911df520424dc9d6 F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl a9de9c2209cc4e7ae3c753e783504e67206c6c1467d08f209cd0c5923d3e8d8b -F ext/fts5/test/fts5aa.test ba5158eba7d61359becdfca895ef471072c7bf7b20e5e60dcb4d024c8419c926 +F ext/fts5/test/fts5_common.tcl 8b1848ac2baad10e444e4183034a52050b52d20b3796d9d30e78f01ab0d05583 +F ext/fts5/test/fts5aa.test 4db81519863244a3cab35795fe65ab6b592e7970c7409eba098b23ebbfc08d95 F ext/fts5/test/fts5ab.test bd932720c748383277456b81f91bc00453de2174f9762cd05f95d0495dc50390 F ext/fts5/test/fts5ac.test a7aa7e1fefc6e1918aa4d3111d5c44a09177168e962c5fd2cca9620de8a7ed6d F ext/fts5/test/fts5ad.test e8cf959dfcd57c8e46d6f5f25665686f3b6627130a9a981371dafdf6482790de @@ -187,7 +187,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 3e1ac3230f65a0d644e9bf0738bebb09b4db9d9f123e1307d8630e42269b4afb +F ext/fts5/test/fts5origintext.test 646df137f1aa5b3d7032374ebe82bfdbe88d9f825d73ce8d44bead480317a9c5 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2123,8 +2123,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P e051120067fd87f57b498e505e3960cf4d14e8e33bad940618cc0823253254f7 -R 5cce41f02eae121cb66e72942ef56113 +P 342c8d0783f449817d3f565ff6b9f010a6c690beeea32f1861640810490a8b5f +R 28168382a793dd4cf732de7d6442ef72 U dan -Z 80b8e2664e768ed2ac03913fcf0180ea +Z cded15b3b6aeefc28aa13f7856e47e8d # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 4798938837..8bc375eea2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -342c8d0783f449817d3f565ff6b9f010a6c690beeea32f1861640810490a8b5f \ No newline at end of file +122935182ad5869ce3a4c6d796c38a0509f6f3384dd1b3e60a3f2f0f366cc5f5 \ No newline at end of file