#define FTS5INDEX_QUERY_NOOUTPUT 0x0020
#define FTS5INDEX_QUERY_SKIPHASH 0x0040
#define FTS5INDEX_QUERY_NOTOKENDATA 0x0080
+#define FTS5INDEX_QUERY_SCANONETERM 0x0100
/*
** Create/destroy an Fts5Index object.
int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*);
int sqlite3Fts5ExprInstToken(Fts5Expr*, int, int, int, int, const char**, int*);
+void sqlite3Fts5ExprClearTokens(Fts5Expr*);
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
sCtx.aPopulator = aPopulator;
sCtx.iOff = (((i64)iCol) << 32) - 1;
- /* If this is a tokendata=1 table, clear out the hash tables of
- ** full-terms. */
- if( pConfig->bTokendata ){
- for(i=0; i<pExpr->nPhrase; i++){
- Fts5ExprTerm *pT;
- for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){
- sqlite3Fts5IndexIterClearTokendata(pT->pIter);
- }
- }
- }
-
for(i=0; i<pExpr->nPhrase; i++){
Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
Fts5Colset *pColset = pNode->pNear->pColset;
return sqlite3Fts5IterToken(pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut);
}
+/*
+** Clear the token mappings for all Fts5IndexIter objects mannaged by
+** the expression passed as the only argument.
+*/
+void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){
+ int ii;
+ for(ii=0; ii<pExpr->nPhrase; ii++){
+ Fts5ExprTerm *pT;
+ for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){
+ sqlite3Fts5IndexIterClearTokendata(pT->pIter);
+ }
+ }
+}
+
sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
sqlite3_stmt *pIdxSelect;
+ sqlite3_stmt *pIdxNextSelect;
int nRead; /* Total number of blocks read */
sqlite3_stmt *pDeleteFromIdx;
fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
}
- if( p->rc==SQLITE_OK && bGe==0 ){
+ if( p->rc==SQLITE_OK && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM)) ){
pIter->flags |= FTS5_SEGITER_ONETERM;
if( pIter->pLeaf ){
if( flags & FTS5INDEX_QUERY_DESC ){
);
}
+
+/*
+** SQL used by fts5SegIterNextInit() to find the page to open.
+*/
+static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){
+ if( p->pIdxNextSelect==0 ){
+ Fts5Config *pConfig = p->pConfig;
+ fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintf(
+ "SELECT pgno FROM '%q'.'%q_idx' WHERE "
+ "segid=? AND term>? ORDER BY term ASC LIMIT 1",
+ pConfig->zDb, pConfig->zName
+ ));
+
+ }
+ return p->pIdxNextSelect;
+}
+
+/*
+** This is similar to fts5SegIterSeekInit(), except that it initializes
+** the segment iterator to point to the first term following the page
+** with pToken/nToken on it.
+*/
+static void fts5SegIterNextInit(
+ Fts5Index *p,
+ const char *pTerm, int nTerm,
+ Fts5StructureSegment *pSeg, /* Description of segment */
+ Fts5SegIter *pIter /* Object to populate */
+){
+ int iPg = -1; /* Page of segment to open */
+ int bDlidx = 0;
+ sqlite3_stmt *pSel = 0; /* SELECT to find iPg */
+
+ pSel = fts5IdxNextStmt(p);
+ if( pSel ){
+ assert( p->rc==SQLITE_OK );
+ sqlite3_bind_int(pSel, 1, pSeg->iSegid);
+ sqlite3_bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC);
+
+ if( sqlite3_step(pSel)==SQLITE_ROW ){
+ i64 val = sqlite3_column_int64(pSel, 0);
+ iPg = (int)(val>>1);
+ bDlidx = (val & 0x0001);
+ }
+ p->rc = sqlite3_reset(pSel);
+ if( p->rc ) return;
+ }
+
+ memset(pIter, 0, sizeof(*pIter));
+ pIter->pSeg = pSeg;
+ pIter->flags |= FTS5_SEGITER_ONETERM;
+ if( iPg>=0 ){
+ pIter->iLeafPgno = iPg - 1;
+ fts5SegIterNextPage(p, pIter);
+ fts5SegIterSetNext(p, pIter);
+ fts5SegIterAllocTombstone(p, pIter);
+ }
+ if( pIter->pLeaf ){
+ const u8 *a = pIter->pLeaf->p;
+ int iTermOff = 0;
+
+ pIter->iPgidxOff = pIter->pLeaf->szLeaf;
+ pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff);
+ pIter->iLeafOffset = iTermOff;
+ fts5SegIterLoadTerm(p, pIter, 0);
+ fts5SegIterLoadNPos(p, pIter);
+ if( bDlidx ) fts5SegIterLoadDlidx(p, pIter);
+
+ assert( p->rc!=SQLITE_OK ||
+ fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0
+ );
+ }
+}
+
/*
** Initialize the object pIter to point to term pTerm/nTerm within the
** in-memory hash table. If there is no such term in the hash-table, the
sqlite3_finalize(p->pIdxWriter);
sqlite3_finalize(p->pIdxDeleter);
sqlite3_finalize(p->pIdxSelect);
+ sqlite3_finalize(p->pIdxNextSelect);
sqlite3_finalize(p->pDataVersion);
sqlite3_finalize(p->pDeleteFromIdx);
sqlite3Fts5HashFree(p->pHash);
if( p->rc==SQLITE_OK ){
if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){
int nAlloc = pIn ? pIn->nIterAlloc*2 : 16;
- int nByte = nAlloc * sizeof(Fts5Iter*);
+ int nByte = nAlloc * sizeof(Fts5Iter*) + sizeof(Fts5TokenDataIter);
Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte);
if( pNew==0 ){
}else{
pRet->apIter[pRet->nIter++] = pAppend;
}
+ assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc );
return pRet;
}
}
}
+/*
+** This function sets up an iterator to use for a non-prefix query on a
+** tokendata=1 table.
+*/
static Fts5Iter *fts5SetupTokendataIter(
Fts5Index *p, /* FTS index to query */
const u8 *pToken, /* Buffer containing query term */
Fts5Iter *pRet = 0;
Fts5TokenDataIter *pSet = 0;
Fts5Structure *pStruct = 0;
- const int flags = FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_SCAN;
+ const int flags = FTS5INDEX_QUERY_SCANONETERM | FTS5INDEX_QUERY_SCAN;
Fts5Buffer bSeek = {0, 0, 0};
Fts5Buffer *pSmall = 0;
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
- fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter);
+ int bDone = 0;
- pNewIter++;
if( pPrevIter ){
if( fts5BufferCompare(pSmall, &pPrevIter->term) ){
- fts5SegIterSetEOF(pPrevIter);
+ memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter));
+ memset(pPrevIter, 0, sizeof(Fts5SegIter));
+ bDone = 1;
+ }else if( pPrevIter->pLeaf
+ && pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf
+ ){
+ fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter);
+ bDone = 1;
}
- pPrevIter++;
}
+
+ if( bDone==0 ){
+ fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter);
+ }
+
+ pNewIter++;
+ if( pPrevIter ) pPrevIter++;
}
}
fts5TokendataSetTermIfEof(pPrev, pSmall);
- pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
+ pNew->bSkipEmpty = 1;
pNew->pColset = pColset;
fts5IterSetOutputCb(&p->rc, pNew);
*/
void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){
Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
- assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL );
if( pIter->pTokenDataIter ){
pIter->pTokenDataIter->nMap = 0;
}
);
assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) );
+ /* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table,
+ ** clear any token mappings accumulated at the fts5_index.c level. In
+ ** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH,
+ ** we need to retain the mappings for the entire query. */
+ if( pCsr->ePlan==FTS5_PLAN_MATCH
+ && ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata
+ ){
+ sqlite3Fts5ExprClearTokens(pCsr->pExpr);
+ }
+
if( pCsr->ePlan<3 ){
int bSkip = 0;
if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;
INSERT INTO ft VALUES('WORLD');
}
-breakpoint
do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3}
do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6}
do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9}
SELECT fts5_test_poslist(ft) FROM ft('hello');
} {{0.0.0 0.0.2 0.0.4}}
-breakpoint
do_execsql_test 1.3 {
SELECT
insttoken(ft, 0, 0),
FROM ft('hello') ORDER BY rank;
} {hello.Hello hello.HELLO hello}
+ do_execsql_test 1.5 {
+ CREATE VIRTUAL TABLE ft2 USING fts5(
+ x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%
+ );
+ INSERT INTO ft2(rowid, x) VALUES(1, 'ONE one two three ONE');
+ INSERT INTO ft2(rowid, x) VALUES(2, 'TWO one two three TWO');
+ INSERT INTO ft2(rowid, x) VALUES(3, 'THREE one two three THREE');
+ }
+
+ do_execsql_test 1.6 {
+ SELECT insttoken(ft2, 0, 0), rowid FROM ft2('three') ORDER BY rank;
+ } {three.THREE 3 three 1 three 2}
}
finish_test
--- /dev/null
+# 2023 November 22
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focused on phrase queries.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5origintext4
+
+# If SQLITE_ENABLE_FTS5 is defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+sqlite3_fts5_register_origintext db
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize="origintext unicode61", tokendata=1
+ );
+}
+
+do_execsql_test 1.1 {
+ BEGIN;
+ INSERT INTO ft SELECT 'the first thing';
+
+ WITH s(i) AS (
+ SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<90000
+ )
+ INSERT INTO ft SELECT 'The second thing' FROM s;
+
+ INSERT INTO ft SELECT 'the first thing';
+ COMMIT;
+ INSERT INTO ft(ft) VALUES('optimize');
+}
+
+foreach {tn sql expr} {
+ 1 { SELECT rowid FROM ft('the') } {$mem > 250000}
+ 2 { SELECT rowid FROM ft('first') } {$mem < 50000}
+ 3 { SELECT rowid FROM ft('the first') } {$mem < 50000}
+} {
+ db close
+ sqlite3 db test.db
+ sqlite3_fts5_register_origintext db
+
+ execsql $sql
+ do_test 1.2.$tn {
+ set mem [lindex [sqlite3_db_status db CACHE_USED 0] 1]
+ expr $expr
+ } 1
+}
+
+proc b {x} { string map [list "\0" "."] $x }
+db func b b
+# execsql_pp { SELECT segid, b(term), pgno from ft_idx }
+
+finish_test
+
-C Remove\sold\scode\sfor\stokendata=1\squeries.
-D 2023-12-01T20:37:11.688
+C Ensure\sthat\stokendata=1\squeries\savoid\sloading\slarge\sdoclists\sfor\squeries\slike\s"common\sAND\suncommon",\sjust\sas\stokendata=0\squeries\sdo.
+D 2023-12-02T17:32:16.568
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h 5e5630fc81e212f658afaa5b2650dac939d2729d0723aef1eeaff908f1725648
-F ext/fts5/fts5Int.h 2dc73393460e5c5cab67adc7e32e1387cc225b57e05f629d490e65cddea1a8c5
+F ext/fts5/fts5Int.h 285118aa6dfccb382e84eaeb9f7bec334e4f7104efa9303240605447003445c9
F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad
F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5
F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf
-F ext/fts5/fts5_expr.c aac8026aedf56c9a6e32b31c89f9dd7e5548378457085093307d06be14f1a176
+F ext/fts5/fts5_expr.c f83259b52b7b3e76768b835fe155cb7e345affdfafb96574372b4127d5f5496a
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
-F ext/fts5/fts5_index.c 2296bcd6736eaf093212892474619dfdb7ac1e262732e2b72cb528172c0b13d6
-F ext/fts5/fts5_main.c 20596de592af135f68b9be875f0a28715f6562bbdedd215e1c89eac1b42e97f9
+F ext/fts5/fts5_index.c a02b6ff2d391dd9c2119f437eba1e8af5ac4b2f1798c7c39a93d73de95ad2337
+F ext/fts5/fts5_main.c 075995302198fe6f591fdbbedd415dfac564a9bfc20aea81e6fa0503b2d94af0
F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d
F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785
F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca
F ext/fts5/test/fts5origintext.test 6574e8d2121460cda72866afe3e582693d9992f150b0703aff5981625b527e62
-F ext/fts5/test/fts5origintext2.test 3259b331073fec918e02fd4d14d50586f9a3531da047a2a8f4624983eb654229
-F ext/fts5/test/fts5origintext3.test cb0f5835f8dff5954ee20570b68ee520cf04a08f6f9ca967b9d01d27e532da37
+F ext/fts5/test/fts5origintext2.test 43b07dd62d087743322b0003a27c8efdbda6c8659a27fde71f32ead27b5a0969
+F ext/fts5/test/fts5origintext3.test e0d47c187e7c279d25aa27aa3de8dd0d26b050a74db90670c9b20d0ecfcfb52a
+F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871
F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b
F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a
F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 8258967411d3ff212424b25fec79ded0d8ae83e773cd35a0bbf300c94923f25b
-R d3ee6e0cbe8554b06da39b239b5142bc
+P b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b
+R aa740197e8da931b3ffaf4ddf853a1ed
U dan
-Z cbe01276ff2d44c618e4cc899051c453
+Z 62655ccf950056e7477fcaff4611778d
# Remove this line to create a well-formed Fossil manifest.
-b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b
\ No newline at end of file
+7bda09ab404a110d57449e149a3281fca8dc4cacf7bd9832ea2a1356ad20fe8e
\ No newline at end of file