From b5effc0605abcf3f67a4c62ef42d0ded03a214fd Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 1 Dec 2023 20:09:59 +0000 Subject: [PATCH] Different approach to querying a tokendata=1 table. Saves cpu and memory. FossilOrigin-Name: c523f40895866e6fc979a26483dbea8206126b4bbdf4b73b77263c09e13c855e --- ext/fts5/fts5Int.h | 19 +- ext/fts5/fts5_expr.c | 6 +- ext/fts5/fts5_index.c | 553 ++++++++++++++++++++++++++--- ext/fts5/fts5_main.c | 7 +- ext/fts5/test/fts5origintext.test | 10 +- ext/fts5/test/fts5origintext2.test | 1 + ext/fts5/test/fts5origintext3.test | 9 + manifest | 24 +- manifest.uuid | 2 +- 9 files changed, 541 insertions(+), 90 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 9d2622448d..cee9528858 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -385,17 +385,18 @@ struct Fts5IndexIter { /* ** Values used as part of the flags argument passed to IndexQuery(). */ -#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ -#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ -#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ -#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ +#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ +#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ +#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ +#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ /* The following are used internally by the fts5_index.c module. They are ** defined here only to make it easier to avoid clashes with the flags ** above. */ -#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 -#define FTS5INDEX_QUERY_NOOUTPUT 0x0020 -#define FTS5INDEX_QUERY_SKIPHASH 0x0040 +#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 +#define FTS5INDEX_QUERY_NOOUTPUT 0x0020 +#define FTS5INDEX_QUERY_SKIPHASH 0x0040 +#define FTS5INDEX_QUERY_NOTOKENDATA 0x0080 /* ** Create/destroy an Fts5Index object. @@ -467,7 +468,7 @@ int sqlite3Fts5StructureTest(Fts5Index*, void*); /* ** Used by xInstToken() and xPhraseToken(). */ -int sqlite3Fts5IterToken(Fts5IndexIter*, int, int, const char**, int*); +int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*); /* ** Insert or remove data to or from the index. Each time a document is @@ -548,7 +549,7 @@ int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); /* Used to populate hash tables for xInstToken in detail=none/column mode. */ void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); int sqlite3Fts5IndexIterWriteTokendata( - Fts5IndexIter*, const char*, int, int iCol, int iOff + Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff ); /* diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 6589d1b3e6..89e7cbf364 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3003,6 +3003,7 @@ static int fts5ExprPopulatePoslistsCb( Fts5Expr *pExpr = p->pExpr; int i; int nQuery = nToken; + i64 iRowid = pExpr->pRoot->iRowid; UNUSED_PARAM2(iUnused1, iUnused2); @@ -3025,7 +3026,7 @@ static int fts5ExprPopulatePoslistsCb( int iCol = p->iOff>>32; int iTokOff = p->iOff & 0x7FFFFFFF; rc = sqlite3Fts5IndexIterWriteTokendata( - pT->pIter, pToken, nToken, iCol, iTokOff + pT->pIter, pToken, nToken, iRowid, iCol, iTokOff ); } if( rc ) return rc; @@ -3210,6 +3211,7 @@ int sqlite3Fts5ExprInstToken( ){ Fts5ExprPhrase *pPhrase = 0; Fts5IndexIter *pIter = 0; + i64 iRowid = pExpr->pRoot->iRowid; if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ return SQLITE_RANGE; @@ -3220,6 +3222,6 @@ int sqlite3Fts5ExprInstToken( } pIter = pPhrase->aTerm[iToken].pIter; - return sqlite3Fts5IterToken(pIter, iCol, iOff+iToken, ppOut, pnOut); + return sqlite3Fts5IterToken(pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut); } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 4b7c8d3335..5f39a1e764 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -327,6 +327,8 @@ typedef struct Fts5TokenMapEntry Fts5TokenMapEntry; typedef struct Fts5TokenMapToken Fts5TokenMapToken; typedef struct Fts5TokenMap Fts5TokenMap; +typedef struct Fts5TokenDataIter Fts5TokenDataIter; + struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ int nn; /* Size of record in bytes */ @@ -594,9 +596,16 @@ struct Fts5SegIter { ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. +** +** pColset: +** If not NULL, points to an object containing a set of column indices. +** Only matches that occur in one of these columns will be returned. +** The Fts5Iter does not own the Fts5Colset object, and so it is not +** freed when the iterator is closed - it is owned by the upper layer. */ struct Fts5Iter { Fts5IndexIter base; /* Base class containing output vars */ + Fts5TokenDataIter *pTokenDataIter; Fts5Index *pIndex; /* Index that owns this iterator */ Fts5Buffer poslist; /* Buffer containing current poslist */ @@ -3780,6 +3789,28 @@ static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ } } +static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){ + int iIter; + for(iIter=pIter->nSeg-1; iIter>0; iIter--){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){ + Fts5SegIter *pSeg = &pIter->aSeg[iEq]; + if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); + fts5MultiIterAdvanced(p, pIter, iEq, iIter); + } + } + fts5MultiIterSetEof(pIter); + fts5AssertMultiIterSetup(p, pIter); + + if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter)) + || fts5MultiIterIsDeleted(pIter) + ){ + fts5MultiIterNext(p, pIter, 0, 0); + }else if( pIter->base.bEof==0 ){ + Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + pIter->xSetOutputs(pIter, pSeg); + } +} /* ** Allocate a new Fts5Iter object. @@ -3866,26 +3897,7 @@ static void fts5MultiIterNew( ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){ - for(iIter=pNew->nSeg-1; iIter>0; iIter--){ - int iEq; - if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ - Fts5SegIter *pSeg = &pNew->aSeg[iEq]; - if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); - fts5MultiIterAdvanced(p, pNew, iEq, iIter); - } - } - fts5MultiIterSetEof(pNew); - fts5AssertMultiIterSetup(p, pNew); - - if( (pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew)) - || fts5MultiIterIsDeleted(pNew) - ){ - fts5MultiIterNext(p, pNew, 0, 0); - }else if( pNew->base.bEof==0 ){ - Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst]; - pNew->xSetOutputs(pNew, pSeg); - } - + fts5MultiIterFinishSetup(p, pNew); }else{ fts5MultiIterFree(pNew); *ppOut = 0; @@ -6718,6 +6730,431 @@ int sqlite3Fts5IndexWrite( return rc; } +/* +** pToken points to a buffer of size nToken bytes containing a search +** term, including the index number at the start, used on a tokendata=1 +** table. This function returns true if the term in buffer pBuf matches +** token pToken/nToken. +*/ +static int fts5IsTokendataPrefix( + Fts5Buffer *pBuf, + const u8 *pToken, + int nToken +){ + return ( + pBuf->n>=nToken + && 0==memcmp(pBuf->p, pToken, nToken) + && (pBuf->n==nToken || pBuf->p[nToken]==0x00) + ); +} + +/* +** Ensure the segment-iterator passed as the only argument points to EOF. +*/ +static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ + fts5DataRelease(pSeg->pLeaf); + pSeg->pLeaf = 0; +} + +typedef struct Fts5TokenDataMap Fts5TokenDataMap; +struct Fts5TokenDataMap { + i64 iRowid; + i64 iPos; + int iIter; +}; + +struct Fts5TokenDataIter { + int nIter; + int nIterAlloc; + + int nMap; + int nMapAlloc; + Fts5TokenDataMap *aMap; + + Fts5PoslistReader *aPoslistReader; + Fts5Iter *apIter[1]; +}; + +static Fts5TokenDataIter *fts5AppendTokendataIter( + Fts5Index *p, + Fts5TokenDataIter *pIn, + Fts5Iter *pAppend +){ + Fts5TokenDataIter *pRet = pIn; + + if( p->rc==SQLITE_OK ){ + if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){ + int nAlloc = pIn ? pIn->nIterAlloc*2 : 16; + int nByte = nAlloc * sizeof(Fts5Iter*); + Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte); + + if( pNew==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + if( pIn==0 ) memset(pNew, 0, nByte); + pRet = pNew; + pNew->nIterAlloc = nAlloc; + } + } + } + if( p->rc ){ + sqlite3Fts5IterClose((Fts5IndexIter*)pAppend); + }else{ + pRet->apIter[pRet->nIter++] = pAppend; + } + + return pRet; +} + +static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ + if( pSet ){ + int ii; + for(ii=0; iinIter; ii++){ + fts5MultiIterFree(pSet->apIter[ii]); + } + sqlite3_free(pSet->aPoslistReader); + sqlite3_free(pSet->aMap); + sqlite3_free(pSet); + } +} + +static int fts5TokendataIterToken( + Fts5Iter *pIter, + i64 iRowid, + int iCol, int iOff, + const char **ppOut, int *pnOut +){ + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + Fts5TokenDataMap *aMap = pT->aMap; + i64 iPos = (((i64)iCol)<<32) + iOff; + + int i1 = 0; + int i2 = pT->nMap; + int iTest = 0; + + while( i2>i1 ){ + iTest = (i1 + i2) / 2; + + if( aMap[iTest].iRowidiRowid ){ + i2 = iTest; + }else{ + if( aMap[iTest].iPosiPos ){ + i2 = iTest; + }else{ + break; + } + } + } + + if( i2>i1 ){ + Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; + *ppOut = (const char*)pMap->aSeg[0].term.p+1; + *pnOut = pMap->aSeg[0].term.n-1; + } + + return SQLITE_OK; +} + +static void fts5TokendataIterAppendMap( + Fts5Index *p, + Fts5TokenDataIter *pT, + int iIter, + i64 iRowid, + i64 iPos +){ + if( p->rc==SQLITE_OK ){ + if( pT->nMap==pT->nMapAlloc ){ + int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; + int nByte = nNew * sizeof(Fts5TokenDataMap); + Fts5TokenDataMap *aNew; + + aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + + pT->aMap = aNew; + pT->nMapAlloc = nNew; + } + + pT->aMap[pT->nMap].iRowid = iRowid; + pT->aMap[pT->nMap].iPos = iPos; + pT->aMap[pT->nMap].iIter = iIter; + pT->nMap++; + } +} + +static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ + int ii; + int nHit = 0; + i64 iRowid = SMALLEST_INT64; + int iMin = 0; + + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + + pIter->base.nData = 0; + pIter->base.pData = 0; + + for(ii=0; iinIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( p->base.bEof==0 ){ + if( nHit==0 || p->base.iRowidbase.iRowid; + nHit = 1; + pIter->base.pData = p->base.pData; + pIter->base.nData = p->base.nData; + iMin = ii; + }else if( p->base.iRowid==iRowid ){ + nHit++; + } + } + } + + if( nHit==0 ){ + pIter->base.bEof = 1; + }else{ + int eDetail = pIter->pIndex->pConfig->eDetail; + pIter->base.bEof = 0; + pIter->base.iRowid = iRowid; + + if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){ + fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, iRowid, -1); + }else + if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){ + int nReader = 0; + int nByte = 0; + i64 iPrev = 0; + + /* Allocate array of iterators if they are not already allocated. */ + if( pT->aPoslistReader==0 ){ + pT->aPoslistReader = sqlite3Fts5MallocZero( + &pIter->pIndex->rc, sizeof(Fts5PoslistReader) * pT->nIter + ); + if( pT->aPoslistReader==0 ) return; + } + + /* Populate an iterator for each poslist that will be merged */ + for(ii=0; iinIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( iRowid==p->base.iRowid ){ + sqlite3Fts5PoslistReaderInit( + p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++] + ); + nByte += p->base.nData; + } + } + + /* Ensure the output buffer is large enough */ + if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10) ){ + return; + } + + /* Ensure the token-mapping is large enough */ + if( eDetail==FTS5_DETAIL_FULL && pT->nMapAlloc<(pT->nMap + nByte) ){ + int nNew = (pT->nMapAlloc + nByte) * 2; + Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_realloc( + pT->aMap, nNew*sizeof(Fts5TokenDataMap) + ); + if( aNew==0 ){ + pIter->pIndex->rc = SQLITE_NOMEM; + return; + } + pT->aMap = aNew; + pT->nMapAlloc = nNew; + } + + pIter->poslist.n = 0; + + while( 1 ){ + i64 iMinPos = LARGEST_INT64; + + /* Find smallest position */ + iMin = 0; + for(ii=0; iiaPoslistReader[ii]; + if( pReader->bEof==0 ){ + if( pReader->iPosiPos; + iMin = ii; + } + } + } + + /* If all readers were at EOF, break out of the loop. */ + if( iMinPos==LARGEST_INT64 ) break; + + sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos); + sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]); + + if( eDetail==FTS5_DETAIL_FULL ){ + pT->aMap[pT->nMap].iPos = iMinPos; + pT->aMap[pT->nMap].iIter = iMin; + pT->aMap[pT->nMap].iRowid = iRowid; + pT->nMap++; + } + } + + pIter->base.pData = pIter->poslist.p; + pIter->base.nData = pIter->poslist.n; + } + } +} + +static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ + int ii; + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + + for(ii=0; iinIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( p->base.bEof==0 + && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowidpIndex, p, bFrom, iFrom); + while( bFrom && p->base.bEof==0 + && p->base.iRowidpIndex->rc==SQLITE_OK + ){ + fts5MultiIterNext(p->pIndex, p, 0, 0); + } + } + } + + fts5IterSetOutputsTokendata(pIter); +} + +static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ + if( pIter && pIter->aSeg[0].pLeaf==0 ){ + fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p); + } +} + +static Fts5Iter *fts5SetupTokendataIter( + Fts5Index *p, /* FTS index to query */ + int bDesc, /* True for "ORDER BY rowid DESC" */ + const u8 *pToken, /* Buffer containing query term */ + int nToken, /* Size of buffer pToken in bytes */ + Fts5Colset *pColset /* Colset to filter on */ +){ + Fts5Iter *pRet = 0; + Fts5TokenDataIter *pSet = 0; + Fts5Structure *pStruct = 0; + const int flags = FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_SCAN; + + assert( bDesc==0 ); + + Fts5Buffer bSeek = {0, 0, 0}; + Fts5Buffer *pSmall = 0; + + fts5IndexFlush(p); + pStruct = fts5StructureRead(p); + + while( 1 ){ + Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0; + Fts5Iter *pNew = 0; + Fts5SegIter *pNewIter = 0; + Fts5SegIter *pPrevIter = 0; + + int iLvl, iSeg, ii; + + pNew = fts5MultiIterAlloc(p, pStruct->nSegment); + if( pNew==0 ) break; + + if( pSmall ){ + fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p); + fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0"); + }else{ + fts5BufferSet(&p->rc, &bSeek, nToken, pToken); + } + + pNewIter = &pNew->aSeg[0]; + pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0); + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ + Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; + fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); + + pNewIter++; + if( pPrevIter ){ + if( fts5BufferCompare(pSmall, &pPrevIter->term) ){ + fts5SegIterSetEOF(pPrevIter); + } + pPrevIter++; + } + } + } + fts5TokendataSetTermIfEof(pPrev, pSmall); + + pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY)); + pNew->pColset = pColset; + fts5IterSetOutputCb(&p->rc, pNew); + + /* Loop through all segments in the new iterator. Find the smallest + ** term that any segment-iterator points to. Iterator pNew will be + ** used for this term. Also, set any iterator that points to a term that + ** does not match pToken/nToken to point to EOF */ + pSmall = 0; + for(ii=0; iinSeg; ii++){ + Fts5SegIter *pII = &pNew->aSeg[ii]; + if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){ + fts5SegIterSetEOF(pII); + } + if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){ + pSmall = &pII->term; + } + } + + /* If pSmall is still NULL at this point, then the new iterator does + ** not point to any terms that match the query. So delete it and break + ** out of the loop - all required iterators have been collected. */ + if( pSmall==0 ){ + sqlite3Fts5IterClose((Fts5IndexIter*)pNew); + break; + } + + /* Append this iterator to the set and continue. */ + pSet = fts5AppendTokendataIter(p, pSet, pNew); + } + + if( p->rc==SQLITE_OK && pSet ){ + int ii; + for(ii=0; iinIter; ii++){ + Fts5Iter *pIter = pSet->apIter[ii]; + int iSeg; + for(iSeg=0; iSegnSeg; iSeg++){ + pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM; + } + fts5MultiIterFinishSetup(p, pIter); + } + } + + if( p->rc==SQLITE_OK ){ + pRet = fts5MultiIterAlloc(p, 0); + } + if( pRet ){ + pRet->pTokenDataIter = pSet; + if( pSet ){ + fts5IterSetOutputsTokendata(pRet); + }else{ + pRet->base.bEof = 1; + } + }else{ + fts5TokendataIterDelete(pSet); + } + + fts5StructureRelease(pStruct); + fts5BufferFree(&bSeek); + return pRet; +} + + /* ** Open a new iterator to iterate though all rowid that match the ** specified token or token prefix. @@ -6739,6 +7176,7 @@ int sqlite3Fts5IndexQuery( if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ int iIdx = 0; /* Index to search */ int iPrefixIdx = 0; /* +1 prefix index */ + int bTokendata = (flags&FTS5INDEX_QUERY_NOTOKENDATA)?0:pConfig->bTokendata; if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); /* Figure out which index to search and set iIdx accordingly. If this @@ -6766,7 +7204,7 @@ int sqlite3Fts5IndexQuery( } } - if( iIdx<=pConfig->nPrefix && (pConfig->bTokendata==0 || iIdx!=0) ){ + if( iIdx<=pConfig->nPrefix && (bTokendata==0 || iIdx!=0) ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); @@ -6776,6 +7214,10 @@ int sqlite3Fts5IndexQuery( ); fts5StructureRelease(pStruct); } + }else if( bTokendata && iIdx==0 ){ + int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; + buf.p[0] = '0'; + pRet = fts5SetupTokendataIter(p, bDesc, buf.p, nToken+1, pColset); }else{ /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; @@ -6816,7 +7258,11 @@ int sqlite3Fts5IndexQuery( int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; assert( pIter->pIndex->rc==SQLITE_OK ); - fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); + if( pIter->pTokenDataIter ){ + fts5TokendataIterNext(pIter, 0, 0); + }else{ + fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); + } return fts5IndexReturn(pIter->pIndex); } @@ -6849,7 +7295,11 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ */ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); + if( pIter->pTokenDataIter ){ + fts5TokendataIterNext(pIter, 1, iMatch); + }else{ + fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); + } return fts5IndexReturn(pIter->pIndex); } @@ -6869,12 +7319,18 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ */ int sqlite3Fts5IterToken( Fts5IndexIter *pIndexIter, + i64 iRowid, int iCol, int iOff, const char **ppOut, int *pnOut ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5TokenMap *pMap = pIter->pTokenMap; + + if( pIter->pTokenDataIter ){ + return fts5TokendataIterToken(pIter, iRowid, iCol, iOff, ppOut, pnOut); + } + if( pMap ){ if( pMap->bHashed==0 ){ Fts5Index *p = pIter->pIndex; @@ -6899,53 +7355,29 @@ int sqlite3Fts5IterToken( void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL ); - if( pIter->pTokenMap ){ - pIter->pTokenMap->nEntry = 0; + if( pIter->pTokenDataIter ){ + pIter->pTokenDataIter->nMap = 0; } } int sqlite3Fts5IndexIterWriteTokendata( Fts5IndexIter *pIndexIter, const char *pToken, int nToken, - int iCol, int iOff + i64 iRowid, int iCol, int iOff ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + Fts5TokenDataIter *pT = pIter->pTokenDataIter; Fts5Index *p = pIter->pIndex; + assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL ); - if( pIter->pTokenMap==0 ){ - pIter->pTokenMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); - } - if( p->rc==SQLITE_OK ){ - Fts5TokenMap *pMap = pIter->pTokenMap; + if( pT ){ int ii; - for(ii=0; iinToken; ii++){ - if( nToken==pMap->aToken[ii].nTerm - && 0==memcmp(pMap->aToken[ii].pTerm, pToken, nToken) - ){ - break; - } - } - if( ii==pMap->nToken ){ - fts5TokenMapTerm(p, pMap, (const u8*)pToken, nToken); - } - if( pMap->nEntry>=pMap->nEntryAlloc ){ - int nNew = pMap->nEntryAlloc ? pMap->nEntryAlloc*2 : 32; - Fts5TokenMapEntry *aNew = (Fts5TokenMapEntry*)sqlite3_realloc( - pMap->aEntry, nNew * sizeof(Fts5TokenMapEntry) - ); - if( aNew==0 ){ - p->rc = SQLITE_NOMEM; - }else{ - pMap->aEntry = aNew; - pMap->nEntryAlloc = nNew; - } + for(ii=0; iinIter; ii++){ + Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; + if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; } - if( p->rc==SQLITE_OK ){ - Fts5TokenMapEntry *pEntry = &pMap->aEntry[pMap->nEntry++]; - pEntry->iRowid = pIndexIter->iRowid; - pEntry->iCol = iCol; - pEntry->iOff = iOff; - pEntry->iTok = ii+1; + if( iinIter ){ + fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff); } } return fts5IndexReturn(p); @@ -6958,6 +7390,7 @@ void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ if( pIndexIter ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5Index *pIndex = pIter->pIndex; + fts5TokendataIterDelete(pIter->pTokenDataIter); fts5MultiIterFree(pIter); sqlite3Fts5IndexCloseReader(pIndex); } @@ -7465,7 +7898,9 @@ static int fts5QueryCksum( int eDetail = p->pConfig->eDetail; u64 cksum = *pCksum; Fts5IndexIter *pIter = 0; - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); + int rc = sqlite3Fts5IndexQuery( + p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA), 0, &pIter + ); while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){ i64 rowid = pIter->iRowid; diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 9f19b24b88..34050474f8 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -656,12 +656,15 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ } idxStr[iIdxStr] = '\0'; - /* Set idxFlags flags for the ORDER BY clause */ + /* Set idxFlags flags for the ORDER BY clause + ** + ** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC". + */ if( pInfo->nOrderBy==1 ){ int iSort = pInfo->aOrderBy[0].iColumn; if( iSort==(pConfig->nCol+1) && bSeenMatch ){ idxFlags |= FTS5_BI_ORDER_RANK; - }else if( iSort==-1 ){ + }else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){ idxFlags |= FTS5_BI_ORDER_ROWID; } if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){ diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 845e8145db..8273b3ca4d 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -141,7 +141,7 @@ do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1 do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2 do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3 -do_execsql_test 3.0 { +do_execsql_test 3.2 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize="origintext unicode61", tokendata=1, @@ -160,11 +160,11 @@ do_execsql_test 3.0 { #db func b b #execsql_pp { SELECT b(term) FROM vocab } -do_execsql_test 3.1.1 { SELECT rowid FROM ft2('hello') } {1 2 3} -do_execsql_test 3.1.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} -do_execsql_test 3.1.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} +do_execsql_test 3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3} +do_execsql_test 3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} +do_execsql_test 3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} -do_execsql_test 3.1.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} +do_execsql_test 3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} #------------------------------------------------------------------------- # diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test index 26f9864098..948db1c519 100644 --- a/ext/fts5/test/fts5origintext2.test +++ b/ext/fts5/test/fts5origintext2.test @@ -102,6 +102,7 @@ breakpoint do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3} do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6} do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9} +do_execsql_test 1.14 { SELECT rowid FROM ft('hello') ORDER BY rank; } {1 2 3} finish_test diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test index 57b5984f4d..2b1e5c6387 100644 --- a/ext/fts5/test/fts5origintext3.test +++ b/ext/fts5/test/fts5origintext3.test @@ -46,6 +46,7 @@ foreach_detail_mode $testprefix { SELECT fts5_test_poslist(ft) FROM ft('hello'); } {{0.0.0 0.0.2 0.0.4}} +breakpoint do_execsql_test 1.3 { SELECT insttoken(ft, 0, 0), @@ -54,6 +55,14 @@ foreach_detail_mode $testprefix { FROM ft('hello'); } {hello.Hello hello.HELLO hello} + do_execsql_test 1.4 { + SELECT + insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0) + FROM ft('hello') ORDER BY rank; + } {hello.Hello hello.HELLO hello} + } finish_test diff --git a/manifest b/manifest index 13a7da6a20..04745fb5d2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssigned\sinteger\soverflow\sin\sfts5. -D 2023-11-29T16:22:39.960 +C Different\sapproach\sto\squerying\sa\stokendata=1\stable.\sSaves\scpu\sand\smemory. +D 2023-12-01T20:09:59.031 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -90,14 +90,14 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h 5e5630fc81e212f658afaa5b2650dac939d2729d0723aef1eeaff908f1725648 -F ext/fts5/fts5Int.h 782151060d176be22861f57bf38e087a82cfb0dfc4b2fa6f9ccbc2641b6d01e3 +F ext/fts5/fts5Int.h 2dc73393460e5c5cab67adc7e32e1387cc225b57e05f629d490e65cddea1a8c5 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c 5d557c7ebefaeac5a5111cc47d4fee8a2fc6bc15245d5c99eebf53dd04bf794e +F ext/fts5/fts5_expr.c aac8026aedf56c9a6e32b31c89f9dd7e5548378457085093307d06be14f1a176 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c bafdef8be40a20bb86a131af4f09b56919f416fa13d1a86af1bf92bad9a2870d -F ext/fts5/fts5_main.c 55b53085dbd1693b5735463198a8d124dfbc27f08311c839637b44b8254ef7cb +F ext/fts5/fts5_index.c b6012920df8963245226bb536db7ddea62dfd7a860d6112887b175f7aaf55b82 +F ext/fts5/fts5_main.c 20596de592af135f68b9be875f0a28715f6562bbdedd215e1c89eac1b42e97f9 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -190,9 +190,9 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 7caef7634889bab8b44d145141c0d9325299398fb89b116bccd6262fde5659db -F ext/fts5/test/fts5origintext2.test 26482f4af1f2785cb01d06af9aae202289b6e8cf7b708d18aea305b459c2f302 -F ext/fts5/test/fts5origintext3.test 87a212b8235794348c56cb70f21e122d182a5af688c56057b90b7c151d0aa347 +F ext/fts5/test/fts5origintext.test 6574e8d2121460cda72866afe3e582693d9992f150b0703aff5981625b527e62 +F ext/fts5/test/fts5origintext2.test 3259b331073fec918e02fd4d14d50586f9a3531da047a2a8f4624983eb654229 +F ext/fts5/test/fts5origintext3.test cb0f5835f8dff5954ee20570b68ee520cf04a08f6f9ca967b9d01d27e532da37 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2146,8 +2146,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 554fc13f2ca5f2ebd9ad0206034c25b556ff40db3106051c5e539f2e142e88ea -R 5c7c1632cd09d2b131adef895eab90a8 +P 60e46c7ec68fd8caaed960ca06d98fb06855b2d0bb860dd2fb7b5e89a5e9c7b4 +R e2c2c11ec4bab5e354a94b25a41ae3cd U dan -Z 009b023eba91b6acc35beb7d051a677a +Z f7d96142774ad25f02aad90982aa0fc8 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index df2c2789ad..e669309199 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -60e46c7ec68fd8caaed960ca06d98fb06855b2d0bb860dd2fb7b5e89a5e9c7b4 \ No newline at end of file +c523f40895866e6fc979a26483dbea8206126b4bbdf4b73b77263c09e13c855e \ No newline at end of file -- 2.47.2