From: dan Date: Wed, 25 Sep 2024 18:55:11 +0000 (+0000) Subject: Change the way tokendata indexes are collected for prefix queries. X-Git-Tag: major-relase~109^2~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2eff8f225292c7fd17299910338e01706dba083a;p=thirdparty%2Fsqlite.git Change the way tokendata indexes are collected for prefix queries. FossilOrigin-Name: 204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index b15521f163..a71fb13e06 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -502,7 +502,14 @@ int sqlite3Fts5StructureTest(Fts5Index*, void*); /* ** Used by xInstToken(): */ -int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*); +int sqlite3Fts5IterToken( + Fts5IndexIter *pIndexIter, + const char *pToken, int nToken, + i64 iRowid, + int iCol, + int iOff, + const char **ppOut, int *pnOut +); /* ** Insert or remove data to or from the index. Each time a document is diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 0124a1cc93..877c3f79c5 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3241,7 +3241,8 @@ int sqlite3Fts5ExprInstToken( pTerm = &pPhrase->aTerm[iToken]; if( pExpr->pConfig->bTokendata || pTerm->bPrefix ){ rc = sqlite3Fts5IterToken( - pTerm->pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut + pTerm->pIter, pTerm->pTerm, pTerm->nQueryTerm, + iRowid, iCol, iOff+iToken, ppOut, pnOut ); }else{ *ppOut = pTerm->pTerm; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index ded1ec59cf..1efbe5a7b4 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6571,13 +6571,12 @@ int sqlite3Fts5IndexWrite( static int fts5IsTokendataPrefix( Fts5Buffer *pBuf, const u8 *pToken, - int nToken, - int bPrefix + int nToken ){ return ( pBuf->n>=nToken && 0==memcmp(pBuf->p, pToken, nToken) - && (bPrefix || pBuf->n==nToken || pBuf->p[nToken]==0x00) + && (pBuf->n==nToken || pBuf->p[nToken]==0x00) ); } @@ -6602,20 +6601,25 @@ static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ struct Fts5TokenDataMap { i64 iRowid; /* Row this token is located in */ i64 iPos; /* Position of token */ + int iIter; /* Iterator token was read from */ + int nByte; /* Length of token in bytes (or 0) */ }; /* ** An object used to supplement Fts5Iter for tokendata=1 iterators. */ struct Fts5TokenDataIter { - int nIter; - int nIterAlloc; - int nMap; int nMapAlloc; Fts5TokenDataMap *aMap; + /* The following are used for prefix-queries only. */ + Fts5Buffer terms; + + /* The following are used for other full-token tokendata queries only. */ + int nIter; + int nIterAlloc; Fts5PoslistReader *aPoslistReader; int *aPoslistToIter; Fts5Iter *apIter[1]; @@ -6666,6 +6670,7 @@ static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ for(ii=0; iinIter; ii++){ fts5MultiIterFree(pSet->apIter[ii]); } + fts5BufferFree(&pSet->terms); sqlite3_free(pSet->aPoslistReader); sqlite3_free(pSet->aMap); sqlite3_free(pSet); @@ -6679,6 +6684,7 @@ static void fts5TokendataIterAppendMap( Fts5Index *p, Fts5TokenDataIter *pT, int iIter, + int nByte, i64 iRowid, i64 iPos ){ @@ -6701,6 +6707,7 @@ static void fts5TokendataIterAppendMap( pT->aMap[pT->nMap].iRowid = iRowid; pT->aMap[pT->nMap].iPos = iPos; pT->aMap[pT->nMap].iIter = iIter; + pT->aMap[pT->nMap].nByte = nByte; pT->nMap++; } } @@ -6745,7 +6752,7 @@ static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ pIter->base.iRowid = iRowid; if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){ - fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, iRowid, -1); + fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, 0, iRowid, -1); }else if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){ int nReader = 0; @@ -6880,8 +6887,7 @@ static Fts5Iter *fts5SetupTokendataIter( Fts5Index *p, /* FTS index to query */ const u8 *pToken, /* Buffer containing query term */ int nToken, /* Size of buffer pToken in bytes */ - Fts5Colset *pColset, /* Colset to filter on */ - int bPrefix /* True to match any prefix */ + Fts5Colset *pColset /* Colset to filter on */ ){ Fts5Iter *pRet = 0; Fts5TokenDataIter *pSet = 0; @@ -6963,7 +6969,7 @@ static Fts5Iter *fts5SetupTokendataIter( pSmall = 0; for(ii=0; iinSeg; ii++){ Fts5SegIter *pII = &pNew->aSeg[ii]; - if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken, bPrefix) ){ + if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){ fts5SegIterSetEOF(pII); } if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){ @@ -6999,6 +7005,7 @@ static Fts5Iter *fts5SetupTokendataIter( pRet = fts5MultiIterAlloc(p, 0); } if( pRet ){ + pRet->nSeg = 0; pRet->pTokenDataIter = pSet; if( pSet ){ fts5IterSetOutputsTokendata(pRet); @@ -7071,9 +7078,9 @@ int sqlite3Fts5IndexQuery( } } - if( (bTokendata && iIdx==0) || iIdx>pConfig->nPrefix ){ + if( bTokendata && iIdx==0 ){ buf.p[0] = FTS5_MAIN_PREFIX; - pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset, iIdx>0); + pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset); }else if( iIdx<=pConfig->nPrefix ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); @@ -7121,7 +7128,8 @@ int sqlite3Fts5IndexQuery( int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; assert( pIter->pIndex->rc==SQLITE_OK ); - if( pIter->pTokenDataIter ){ + if( pIter->nSeg==0 ){ + assert( pIter->pTokenDataIter ); fts5TokendataIterNext(pIter, 0, 0); }else{ fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); @@ -7158,7 +7166,8 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ */ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - if( pIter->pTokenDataIter ){ + if( pIter->nSeg==0 ){ + assert( pIter->pTokenDataIter ); fts5TokendataIterNext(pIter, 1, iMatch); }else{ fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); @@ -7177,14 +7186,161 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ return (z ? &z[1] : 0); } +static void fts5TokendataMerge( + Fts5TokenDataMap *a1, int n1, + Fts5TokenDataMap *a2, int n2, + Fts5TokenDataMap *aOut +){ + int i1 = 0; + int i2 = 0; + + assert( n1>=0 && n2>=0 ); + while( i1=n2 || (i1nMap * sizeof(Fts5TokenDataMap); + + aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte); + if( aTmp ){ + Fts5TokenDataMap *a1 = pT->aMap; + Fts5TokenDataMap *a2 = aTmp; + i64 nHalf; + + for(nHalf=1; nHalfnMap; nHalf=nHalf*2){ + int i1; + for(i1=0; i1nMap; i1+=(nHalf*2)){ + int n1 = MIN(nHalf, pT->nMap-i1); + int n2 = MIN(nHalf, pT->nMap-i1-n1); + fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]); + } + SWAPVAL(Fts5TokenDataMap*, a1, a2); + } + + if( a1!=pT->aMap ){ + memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap)); + } + sqlite3_free(aTmp); + +#ifdef SQLITE_DEBUG + { + int ii; + for(ii=1; iinMap; ii++){ + Fts5TokenDataMap *p1 = &pT->aMap[ii-1]; + Fts5TokenDataMap *p2 = &pT->aMap[ii]; + assert( p1->iRowidiRowid + || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos) + ); + } + } +#endif + } +} + +static int fts5SetupPrefixIterTokendata( + Fts5Iter *pIter, + const char *pToken, + int nToken +){ + Fts5Index *p = pIter->pIndex; + Fts5Buffer token = {0, 0, 0}; + Fts5TokenDataIter *pT = 0; + + fts5BufferGrow(&p->rc, &token, nToken+1); + pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(*pT)); + + if( p->rc==SQLITE_OK ){ + const int flags = FTS5INDEX_QUERY_SCAN + | FTS5INDEX_QUERY_SKIPEMPTY + | FTS5INDEX_QUERY_NOOUTPUT; + Fts5Structure *pStruct = 0; + Fts5Iter *p1 = 0; /* Iterator used to find tokendata */ + + int bNewTerm = 1; + int iTermOff = 0; + int nTermByte = 0; + + /* Fill in the token prefix to search for */ + token.p[0] = FTS5_MAIN_PREFIX; + memcpy(&token.p[1], pToken, nToken); + token.n = nToken+1; + + /* Grab a reference to the table structure. That will be released before + ** this function returns. */ + pStruct = fts5StructureRead(p); + + fts5MultiIterNew(p, pStruct, flags, 0, token.p, token.n, -1, 0, &p1); + fts5IterSetOutputCb(&p->rc, p1); + for( /* no-op */ ; + fts5MultiIterEof(p, p1)==0; + fts5MultiIterNext2(p, p1, &bNewTerm) + ){ + i64 iPos = 0; + int iPosOff = 0; + + Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; + p1->xSetOutputs(p1, pSeg); + + if( bNewTerm ){ + int nTerm = pSeg->term.n; + const u8 *pTerm = pSeg->term.p; + assert_nc( memcmp(token.p, pTerm, MIN(token.n, nTerm))<=0 ); + if( nTermterms.n; + fts5BufferAppendBlob(&p->rc, &pT->terms, nTermByte, pTerm+1); + } + + while( 0==sqlite3Fts5PoslistNext64( + p1->base.pData, p1->base.nData, &iPosOff, &iPos + ) ){ + fts5TokendataIterAppendMap( + p, pT, iTermOff, nTermByte, p1->base.iRowid, iPos + ); + } + } + + /* fts5SetupPrefixIter */ + fts5MultiIterFree(p1); + fts5StructureRelease(pStruct); + + fts5TokendataIterSortMap(p, pT); + } + + if( p->rc==SQLITE_OK ){ + pIter->pTokenDataIter = pT; + }else{ + fts5TokendataIterDelete(pT); + } + fts5BufferFree(&token); + + return fts5IndexReturn(p); +} + /* ** This is used by xInstToken() to access the token at offset iOff, column ** iCol of row iRowid. The token is returned via output variables *ppOut ** and *pnOut. The iterator passed as the first argument must be a tokendata=1 ** iterator (pIter->pTokenDataIter!=0). +** +** pToken/nToken: */ int sqlite3Fts5IterToken( Fts5IndexIter *pIndexIter, + const char *pToken, int nToken, i64 iRowid, int iCol, int iOff, @@ -7192,13 +7348,22 @@ int sqlite3Fts5IterToken( ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5TokenDataIter *pT = pIter->pTokenDataIter; - Fts5TokenDataMap *aMap = pT->aMap; i64 iPos = (((i64)iCol)<<32) + iOff; - + Fts5TokenDataMap *aMap = 0; int i1 = 0; - int i2 = pT->nMap; + int i2 = 0; int iTest = 0; + assert( pT || (pToken && pIter->nSeg>0) ); + if( pT==0 ){ + int rc = fts5SetupPrefixIterTokendata(pIter, pToken, nToken); + if( rc!=SQLITE_OK ) return rc; + pT = pIter->pTokenDataIter; + } + + i2 = pT->nMap; + aMap = pT->aMap; + while( i2>i1 ){ iTest = (i1 + i2) / 2; @@ -7221,9 +7386,15 @@ int sqlite3Fts5IterToken( } if( i2>i1 ){ - Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; - *ppOut = (const char*)pMap->aSeg[0].term.p+1; - *pnOut = pMap->aSeg[0].term.n-1; + if( pIter->nSeg==0 ){ + Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; + *ppOut = (const char*)pMap->aSeg[0].term.p+1; + *pnOut = pMap->aSeg[0].term.n-1; + }else{ + Fts5TokenDataMap *p = &aMap[iTest]; + *ppOut = (const char*)&pT->terms.p[p->iIter]; + *pnOut = aMap[iTest].nByte; + } } return SQLITE_OK; @@ -7235,7 +7406,9 @@ int sqlite3Fts5IterToken( */ void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - if( pIter && pIter->pTokenDataIter ){ + if( pIter && pIter->pTokenDataIter + && (pIter->nSeg==0 || pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL) + ){ pIter->pTokenDataIter->nMap = 0; } } @@ -7255,17 +7428,30 @@ int sqlite3Fts5IndexIterWriteTokendata( Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5TokenDataIter *pT = pIter->pTokenDataIter; Fts5Index *p = pIter->pIndex; - int ii; + i64 iPos = (((i64)iCol)<<32) + iOff; assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL ); - assert( pIter->pTokenDataIter ); - - for(ii=0; iinIter; ii++){ - Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; - if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; - } - if( iinIter ){ - fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff); + assert( pIter->pTokenDataIter || pIter->nSeg>0 ); + if( pIter->nSeg>0 ){ + /* This is a prefix term iterator. */ + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + if( pT==0 ){ + pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(*pT)); + pIter->pTokenDataIter = pT; + } + if( pT ){ + fts5TokendataIterAppendMap(p, pT, pT->terms.n, nToken, iRowid, iPos); + fts5BufferAppendBlob(&p->rc, &pT->terms, nToken, (const u8*)pToken); + } + }else{ + int ii; + for(ii=0; iinIter; ii++){ + Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; + if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; + } + if( iinIter ){ + fts5TokendataIterAppendMap(p, pT, ii, 0, iRowid, iPos); + } } return fts5IndexReturn(p); } diff --git a/manifest b/manifest index 31fbff973a..91a17cd96b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\strunk\schanges\sinto\sthis\sbranch. -D 2024-09-24T15:43:52.339 +C Change\sthe\sway\stokendata\sindexes\sare\scollected\sfor\sprefix\squeries. +D 2024-09-25T18:55:11.223 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -93,13 +93,13 @@ F ext/fts3/unicode/mkunicode.tcl 63db9624ccf70d4887836c320eda93ab552f21008f3be7e F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl 009cf59c77afa86d137b0cca3e3b1a5efbe2264faa2df233f9a7aa8563926d15 F ext/fts5/fts5.h efaaac0df3d3bc740383044c144b582f47921aafa21d7b10eb98f42c24c740b0 -F ext/fts5/fts5Int.h 93aba03ca417f403b07b2ab6f50aa0e0c1b8b031917a9026b81520e7047a168e +F ext/fts5/fts5Int.h 83a7af3fee07d5163bf7bf97db310544fcc143c94acb13dbced7e06ae8025a18 F ext/fts5/fts5_aux.c 65a0468dd177d6093aa9ae1622e6d86b0136b8d267c62c0ad6493ad1e9a3d759 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 F ext/fts5/fts5_config.c da21548ddbc1a457cb42545f527065221ede8ada6a734891b8c34317a7a9506b -F ext/fts5/fts5_expr.c 1f60d81aa4703435f98f46bbb41fb2a2efa898423fec070a2b3f7a02f177ac64 +F ext/fts5/fts5_expr.c 69b8d976058512c07dfe86e229521b7a871768157bd1607cedf1a5038dfd72c9 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c aadd271f3c2048418298377908dd09d496753a5c7da84161a9c86ca8c1e78e9a +F ext/fts5/fts5_index.c 8dfb22c5e42cd56d3abbe107a5561fc3b4f731fc4c821ac049482d9dedc50acc F ext/fts5/fts5_main.c 4503498d3453e29a3cd89dacaba029011e89cb8c481a6241611d106e7a369bd4 F ext/fts5/fts5_storage.c 3332497823c3d171cf56379f2bd8c971ce15a19aadacff961106462022c92470 F ext/fts5/fts5_tcl.c 4db9258a7882c5eac0da4433042132aaf15b87dd1e1636c7a6ca203abd2c8bfe @@ -2214,8 +2214,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 97c2824f471e7e622c4a166947a6e8162cae891345101539829a6fcec83373fe 42bb941584a1ac922ee6b0b6ecadce71c9259555563cf49913a6f820f3f9b887 -R cb71c4478793484afa282b1a3cf11afa +P 9945206e6e26a48a49b9747650d299eb983cc21a3a61c621cd81f0bbc85a74d7 +R d12c6f9d3e41d3b7f32c957f52650189 U dan -Z efd6491e97a3bc8cd929e608bea6d1a9 +Z 0b9676f39cb90827333f01676ed89ac5 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index dc02877987..92e4aa62cf 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9945206e6e26a48a49b9747650d299eb983cc21a3a61c621cd81f0bbc85a74d7 +204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff