From: dan Date: Thu, 16 Nov 2023 21:11:56 +0000 (+0000) Subject: When querying a tokendata=1 fts5 table, do not use a prefix cursor for the case where... X-Git-Tag: version-3.45.0~114^2~19 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=a0764f63a8fa984cc1c90b377916f8dbe11207ec;p=thirdparty%2Fsqlite.git When querying a tokendata=1 fts5 table, do not use a prefix cursor for the case where the term has only one variant. FossilOrigin-Name: d711c96ba855686d6881a50498418de3492144f005684b5ae55bca24413dce47 --- diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 2d6d561b7b..887bb75dac 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -368,6 +368,7 @@ struct Fts5Index { sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */ sqlite3_stmt *pIdxSelect; + sqlite3_stmt *pIdxProbe; int nRead; /* Total number of blocks read */ sqlite3_stmt *pDeleteFromIdx; @@ -2629,6 +2630,18 @@ static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ return p->pIdxSelect; } +static sqlite3_stmt *fts5IdxProbeStmt(Fts5Index *p){ + if( p->pIdxProbe==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxProbe, sqlite3_mprintf( + "SELECT 1 FROM '%q'.'%q_idx' WHERE " + "segid=? AND term>? AND termzDb, pConfig->zName + )); + } + return p->pIdxProbe; +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. @@ -3846,7 +3859,7 @@ static void fts5MultiIterNew( assert( iIter==nSeg ); } - /* If the above was successful, each component iterators now points + /* If the above was successful, each component iterator now points ** to the first entry in its segment. In this case initialize the ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. 
*/ @@ -6179,7 +6192,7 @@ static void fts5TokenMapPoslist( } static int fts5TokenMapHash(i64 iRowid, int iCol, int iOff){ - return iRowid + (iRowid << 3) + (iCol << 6) + (iOff << 9); + return (iRowid + (iRowid << 3) + (iCol << 6) + (iOff << 9)) & 0x7FFFFFFF; } static void fts5TokenMapHashify(Fts5Index *p, Fts5TokenMap *pMap){ @@ -6228,6 +6241,84 @@ static const u8 *fts5TokenMapLookup( return 0; } +/* +** The iterator passed as the second argument has been opened to scan and +** merge doclists for a series of tokens in tokendata=1 mode. This function +** tests whether or not, instead of using the cursor to read doclists to +** merge, it can be used directly by the upper layer. This is the case +** if the cursor currently points to the only token that corresponds to +** the queried term. i.e. if the next token that will be visited by the +** iterator does not match the query. +*/ +int fts5TokendataIterIsOk( + Fts5Index *p, + Fts5Iter *pIter, + const u8 *pToken, + int nToken +){ + int ii; + Fts5Buffer buf = {0, 0, 0}; + int bRet = 1; + Fts5Buffer *pTerm = 0; + + /* Iterator is not usable if it uses the hash table */ + if( pIter->aSeg[0].pSeg==0 ) return 0; + + for(ii=0; bRet && iinSeg; ii++){ + Fts5SegIter *pSeg = &pIter->aSeg[ii]; + Fts5Data *pLeaf = pSeg->pLeaf; + if( pLeaf ){ + + if( pTerm==0 ){ + pTerm = &pSeg->term; + }else{ + if( pSeg->term.n!=pTerm->n + || memcmp(pSeg->term.p, pTerm->p, pTerm->n) + ){ + bRet = 0; + break; + } + } + + if( pSeg->iEndofDoclistszLeaf ){ + /* Next term is on this node. Check it directly. */ + int nPrefix = 0; + fts5GetVarint32(&pLeaf->p[pSeg->iEndofDoclist], nPrefix); + if( nPrefix>=nToken ) bRet = 0; + }else{ + /* Next term is on a subsequent page. In this case query the %_idx + ** table to discover exactly what that next term is. 
*/ + sqlite3_stmt *pProbe = fts5IdxProbeStmt(p); + if( pProbe ){ + int rc = SQLITE_OK; + if( buf.n==0 ){ + sqlite3Fts5BufferAppendBlob(&p->rc, &buf, nToken, pToken); + sqlite3Fts5BufferAppendBlob(&p->rc, &buf, 1, (const u8*)"\1"); + } + sqlite3_bind_int(pProbe, 1, pSeg->pSeg->iSegid); + sqlite3_bind_blob(pProbe,2, pSeg->term.p,pSeg->term.n, SQLITE_STATIC); + sqlite3_bind_blob(pProbe,3, buf.p, buf.n, SQLITE_STATIC); + + if( sqlite3_step(pProbe)==SQLITE_ROW ){ + bRet = 0; + } + rc = sqlite3_reset(pProbe); + if( p->rc==SQLITE_OK ) p->rc = rc; + } + } + } + } + + if( bRet ){ + for(ii=0; iinSeg; ii++){ + Fts5SegIter *pSeg = &pIter->aSeg[ii]; + pSeg->flags |= FTS5_SEGITER_ONETERM; + } + } + + fts5BufferFree(&buf); + return bRet; +} static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ @@ -6261,9 +6352,6 @@ static void fts5SetupPrefixIter( aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); - if( iIdx==0 ){ - pMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); - } assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); if( p->rc==SQLITE_OK ){ @@ -6308,79 +6396,92 @@ static void fts5SetupPrefixIter( pToken[0] = FTS5_MAIN_PREFIX + iIdx; fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); fts5IterSetOutputCb(&p->rc, p1); - for( /* no-op */ ; - fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext2(p, p1, &bNewTerm) - ){ - Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; - int nTerm = pSeg->term.n; - const u8 *pTerm = pSeg->term.p; - p1->xSetOutputs(p1, pSeg); - if( pMap ){ - if( bNewTerm ){ - fts5TokenMapTerm(p, pMap, &pTerm[1], nTerm-1); - } - fts5TokenMapPoslist(p, pMap, p1); - } + if( bDesc==0 && bTokenscan && fts5TokendataIterIsOk(p, p1, pToken,nToken) ){ + /* In this case iterator p1 may be used as is. 
*/ + *ppIter = p1; + }else{ - assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); - if( bNewTerm ){ - if( nTermnToken && pTerm[nToken]!=0x00 ) break; + if( iIdx==0 && p->pConfig->eDetail==FTS5_DETAIL_FULL ){ + pMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); } + assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); - if( p1->base.nData==0 ) continue; - - if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ - for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ - int i1 = i*nMerge; - int iStore; - assert( i1+nMerge<=nBuf ); - for(iStore=i1; iStoreaSeg[ p1->aFirst[1].iFirst ]; + int nTerm = pSeg->term.n; + const u8 *pTerm = pSeg->term.p; + p1->xSetOutputs(p1, pSeg); + + assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); + if( bNewTerm ){ + if( nTermnToken && pTerm[nToken]!=0x00 ) break; + } + + if( pMap ){ + if( bNewTerm ){ + fts5TokenMapTerm(p, pMap, &pTerm[1], nTerm-1); } - if( iStore==i1+nMerge ){ - xMerge(p, &doclist, nMerge, &aBuf[i1]); + fts5TokenMapPoslist(p, pMap, p1); + } + + if( p1->base.nData==0 ) continue; + if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ + for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ + int i1 = i*nMerge; + int iStore; + assert( i1+nMerge<=nBuf ); for(iStore=i1; iStorebase.iRowid-(u64)iLastRowid, p1, &doclist); + iLastRowid = p1->base.iRowid; } - - xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist); - iLastRowid = p1->base.iRowid; - } - - assert( (nBuf%nMerge)==0 ); - for(i=0; irc==SQLITE_OK ){ - xMerge(p, &doclist, nMerge, &aBuf[i]); + + assert( (nBuf%nMerge)==0 ); + for(i=0; irc==SQLITE_OK ){ + xMerge(p, &doclist, nMerge, &aBuf[i]); + } + for(iFree=i; iFreep = (u8*)&pData[1]; + pData->nn = pData->szLeaf = doclist.n; + if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); + if( pMap ) fts5TokenMapHashify(p, pMap); + fts5MultiIterNew2(p, pData, pMap, bDesc, ppIter); + pMap = 0; } + fts5BufferFree(&doclist); } - fts5MultiIterFree(p1); - - pData = fts5IdxMalloc(p, 
sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING); - if( pData ){ - pData->p = (u8*)&pData[1]; - pData->nn = pData->szLeaf = doclist.n; - if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); - if( pMap ) fts5TokenMapHashify(p, pMap); - fts5MultiIterNew2(p, pData, pMap, bDesc, ppIter); - } - fts5BufferFree(&doclist); } + fts5TokenMapFree(pMap); fts5StructureRelease(pStruct); sqlite3_free(aBuf); } @@ -6514,6 +6615,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ sqlite3_finalize(p->pIdxWriter); sqlite3_finalize(p->pIdxDeleter); sqlite3_finalize(p->pIdxSelect); + sqlite3_finalize(p->pIdxProbe); sqlite3_finalize(p->pDataVersion); sqlite3_finalize(p->pDeleteFromIdx); sqlite3Fts5HashFree(p->pHash); @@ -6766,7 +6868,7 @@ int sqlite3Fts5IterToken( ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; if( pIter->pTokenMap ){ - *ppOut = fts5TokenMapLookup( + *ppOut = (const char*)fts5TokenMapLookup( pIter->pTokenMap, pIndexIter->iRowid, iCol, iOff, pnOut ); }else{ diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test new file mode 100644 index 0000000000..7cf8d80071 --- /dev/null +++ b/ext/fts5/test/fts5origintext2.test @@ -0,0 +1,107 @@ +# 2014 Jan 08 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + +sqlite3_fts5_register_origintext db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); +} + +do_execsql_test 1.1 { + BEGIN; + INSERT INTO ft VALUES('Hello'); + INSERT INTO ft VALUES('hello'); + INSERT INTO ft VALUES('HELLO'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('World'); + INSERT INTO ft VALUES('world'); + INSERT INTO ft VALUES('WORLD'); + COMMIT; +} + +do_execsql_test 1.2 { SELECT rowid FROM ft('hello'); } {1 2 3} +do_execsql_test 1.3 { SELECT rowid FROM ft('today'); } {4 5 6} +do_execsql_test 1.4 { SELECT rowid FROM ft('world'); } {7 8 9} + +do_execsql_test 1.5 { + SELECT count(*) FROM ft_data +} 3 + +do_execsql_test 1.6 { + DELETE FROM ft; + INSERT INTO ft(ft, rank) VALUES('pgsz', 64); + BEGIN; + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO ft SELECT 'Hello Hello Hello Hello Hello Hello Hello' FROM s; + INSERT INTO ft VALUES ('hELLO hELLO hELLO'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('World World World World World World World'); + INSERT INTO ft VALUES('world world world world world world world'); + INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD'); + INSERT INTO ft VALUES('World World World World World World World'); + INSERT INTO ft VALUES('world world world world world world world'); + INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD'); + COMMIT; +} + +do_execsql_test 1.7 { + SELECT 
count(*) FROM ft_data; +} 23 + +do_execsql_test 1.8 { SELECT rowid FROM ft('hello') WHERE rowid>100; } {101} + +do_execsql_test 1.9 { + DELETE FROM ft; + INSERT INTO ft(ft) VALUES('optimize'); + SELECT count(*) FROM ft_data; +} {2} +do_execsql_test 1.10 { + BEGIN; + INSERT INTO ft VALUES('Hello'); + INSERT INTO ft VALUES('hello'); + INSERT INTO ft VALUES('HELLO'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('World'); + INSERT INTO ft VALUES('world'); + INSERT INTO ft VALUES('WORLD'); +} + +breakpoint +do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3} +do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6} +do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9} + +finish_test + diff --git a/manifest b/manifest index 3477a07b6e..7763b46700 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\simplementation\sof\sxInstToken()\sAPI. -D 2023-11-15T11:45:19.681 +C When\squerying\sa\stokendata=1\sfts5\stable,\sdo\snot\suse\sa\sprefix\scursor\sfor\sthe\scase\swhere\sthe\sterm\shas\sonly\sone\svariant. 
+D 2023-11-16T21:11:56.608 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -94,7 +94,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf F ext/fts5/fts5_expr.c 4b50ed0c724cb160f086e20e964ed2d57b99d0d3c1cb1b029901c0300b11bd9f F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 3b51c2f0554a665694e777c8f2765cb5b1283d4bc960dde350a604af3e5e5d98 +F ext/fts5/fts5_index.c 70fa4a6d8a062ca4b63a62d0721d72ce2f6336413c6e8b0703881c708797d24d F ext/fts5/fts5_main.c f151eb2c6d27418d907c88cd623ad4508bdcf518a79d504e850270754c228b74 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -189,6 +189,7 @@ F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca F ext/fts5/test/fts5origintext.test 908a1fb6b1106e4b6ed0f9cf683c2ad7f986cce1aea1e0a13b3309c6f568932b +F ext/fts5/test/fts5origintext2.test a654c77f1548ccd8eab7f6d07230655c0070cdf32dcd4740ccdf496f77d5282c F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2143,8 +2144,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F 
vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 828566392b3ea8db603cb1ae5eccbc8ac035efaa284bc7c15ba89874f634aec9 -R 7870d9470a55737470bd92d95fe480a9 +P a34b26fe7f60b74e7ae5cf64900920a3d352a20da2496401bcbc27041689cd07 +R d7c277a055a404d272fdcb5090bf371a U dan -Z d10d6cf5b22c051f4553454e4a3996a4 +Z 0e1bf556ad9eba9db356685a09c7ab31 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 96d818fc70..6373f95ef5 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a34b26fe7f60b74e7ae5cf64900920a3d352a20da2496401bcbc27041689cd07 \ No newline at end of file +d711c96ba855686d6881a50498418de3492144f005684b5ae55bca24413dce47 \ No newline at end of file