From: dan Date: Tue, 1 Oct 2024 20:38:08 +0000 (+0000) Subject: Rationalize code further. And add tests. X-Git-Tag: major-relase~109^2~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d2a88e961a78582050bd56e3676bf54fef01fb3f;p=thirdparty%2Fsqlite.git Rationalize code further. And add tests. FossilOrigin-Name: 0ca002a4ab88f3e7ae1e6e518038157eaa20759f57888c2ed7e50cb92bd96348 --- diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index ede091650d..f6f51d7343 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6199,15 +6199,45 @@ static void fts5MergePrefixLists( } -static int fts5VisitPrefixRange( - Fts5Index *p, - Fts5Colset *pColset, - u8 *pToken, - int nToken, +/* +** Iterate through a range of entries in the FTS index, invoking the xVisit +** callback for each of them. +** +** Parameter pToken points to an nToken buffer containing an FTS index term +** (i.e. a document term with the preceding 1 byte index identifier - +** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits +** all entries for terms that have pToken/nToken as a prefix. If bPrefix +** is false, then only entries with pToken/nToken as the entire key are +** visited. +** +** If the current table is a tokendata=1 table, then if bPrefix is true then +** each index term is treated separately. However, if bPrefix is false, then +** all index terms corresponding to pToken/nToken are collapsed into a single +** term before the callback is invoked. +** +** The callback invoked for each entry visited is specified by parameter xVisit. +** Each time it is invoked, it is passed a pointer to the Fts5Index object, +** a copy of the 7th parameter to this function (pCtx) and a pointer to the +** iterator that indicates the current entry. If the current entry is the +** first with a new term (i.e. 
different from that of the previous entry, +** including the very first term), then the final two parameters are passed +** a pointer to the term and its size in bytes, respectively. If the current +** entry is not the first associated with its term, these two parameters +** are passed 0. +** +** If parameter pColset is not NULL, then it is used to filter entries before +** the callback is invoked. +*/ +static int fts5VisitEntries( + Fts5Index *p, /* Fts5 index object */ + Fts5Colset *pColset, /* Columns filter to apply, or NULL */ + u8 *pToken, /* Buffer containing token */ + int nToken, /* Size of buffer pToken in bytes */ + int bPrefix, /* True for a prefix scan */ void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int), - void *pCtx + void *pCtx /* Passed as second argument to xVisit() */ ){ - const int flags = FTS5INDEX_QUERY_SCAN + const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN : 0) | FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_NOOUTPUT; Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ @@ -6226,7 +6256,6 @@ static int fts5VisitPrefixRange( p1->xSetOutputs(p1, pSeg); - if( bNewTerm ){ nNew = pSeg->term.n; pNew = pSeg->term.p; @@ -6241,6 +6270,9 @@ static int fts5VisitPrefixRange( return p->rc; } +/* +** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries(). +*/ typedef struct PrefixSetupCtx PrefixSetupCtx; struct PrefixSetupCtx { void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); @@ -6252,6 +6284,9 @@ struct PrefixSetupCtx { Fts5Buffer doclist; }; +/* +** fts5VisitEntries() callback used by fts5SetupPrefixIter() +*/ static void prefixIterSetupCb( Fts5Index *p, void *pCtx, @@ -6325,6 +6360,7 @@ static void fts5SetupPrefixIter( assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) ); if( p->rc==SQLITE_OK ){ + void *pCtx = (void*)&s; int i; Fts5Data *pData; @@ -6334,30 +6370,12 @@ static void fts5SetupPrefixIter( ** corresponding to the prefix itself. That one is extracted from the ** main term index here. 
*/ if( iIdx!=0 ){ - Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ - int dummy = 0; - const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; pToken[0] = FTS5_MAIN_PREFIX; - fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1); - fts5IterSetOutputCb(&p->rc, p1); - for(; - fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext2(p, p1, &dummy) - ){ - Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; - p1->xSetOutputs(p1, pSeg); - if( p1->base.nData ){ - s.xAppend(p, (u64)p1->base.iRowid-(u64)s.iLastRowid, p1, &s.doclist); - s.iLastRowid = p1->base.iRowid; - } - } - fts5MultiIterFree(p1); + fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx); } pToken[0] = FTS5_MAIN_PREFIX + iIdx; - fts5VisitPrefixRange( - p, pColset, pToken, nToken, prefixIterSetupCb, (void*)&s - ); + fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx); assert( (s.nBuf%s.nMerge)==0 ); for(i=0; i0, not just a container for +** Fts5TokenDataMap structures), then this variable is an index into +** the apIter[] array. The corresponding term is that which the iterator +** at apIter[iIter] currently points to. +** +** Or, if the Fts5TokenDataIter iterator is just a container object +** (nIter==0), then iIter is an index into the term.p[] buffer where +** the term is stored. +** +** nByte: +** In the case where iIter is an index into term.p[], this variable +** is the size of the term in bytes. If iIter is an index into apIter[], +** this variable is unused. */ struct Fts5TokenDataMap { i64 iRowid; /* Row this token is located in */ i64 iPos; /* Position of token */ - int iIter; /* Iterator token was read from */ int nByte; /* Length of token in bytes (or 0) */ }; /* ** An object used to supplement Fts5Iter for tokendata=1 iterators. +** +** This object serves two purposes. 
The first is as a container for an array +** of Fts5TokenDataMap structures, which are used to find the token required +** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and +** aMap[] variables. */ struct Fts5TokenDataIter { - int nMap; - int nMapAlloc; - Fts5TokenDataMap *aMap; + int nMapAlloc; /* Allocated size of aMap[] in entries */ + int nMap; /* Number of valid entries in aMap[] */ + Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */ /* The following are used for prefix-queries only. */ Fts5Buffer terms; @@ -7234,10 +7280,18 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ return (z ? &z[1] : 0); } +/* +** The two input arrays - a1[] and a2[] - are in sorted order. This function +** merges the two arrays together and writes the result to output array +** aOut[]. aOut[] is guaranteed to be large enough to hold the result. +** +** Duplicate entries are copied into the output. So the size of the output +** array is always (n1+n2) entries. +*/ static void fts5TokendataMerge( - Fts5TokenDataMap *a1, int n1, - Fts5TokenDataMap *a2, int n2, - Fts5TokenDataMap *aOut + Fts5TokenDataMap *a1, int n1, /* Input array 1 */ + Fts5TokenDataMap *a2, int n2, /* Input array 2 */ + Fts5TokenDataMap *aOut /* Output array */ ){ int i1 = 0; int i2 = 0; @@ -7258,6 +7312,12 @@ static void fts5TokendataMerge( } } +/* +** Sort the contents of the pT->aMap[] array. +** +** The sorting algorithm requires a malloc(). If this fails, an error code +** is left in Fts5Index.rc before returning. +*/ static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ Fts5TokenDataMap *aTmp = 0; int nByte = pT->nMap * sizeof(Fts5TokenDataMap); @@ -7298,13 +7358,23 @@ static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ } } +/* +** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata() +** to pass data to prefixIterSetupTokendataCb(). 
+*/ typedef struct TokendataSetupCtx TokendataSetupCtx; struct TokendataSetupCtx { - Fts5TokenDataIter *pT; - int iTermOff; - int nTermByte; + Fts5TokenDataIter *pT; /* Object being populated with mappings */ + int iTermOff; /* Offset of current term in terms.p[] */ + int nTermByte; /* Size of current term in bytes */ }; +/* +** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This +** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each +** position in the current position-list. It doesn't matter that some of +** these may be out of order - they will be sorted later. +*/ static void prefixIterSetupTokendataCb( Fts5Index *p, void *pCtx, @@ -7331,10 +7401,15 @@ static void prefixIterSetupTokendataCb( } } +/* +** pIter is a prefix query. This function populates pIter->pTokenDataIter +** with an Fts5TokenDataIter object containing mappings for all rows +** matched by the query. +*/ static int fts5SetupPrefixIterTokendata( Fts5Iter *pIter, - const char *pToken, - int nToken + const char *pToken, /* Token prefix to search for */ + int nToken /* Size of pToken in bytes */ ){ Fts5Index *p = pIter->pIndex; Fts5Buffer token = {0, 0, 0}; @@ -7352,8 +7427,8 @@ static int fts5SetupPrefixIterTokendata( memcpy(&token.p[1], pToken, nToken); token.n = nToken+1; - fts5VisitPrefixRange( - p, 0, token.p, token.n, prefixIterSetupTokendataCb, (void*)&ctx + fts5VisitEntries( + p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx ); fts5TokendataIterSortMap(p, ctx.pT); diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test index 9dda2a5748..a4bca0de9b 100644 --- a/ext/fts5/test/fts5origintext3.test +++ b/ext/fts5/test/fts5origintext3.test @@ -95,6 +95,29 @@ foreach_detail_mode $testprefix { do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24} do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24} + do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft3 USING fts5( + x, 
tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%, + prefix=2 + ); + } + do_execsql_test 2.1 { + INSERT INTO ft3(rowid, x) VALUES(1, 'one'); + INSERT INTO ft3(rowid, x) VALUES(2, 'ONE'); + INSERT INTO ft3(rowid, x) VALUES(3, 'ONT'); + INSERT INTO ft3(rowid, x) VALUES(4, 'on'); + INSERT INTO ft3(rowid, x) VALUES(5, 'On'); + } + + do_execsql_test 2.2 { + SELECT rowid FROM ft3('on*'); + } {1 2 3 4 5} + + do_execsql_test 2.3 { + SELECT rowid, insttoken(ft3, 0, 0) FROM ft3('on*'); + } {1 one 2 one.ONE 3 ont.ONT 4 on 5 on.On} + + } finish_test diff --git a/manifest b/manifest index 0d2ad1c4fb..7b755eff5b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Rationalize\ssome\sof\sthe\snew\scode\son\sthis\sbranch. -D 2024-09-28T20:45:11.387 +C Rationalize\scode\sfurther.\sAnd\sadd\stests. +D 2024-10-01T20:38:08.239 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -99,7 +99,7 @@ F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70 F ext/fts5/fts5_config.c da21548ddbc1a457cb42545f527065221ede8ada6a734891b8c34317a7a9506b F ext/fts5/fts5_expr.c 69b8d976058512c07dfe86e229521b7a871768157bd1607cedf1a5038dfd72c9 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c c1005920192146452a3545500761ecc8cfab84572d251e8536103a01899f67d5 +F ext/fts5/fts5_index.c 9b2b9636ccefd6140c0ad7a44c51c2ea39f377753a13f06a2e6792215b62cede F ext/fts5/fts5_main.c 4503498d3453e29a3cd89dacaba029011e89cb8c481a6241611d106e7a369bd4 F ext/fts5/fts5_storage.c 3332497823c3d171cf56379f2bd8c971ce15a19aadacff961106462022c92470 F ext/fts5/fts5_tcl.c 4db9258a7882c5eac0da4433042132aaf15b87dd1e1636c7a6ca203abd2c8bfe @@ -203,7 +203,7 @@ F ext/fts5/test/fts5optimize2.test 
795d4ae5f66a7239cf8d5aef4c2ea96aeb8bcd907bd9b F ext/fts5/test/fts5optimize3.test 1653029284e10e0715246819893ba30565c4ead0d0fc470adae92c353ea857d3 F ext/fts5/test/fts5origintext.test 63d5b0dc00f0104add8960da0705a70bffd4d86b6feb6ddbb38bff21141d42f0 F ext/fts5/test/fts5origintext2.test f4505ff79bf7369f2b8b10b9cef7476049d844e20b37f29cad3a8b8d5ac6f9ba -F ext/fts5/test/fts5origintext3.test 45c33cf0c91a9ca0e36d298462db3edc7c8fe45fd185649a9dbfd66bb670058b +F ext/fts5/test/fts5origintext3.test 1f5174a9f4cf42f58f833dbfb314940793ca4723854ec2651e7530ddb35a66a6 F ext/fts5/test/fts5origintext4.test 0d3ef0a8038f471dbc83001c34fe5f7ae39b571bfc209670771eb28bc0fc50e8 F ext/fts5/test/fts5origintext5.test ee12b440ec335e5b422d1668aca0051b52ff28b6ee67073e8bbc29f509fd562b F ext/fts5/test/fts5phrase.test bb2554bb61d15f859678c96dc89a7de415cd5fc3b7b54c29b82a0d0ad138091c @@ -2214,8 +2214,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff -R da06610bae74973a44f69a92b9b60e12 +P 66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075 +R 364baf490f2f60461704cce12defe7d5 U dan -Z ca9c653ea5575a07920ac6ddffa15d1d +Z 98928b751e601e5ab0ec38779c287b09 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index f550314bcd..7b4f75a7f2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075 +0ca002a4ab88f3e7ae1e6e518038157eaa20759f57888c2ed7e50cb92bd96348