From 090b8649be3603920327a0ae45c2cf9384d8615b Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 2 Nov 2024 19:10:50 +0000 Subject: [PATCH] Allow an fts5 table or query to be configured to collect xInstToken data for any prefix terms as part of the first parse of the main index, if any. FossilOrigin-Name: 46929ae92b26f02bc70de9931b21a8a7cf9a2453d5fb07f68b712f62e28e9152 --- ext/fts5/fts5Int.h | 3 +- ext/fts5/fts5_config.c | 13 + ext/fts5/fts5_index.c | 501 +++++++++++++++-------------- ext/fts5/fts5_main.c | 28 +- ext/fts5/test/fts5origintext.test | 121 ++++--- ext/fts5/test/fts5origintext3.test | 47 ++- manifest | 24 +- manifest.uuid | 2 +- 8 files changed, 430 insertions(+), 309 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index bb72f45c7b..832f9ad477 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -247,7 +247,8 @@ struct Fts5Config { char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ int bSecureDelete; /* 'secure-delete' */ - int nDeleteMerge; /* 'deletemerge' */ + int nDeleteMerge; /* 'deletemerge' */ + int bPrefixInsttoken; /* 'prefix-insttoken' */ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index a674b44d0b..eea82b046d 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -1026,6 +1026,19 @@ int sqlite3Fts5ConfigSetValue( }else{ pConfig->bSecureDelete = (bVal ? 1 : 0); } + } + + else if( 0==sqlite3_stricmp(zKey, "insttoken") ){ + int bVal = -1; + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + bVal = sqlite3_value_int(pVal); + } + if( bVal<0 ){ + *pbBadkey = 1; + }else{ + pConfig->bPrefixInsttoken = (bVal ? 1 : 0); + } + }else{ *pbBadkey = 1; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 04f0a6740c..e7028e411c 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6260,6 +6260,7 @@ static int fts5VisitEntries( const u8 *pNew = 0; p1->xSetOutputs(p1, pSeg); + if( p->rc ) break; if( bNewTerm ){ nNew = pSeg->term.n; @@ -6275,6 +6276,247 @@ static int fts5VisitEntries( return p->rc; } + +/* +** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an +** array of these for each row it visits (so all iRowid fields are the same). +** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an +** array of these for the entire query (in which case iRowid fields may take +** a variety of values). +** +** Each instance in the array indicates the iterator (and therefore term) +** associated with position iPos of rowid iRowid. This is used by the +** xInstToken() API. +** +** iRowid: +** Rowid for the current entry. +** +** iPos: +** Position of current entry within row. In the usual ((iCol<<32)+iOff) +** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()). +** +** iIter: +** If the Fts5TokenDataIter iterator that the entry is part of is +** actually an iterator (i.e. with nIter>0, not just a container for +** Fts5TokenDataMap structures), then this variable is an index into +** the apIter[] array. The corresponding term is that which the iterator +** at apIter[iIter] currently points to. +** +** Or, if the Fts5TokenDataIter iterator is just a container object +** (nIter==0), then iIter is an index into the term.p[] buffer where +** the term is stored. +** +** nByte: +** In the case where iIter is an index into term.p[], this variable +** is the size of the term in bytes. If iIter is an index into apIter[], +** this variable is unused. +*/ +struct Fts5TokenDataMap { + i64 iRowid; /* Row this token is located in */ + i64 iPos; /* Position of token */ + int iIter; /* Iterator token was read from */ + int nByte; /* Length of token in bytes (or 0) */ +}; + +/* +** An object used to supplement Fts5Iter for tokendata=1 iterators. +** +** This object serves two purposes. The first is as a container for an array +** of Fts5TokenDataMap structures, which are used to find the token required +** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and +** aMap[] variables. +*/ +struct Fts5TokenDataIter { + int nMapAlloc; /* Allocated size of aMap[] in entries */ + int nMap; /* Number of valid entries in aMap[] */ + Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */ + + /* The following are used for prefix-queries only. */ + Fts5Buffer terms; + + /* The following are used for other full-token tokendata queries only. */ + int nIter; + int nIterAlloc; + Fts5PoslistReader *aPoslistReader; + int *aPoslistToIter; + Fts5Iter *apIter[1]; +}; + +/* +** The two input arrays - a1[] and a2[] - are in sorted order. This function +** merges the two arrays together and writes the result to output array +** aOut[]. aOut[] is guaranteed to be large enough to hold the result. +** +** Duplicate entries are copied into the output. So the size of the output +** array is always (n1+n2) entries. +*/ +static void fts5TokendataMerge( + Fts5TokenDataMap *a1, int n1, /* Input array 1 */ + Fts5TokenDataMap *a2, int n2, /* Input array 2 */ + Fts5TokenDataMap *aOut /* Output array */ +){ + int i1 = 0; + int i2 = 0; + + assert( n1>=0 && n2>=0 ); + while( i1=n2 || (i1rc==SQLITE_OK ){ + if( pT->nMap==pT->nMapAlloc ){ + int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; + int nByte = nNew * sizeof(Fts5TokenDataMap); + Fts5TokenDataMap *aNew; + + aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + + pT->aMap = aNew; + pT->nMapAlloc = nNew; + } + + pT->aMap[pT->nMap].iRowid = iRowid; + pT->aMap[pT->nMap].iPos = iPos; + pT->aMap[pT->nMap].iIter = iIter; + pT->aMap[pT->nMap].nByte = nByte; + pT->nMap++; + } +} + +/* +** Sort the contents of the pT->aMap[] array. +** +** The sorting algorithm requries a malloc(). If this fails, an error code +** is left in Fts5Index.rc before returning. +*/ +static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ + Fts5TokenDataMap *aTmp = 0; + int nByte = pT->nMap * sizeof(Fts5TokenDataMap); + + aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte); + if( aTmp ){ + Fts5TokenDataMap *a1 = pT->aMap; + Fts5TokenDataMap *a2 = aTmp; + i64 nHalf; + + for(nHalf=1; nHalfnMap; nHalf=nHalf*2){ + int i1; + for(i1=0; i1nMap; i1+=(nHalf*2)){ + int n1 = MIN(nHalf, pT->nMap-i1); + int n2 = MIN(nHalf, pT->nMap-i1-n1); + fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]); + } + SWAPVAL(Fts5TokenDataMap*, a1, a2); + } + + if( a1!=pT->aMap ){ + memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap)); + } + sqlite3_free(aTmp); + +#ifdef SQLITE_DEBUG + { + int ii; + for(ii=1; iinMap; ii++){ + Fts5TokenDataMap *p1 = &pT->aMap[ii-1]; + Fts5TokenDataMap *p2 = &pT->aMap[ii]; + assert( p1->iRowidiRowid + || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos) + ); + } + } +#endif + } +} + +/* +** Delete an Fts5TokenDataIter structure and its contents. +*/ +static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ + if( pSet ){ + int ii; + for(ii=0; iinIter; ii++){ + fts5MultiIterFree(pSet->apIter[ii]); + } + fts5BufferFree(&pSet->terms); + sqlite3_free(pSet->aPoslistReader); + sqlite3_free(pSet->aMap); + sqlite3_free(pSet); + } +} + + +/* +** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata() +** to pass data to prefixIterSetupTokendataCb(). +*/ +typedef struct TokendataSetupCtx TokendataSetupCtx; +struct TokendataSetupCtx { + Fts5TokenDataIter *pT; /* Object being populated with mappings */ + int iTermOff; /* Offset of current term in terms.p[] */ + int nTermByte; /* Size of current term in bytes */ +}; + +/* +** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This +** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each +** position in the current position-list. It doesn't matter that some of +** these may be out of order - they will be sorted later. +*/ +static void prefixIterSetupTokendataCb( + Fts5Index *p, + void *pCtx, + Fts5Iter *p1, + const u8 *pNew, + int nNew +){ + TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx; + int iPosOff = 0; + i64 iPos = 0; + + if( pNew ){ + pSetup->nTermByte = nNew-1; + pSetup->iTermOff = pSetup->pT->terms.n; + fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1); + } + + while( 0==sqlite3Fts5PoslistNext64( + p1->base.pData, p1->base.nData, &iPosOff, &iPos + ) ){ + fts5TokendataIterAppendMap(p, + pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos + ); + } +} + + /* ** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries(). */ @@ -6287,6 +6529,7 @@ struct PrefixSetupCtx { Fts5Buffer *aBuf; int nBuf; Fts5Buffer doclist; + TokendataSetupCtx *pTokendata; }; /* @@ -6331,6 +6574,10 @@ static void prefixIterSetupCb( ); pSetup->iLastRowid = p1->base.iRowid; } + + if( pSetup->pTokendata ){ + prefixIterSetupTokendataCb(p, (void*)pSetup->pTokendata, p1, pNew, nNew); + } } static void fts5SetupPrefixIter( @@ -6344,11 +6591,21 @@ static void fts5SetupPrefixIter( ){ Fts5Structure *pStruct; PrefixSetupCtx s; + TokendataSetupCtx s2; memset(&s, 0, sizeof(s)); + memset(&s2, 0, sizeof(s2)); + s.nMerge = 1; s.iLastRowid = 0; s.nBuf = 32; + if( iIdx==0 + && p->pConfig->eDetail==FTS5_DETAIL_FULL + && p->pConfig->bPrefixInsttoken + ){ + s.pTokendata = &s2; + s2.pT = (Fts5TokenDataIter*)fts5IdxMalloc(p, sizeof(*s2.pT)); + } if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ s.xMerge = fts5MergeRowidLists; @@ -6400,8 +6657,15 @@ static void fts5SetupPrefixIter( if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n); fts5MultiIterNew2(p, pData, bDesc, ppIter); } + + if( p->rc==SQLITE_OK && s.pTokendata ){ + fts5TokendataIterSortMap(p, s2.pT); + (*ppIter)->pTokenDataIter = s2.pT; + s2.pT = 0; + } } + fts5TokendataIterDelete(s2.pT); fts5BufferFree(&s.doclist); fts5StructureRelease(pStruct); sqlite3_free(s.aBuf); @@ -6658,71 +6922,6 @@ static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ pSeg->pLeaf = 0; } -/* -** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an -** array of these for each row it visits (so all iRowid fields are the same). -** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an -** array of these for the entire query (in which case iRowid fields may take -** a variety of values). -** -** Each instance in the array indicates the iterator (and therefore term) -** associated with position iPos of rowid iRowid. This is used by the -** xInstToken() API. -** -** iRowid: -** Rowid for the current entry. -** -** iPos: -** Position of current entry within row. In the usual ((iCol<<32)+iOff) -** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()). -** -** iIter: -** If the Fts5TokenDataIter iterator that the entry is part of is -** actually an iterator (i.e. with nIter>0, not just a container for -** Fts5TokenDataMap structures), then this variable is an index into -** the apIter[] array. The corresponding term is that which the iterator -** at apIter[iIter] currently points to. -** -** Or, if the Fts5TokenDataIter iterator is just a container object -** (nIter==0), then iIter is an index into the term.p[] buffer where -** the term is stored. -** -** nByte: -** In the case where iIter is an index into term.p[], this variable -** is the size of the term in bytes. If iIter is an index into apIter[], -** this variable is unused. -*/ -struct Fts5TokenDataMap { - i64 iRowid; /* Row this token is located in */ - i64 iPos; /* Position of token */ - int iIter; /* Iterator token was read from */ - int nByte; /* Length of token in bytes (or 0) */ -}; - -/* -** An object used to supplement Fts5Iter for tokendata=1 iterators. -** -** This object serves two purposes. The first is as a container for an array -** of Fts5TokenDataMap structures, which are used to find the token required -** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and -** aMap[] variables. -*/ -struct Fts5TokenDataIter { - int nMapAlloc; /* Allocated size of aMap[] in entries */ - int nMap; /* Number of valid entries in aMap[] */ - Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */ - - /* The following are used for prefix-queries only. */ - Fts5Buffer terms; - - /* The following are used for other full-token tokendata queries only. */ - int nIter; - int nIterAlloc; - Fts5PoslistReader *aPoslistReader; - int *aPoslistToIter; - Fts5Iter *apIter[1]; -}; - /* ** This function appends iterator pAppend to Fts5TokenDataIter pIn and ** returns the result. @@ -6759,57 +6958,6 @@ static Fts5TokenDataIter *fts5AppendTokendataIter( return pRet; } -/* -** Delete an Fts5TokenDataIter structure and its contents. -*/ -static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ - if( pSet ){ - int ii; - for(ii=0; iinIter; ii++){ - fts5MultiIterFree(pSet->apIter[ii]); - } - fts5BufferFree(&pSet->terms); - sqlite3_free(pSet->aPoslistReader); - sqlite3_free(pSet->aMap); - sqlite3_free(pSet); - } -} - -/* -** Append a mapping to the token-map belonging to object pT. -*/ -static void fts5TokendataIterAppendMap( - Fts5Index *p, - Fts5TokenDataIter *pT, - int iIter, - int nByte, - i64 iRowid, - i64 iPos -){ - if( p->rc==SQLITE_OK ){ - if( pT->nMap==pT->nMapAlloc ){ - int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; - int nByte = nNew * sizeof(Fts5TokenDataMap); - Fts5TokenDataMap *aNew; - - aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte); - if( aNew==0 ){ - p->rc = SQLITE_NOMEM; - return; - } - - pT->aMap = aNew; - pT->nMapAlloc = nNew; - } - - pT->aMap[pT->nMap].iRowid = iRowid; - pT->aMap[pT->nMap].iPos = iPos; - pT->aMap[pT->nMap].iIter = iIter; - pT->aMap[pT->nMap].nByte = nByte; - pT->nMap++; - } -} - /* ** The iterator passed as the only argument must be a tokendata=1 iterator ** (pIter->pTokenDataIter!=0). This function sets the iterator output @@ -7285,127 +7433,6 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ return (z ? &z[1] : 0); } -/* -** The two input arrays - a1[] and a2[] - are in sorted order. This function -** merges the two arrays together and writes the result to output array -** aOut[]. aOut[] is guaranteed to be large enough to hold the result. -** -** Duplicate entries are copied into the output. So the size of the output -** array is always (n1+n2) entries. -*/ -static void fts5TokendataMerge( - Fts5TokenDataMap *a1, int n1, /* Input array 1 */ - Fts5TokenDataMap *a2, int n2, /* Input array 2 */ - Fts5TokenDataMap *aOut /* Output array */ -){ - int i1 = 0; - int i2 = 0; - - assert( n1>=0 && n2>=0 ); - while( i1=n2 || (i1aMap[] array. -** -** The sorting algorithm requries a malloc(). If this fails, an error code -** is left in Fts5Index.rc before returning. -*/ -static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ - Fts5TokenDataMap *aTmp = 0; - int nByte = pT->nMap * sizeof(Fts5TokenDataMap); - - aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte); - if( aTmp ){ - Fts5TokenDataMap *a1 = pT->aMap; - Fts5TokenDataMap *a2 = aTmp; - i64 nHalf; - - for(nHalf=1; nHalfnMap; nHalf=nHalf*2){ - int i1; - for(i1=0; i1nMap; i1+=(nHalf*2)){ - int n1 = MIN(nHalf, pT->nMap-i1); - int n2 = MIN(nHalf, pT->nMap-i1-n1); - fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]); - } - SWAPVAL(Fts5TokenDataMap*, a1, a2); - } - - if( a1!=pT->aMap ){ - memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap)); - } - sqlite3_free(aTmp); - -#ifdef SQLITE_DEBUG - { - int ii; - for(ii=1; iinMap; ii++){ - Fts5TokenDataMap *p1 = &pT->aMap[ii-1]; - Fts5TokenDataMap *p2 = &pT->aMap[ii]; - assert( p1->iRowidiRowid - || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos) - ); - } - } -#endif - } -} - -/* -** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata() -** to pass data to prefixIterSetupTokendataCb(). -*/ -typedef struct TokendataSetupCtx TokendataSetupCtx; -struct TokendataSetupCtx { - Fts5TokenDataIter *pT; /* Object being populated with mappings */ - int iTermOff; /* Offset of current term in terms.p[] */ - int nTermByte; /* Size of current term in bytes */ -}; - -/* -** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This -** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each -** position in the current position-list. It doesn't matter that some of -** these may be out of order - they will be sorted later. -*/ -static void prefixIterSetupTokendataCb( - Fts5Index *p, - void *pCtx, - Fts5Iter *p1, - const u8 *pNew, - int nNew -){ - TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx; - int iPosOff = 0; - i64 iPos = 0; - - if( pNew ){ - pSetup->nTermByte = nNew-1; - pSetup->iTermOff = pSetup->pT->terms.n; - fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1); - } - - while( 0==sqlite3Fts5PoslistNext64( - p1->base.pData, p1->base.nData, &iPosOff, &iPos - ) ){ - fts5TokendataIterAppendMap(p, - pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos - ); - } -} - /* ** pIter is a prefix query. This function populates pIter->pTokenDataIter ** with an Fts5TokenDataIter object containing mappings for all rows diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 5713fccdd1..a65750f8e1 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -93,6 +93,7 @@ struct Fts5Global { #define FTS5_LOCALE_HDR_SIZE ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) #define FTS5_LOCALE_HDR(pConfig) ((const u8*)(pConfig->pGlobal->aLocaleHdr)) +#define FTS5_INSTTOKEN_SUBTYPE 73 /* ** Each auxiliary function registered with the FTS5 module is represented @@ -1417,6 +1418,7 @@ static int fts5FilterMethod( sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ int iCol; /* Column on LHS of MATCH operator */ char **pzErrmsg = pConfig->pzErrmsg; + int bPrefixInsttoken = pConfig->bPrefixInsttoken; int i; int iIdxStr = 0; Fts5Expr *pExpr = 0; @@ -1452,6 +1454,9 @@ static int fts5FilterMethod( rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset); if( rc!=SQLITE_OK ) goto filter_out; if( zText==0 ) zText = ""; + if( sqlite3_value_subtype(apVal[i])==FTS5_INSTTOKEN_SUBTYPE ){ + pConfig->bPrefixInsttoken = 1; + } iCol = 0; do{ @@ -1592,6 +1597,7 @@ static int fts5FilterMethod( filter_out: sqlite3Fts5ExprFree(pExpr); pConfig->pzErrmsg = pzErrmsg; + pConfig->bPrefixInsttoken = bPrefixInsttoken; return rc; } @@ -3651,6 +3657,19 @@ static void fts5LocaleFunc( } } +/* +** Implementation of fts5_insttoken() function. +*/ +static void fts5InsttokenFunc( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apArg /* Function arguments */ +){ + assert( nArg==1 ); + sqlite3_result_value(pCtx, apArg[0]); + sqlite3_result_subtype(pCtx, FTS5_INSTTOKEN_SUBTYPE); +} + /* ** Return true if zName is the extension on one of the shadow tables used ** by this module. @@ -3780,10 +3799,17 @@ static int fts5Init(sqlite3 *db){ if( rc==SQLITE_OK ){ rc = sqlite3_create_function( db, "fts5_locale", 2, - SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE, + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE|SQLITE_SUBTYPE, p, fts5LocaleFunc, 0, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + db, "fts5_insttoken", 1, + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE, + p, fts5InsttokenFunc, 0, 0 + ); + } } /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 9741f786e8..be77cbfca5 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -22,34 +22,40 @@ ifcapable !fts5 { } foreach_detail_mode $testprefix { +foreach {tn insttoken} { + 1 0 + 2 1 +} { +reset_db sqlite3_fts5_register_origintext db -do_execsql_test 1.0 { +do_execsql_test $tn.1.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); } -do_execsql_test 1.1 { +do_execsql_test $tn.1.1 { INSERT INTO ft VALUES('Hello world'); } -do_execsql_test 1.2 { +do_execsql_test $tn.1.2 { INSERT INTO ft(ft) VALUES('integrity-check'); } proc b {x} { string map [list "\0" "."] $x } db func b b -do_execsql_test 1.3 { +do_execsql_test $tn.1.3 { select b(term) from vocab; } { hello.Hello world } -do_execsql_test 1.4 { +do_execsql_test $tn.1.4 { SELECT rowid FROM ft('Hello'); } {1} @@ -88,33 +94,34 @@ proc document {} { db func document document sqlite3_fts5_register_origintext db -do_execsql_test 2.0 { +do_execsql_test $tn.2.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); INSERT INTO ft(ft, rank) VALUES('pgsz', 128); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); } -do_test 2.1 { +do_test $tn.2.1 { for {set ii 0} {$ii < 500} {incr ii} { execsql { INSERT INTO ft VALUES( document() ) } } } {} -do_execsql_test 2.2 { +do_execsql_test $tn.2.2 { INSERT INTO ft(ft) VALUES('integrity-check'); } -do_execsql_test 2.3 { +do_execsql_test $tn.2.3 { INSERT INTO ft(ft, rank) VALUES('merge', 16); } -do_execsql_test 2.4 { +do_execsql_test $tn.2.4 { INSERT INTO ft(ft) VALUES('integrity-check'); } -do_execsql_test 2.5 { +do_execsql_test $tn.2.5 { INSERT INTO ft(ft) VALUES('optimize'); } @@ -122,10 +129,11 @@ do_execsql_test 2.5 { reset_db sqlite3_fts5_register_origintext db -do_execsql_test 3.0 { +do_execsql_test $tn.3.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); INSERT INTO ft(rowid, x) VALUES(1, 'hello'); @@ -137,16 +145,17 @@ do_execsql_test 3.0 { #db func b b #execsql_pp { SELECT b(term) FROM vocab } -do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1 -do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2 -do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3 +do_execsql_test $tn.3.1.1 { SELECT rowid FROM ft('hello') } 1 +do_execsql_test $tn.3.1.2 { SELECT rowid FROM ft('Hello') } 2 +do_execsql_test $tn.3.1.3 { SELECT rowid FROM ft('HELLO') } 3 -do_execsql_test 3.2 { +do_execsql_test $tn.3.2 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% ); + INSERT INTO ft2(ft2, rank) VALUES('insttoken', $insttoken); CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance); INSERT INTO ft2(rowid, x) VALUES(1, 'hello'); @@ -160,16 +169,16 @@ do_execsql_test 3.2 { #db func b b #execsql_pp { SELECT b(term) FROM vocab } -do_execsql_test 3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3} -do_execsql_test 3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} -do_execsql_test 3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} +do_execsql_test $tn.3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3} +do_execsql_test $tn.3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} +do_execsql_test $tn.3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} -do_execsql_test 3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} +do_execsql_test $tn.3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} -do_execsql_test 3.3.5.1 { SELECT rowid FROM ft2('HELLO') ORDER BY rowid DESC} { +do_execsql_test $tn.3.3.5.1 { SELECT rowid FROM ft2('HELLO') ORDER BY rowid DESC} { 3 2 1 } -do_execsql_test 3.3.5.2 { SELECT rowid FROM ft2('HELLO') ORDER BY +rowid DESC} { +do_execsql_test $tn.3.3.5.2 { SELECT rowid FROM ft2('HELLO') ORDER BY +rowid DESC} { 3 2 1 } @@ -183,36 +192,37 @@ proc querytoken {cmd iPhrase iToken} { } sqlite3_fts5_create_function db querytoken querytoken -do_execsql_test 4.0 { +do_execsql_test $tn.4.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); INSERT INTO ft VALUES('one two three four'); } -do_execsql_test 4.1 { +do_execsql_test $tn.4.1 { SELECT rowid, querytoken(ft, 0, 0) FROM ft('TwO') } {1 two.TwO} -do_execsql_test 4.2 { +do_execsql_test $tn.4.2 { SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE') } {1 one} -do_execsql_test 4.3 { +do_execsql_test $tn.4.3 { SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE') } {1 two.TWO} if {"%DETAIL%"=="full"} { # Phrase queries are only supported for detail=full. # - do_execsql_test 4.4 { + do_execsql_test $tn.4.4 { SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"') } {1 three.ThreE} - do_catchsql_test 4.5 { + do_catchsql_test $tn.4.5 { SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} - do_catchsql_test 4.6 { + do_catchsql_test $tn.4.6 { SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} - do_catchsql_test 4.7 { + do_catchsql_test $tn.4.7 { SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} } @@ -228,14 +238,15 @@ proc insttoken {cmd iIdx iToken} { sqlite3_fts5_create_function db insttoken insttoken fts5_aux_test_functions db -do_execsql_test 5.0 { +do_execsql_test $tn.5.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); INSERT INTO ft VALUES('one ONE One oNe oNE one'); } -do_execsql_test 5.1 { +do_execsql_test $tn.5.1 { SELECT insttoken(ft, 0, 0), insttoken(ft, 1, 0), insttoken(ft, 2, 0), @@ -247,13 +258,37 @@ do_execsql_test 5.1 { one one.ONE one.One one.oNe one.oNE one } -do_execsql_test 5.2 { +do_execsql_test $tn.5.2 { + SELECT insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0), + insttoken(ft, 3, 0), + insttoken(ft, 4, 0), + insttoken(ft, 5, 0) + FROM ft('on*'); +} { + one one.ONE one.One one.oNe one.oNE one +} + +do_execsql_test $tn.5.3 { + SELECT insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0), + insttoken(ft, 3, 0), + insttoken(ft, 4, 0), + insttoken(ft, 5, 0) + FROM ft(fts5_insttoken('on*')); +} { + one one.ONE one.One one.oNe one.oNE one +} + +do_execsql_test $tn.5.4 { SELECT insttoken(ft, 1, 0) FROM ft('one'); } { one.ONE } -do_execsql_test 5.3 { +do_execsql_test $tn.5.5 { SELECT fts5_test_poslist(ft) FROM ft('one'); } { {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5} @@ -267,10 +302,11 @@ do_execsql_test 5.3 { # reset_db sqlite3_fts5_register_origintext db -do_execsql_test 6.0 { +do_execsql_test $tn.6.0 { CREATE VIRTUAL TABLE ft USING fts5( x, y, tokenize='origintext unicode61', detail=%DETAIL%, tokendata=0 ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); INSERT INTO ft VALUES('One Two', 'Three two'); INSERT INTO ft VALUES('three Three', 'one One'); @@ -286,34 +322,35 @@ proc tokens {cmd} { } sqlite3_fts5_create_function db tokens tokens -do_execsql_test 6.1 { +do_execsql_test $tn.6.1 { SELECT rowid, tokens(ft) FROM ft('One'); } {1 one.One 2 one.One} -do_execsql_test 6.2 { +do_execsql_test $tn.6.2 { SELECT rowid, tokens(ft) FROM ft('on*'); } {1 one.One 2 {one one.One}} -do_execsql_test 6.3 { +do_execsql_test $tn.6.3 { SELECT rowid, tokens(ft) FROM ft('Three*'); } {1 three.Three 2 three.Three} fts5_aux_test_functions db -do_catchsql_test 6.4 { +do_catchsql_test $tn.6.4 { SELECT fts5_test_insttoken(ft, -1, 0) FROM ft('one'); } {1 SQLITE_RANGE} -do_catchsql_test 6.5 { +do_catchsql_test $tn.6.5 { SELECT fts5_test_insttoken(ft, 1, 0) FROM ft('one'); } {1 SQLITE_RANGE} -do_catchsql_test 6.6 { +do_catchsql_test $tn.6.6 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokendata=2); } {1 {malformed tokendata=... directive}} -do_catchsql_test 6.7 { +do_catchsql_test $tn.6.7 { CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', tokendata=11); } {1 {malformed tokendata=... directive}} +} } finish_test diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test index a4bca0de9b..351ab1f617 100644 --- a/ext/fts5/test/fts5origintext3.test +++ b/ext/fts5/test/fts5origintext3.test @@ -22,6 +22,11 @@ ifcapable !fts5 { } foreach_detail_mode $testprefix { + foreach {tn insttoken} { + 1 0 + 2 1 + } { + reset_db sqlite3_fts5_register_origintext db @@ -32,21 +37,25 @@ foreach_detail_mode $testprefix { } sqlite3_fts5_create_function db insttoken insttoken - do_execsql_test 1.0 { + do_execsql_test $tn.1.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% ); } + + do_execsql_test $tn.1.0.1 { + INSERT INTO ft(ft, rank) VALUES('insttoken', 1); + } - do_execsql_test 1.1 { + do_execsql_test $tn.1.1 { INSERT INTO ft VALUES('Hello world HELLO WORLD hello'); } - do_execsql_test 1.2 { + do_execsql_test $tn.1.2 { SELECT fts5_test_poslist(ft) FROM ft('hello'); } {{0.0.0 0.0.2 0.0.4}} - do_execsql_test 1.3 { + do_execsql_test $tn.1.3 { SELECT insttoken(ft, 0, 0), insttoken(ft, 1, 0), @@ -54,7 +63,15 @@ foreach_detail_mode $testprefix { FROM ft('hello'); } {hello.Hello hello.HELLO hello} - do_execsql_test 1.4 { + do_execsql_test $tn.1.3.1 { + SELECT + insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0) + FROM ft('hel*'); + } {hello.Hello hello.HELLO hello} + + do_execsql_test $tn.1.4 { SELECT insttoken(ft, 0, 0), insttoken(ft, 1, 0), @@ -62,7 +79,7 @@ foreach_detail_mode $testprefix { FROM ft('hello') ORDER BY rank; } {hello.Hello hello.HELLO hello} - do_execsql_test 1.5 { + do_execsql_test $tn.1.5 { CREATE VIRTUAL TABLE ft2 USING fts5( x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% ); @@ -71,11 +88,11 @@ foreach_detail_mode $testprefix { INSERT INTO ft2(rowid, x) VALUES(3, 'THREE one two three THREE'); } - do_execsql_test 1.6 { + do_execsql_test $tn.1.6 { SELECT insttoken(ft2, 0, 0), rowid FROM ft2('three') ORDER BY rank; } {three.THREE 3 three 1 three 2} - do_execsql_test 1.7 { + do_execsql_test $tn.1.7 { INSERT INTO ft2(rowid, x) VALUES(10, 'aaa bbb BBB'); INSERT INTO ft2(rowid, x) VALUES(12, 'bbb bbb bbb'); INSERT INTO ft2(rowid, x) VALUES(13, 'bbb bbb bbb'); @@ -92,16 +109,16 @@ foreach_detail_mode $testprefix { INSERT INTO ft2(rowid, x) VALUES(24, 'aaa bbb BBB'); } - do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24} - do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24} + do_execsql_test $tn.1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24} + do_execsql_test $tn.1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24} - do_execsql_test 2.0 { + do_execsql_test $tn.2.0 { CREATE VIRTUAL TABLE ft3 USING fts5( x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%, prefix=2 ); } - do_execsql_test 2.1 { + do_execsql_test $tn.2.1 { INSERT INTO ft3(rowid, x) VALUES(1, 'one'); INSERT INTO ft3(rowid, x) VALUES(2, 'ONE'); INSERT INTO ft3(rowid, x) VALUES(3, 'ONT'); @@ -109,15 +126,15 @@ foreach_detail_mode $testprefix { INSERT INTO ft3(rowid, x) VALUES(5, 'On'); } - do_execsql_test 2.2 { + do_execsql_test $tn.2.2 { SELECT rowid FROM ft3('on*'); } {1 2 3 4 5} - do_execsql_test 2.3 { + do_execsql_test $tn.2.3 { SELECT rowid, insttoken(ft3, 0, 0) FROM ft3('on*'); } {1 one 2 one.ONE 3 ont.ONT 4 on 5 on.On} - + } } finish_test diff --git a/manifest b/manifest index 2bd0c9cf8d..603ced6491 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\sinto\sthis\sbranch. -D 2024-11-01T19:41:22.452 +C Allow\san\sfts5\stable\sor\squery\sto\sbe\sconfigured\sto\scollect\sxInstToken\sdata\sfor\sany\sprefix\sterms\sas\spart\sof\sthe\sfirst\sparse\sof\sthe\smain\sindex,\sif\sany. +D 2024-11-02T19:10:50.264 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md c5b4009dca54d127d2d6033c22fd9cc34f53bedb6ef12c7cbaa468381c74ab28 @@ -49,7 +49,7 @@ F autosetup/cc.tcl 7e2fe943ae9d45cf39e9f5b05b6230df8e719415edea5af06c30eb68680bd F autosetup/default.auto 5cdf016de2140e50f1db190a02039dc42fb390af1dda4cc4853e3042a9ef0e82 F autosetup/jimsh0.c 27ea5f221359ef6c58780fc6c185aadbf8d3bee9a021331a3e5de0eba0dc6de6 F autosetup/pkg-config.tcl 4e635bf39022ff65e0d5434339dd41503ea48fc53822c9c5bde88b02d3d952ba -F autosetup/proj.tcl 68362ca12e1a32fe73ece32b59a8e8e02a3983295f2dd82c9c4700507bade02c w autosetup/hwaci-common.tcl +F autosetup/proj.tcl 68362ca12e1a32fe73ece32b59a8e8e02a3983295f2dd82c9c4700507bade02c F autosetup/system.tcl 3a39d6e0b3bfba526fd39afe07c1d0d325e5a31925013a1ba7c671e1128e31bb F autosetup/tmake.auto eaebc74ad538dfdd3c817c27eefc31930c20510c4f3a3704071f6cb0629ed71f F autosetup/tmake.tcl a275793ec1b6f8708179af0acef1f6f10d46c2920739743f7a8720c6d700c7a9 @@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 63db9624ccf70d4887836c320eda93ab552f21008f3be7e F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl 009cf59c77afa86d137b0cca3e3b1a5efbe2264faa2df233f9a7aa8563926d15 F ext/fts5/fts5.h 6b4b92df890965567360db5f1ead24fd13a72cb23b95e4ed2ff58d1d89f7aa42 -F ext/fts5/fts5Int.h a282f33a260ddce09dc2b0334d41d83aab0893b2b1656eb83c595a3d0eec2975 +F ext/fts5/fts5Int.h 6abff7dd770dc5969c994c281e6e77fc277ce414d56cc4a62c145cc7036b0b67 F ext/fts5/fts5_aux.c 65a0468dd177d6093aa9ae1622e6d86b0136b8d267c62c0ad6493ad1e9a3d759 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 -F ext/fts5/fts5_config.c a6633d88596758941c625b526075b85d3d9fd1089d8d9eab5db6e8a71fd347ad +F ext/fts5/fts5_config.c e7d8dd062b44a66cd77e5a0f74f23a2354cd1f3f8575afb967b2773c3384f7f8 F ext/fts5/fts5_expr.c 69b8d976058512c07dfe86e229521b7a871768157bd1607cedf1a5038dfd72c9 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c ee650a838fc0591776f7582de578f414959a76cc0118851e4c1f7d13e7365379 -F ext/fts5/fts5_main.c 50eb059e51d730e8e0c77df4e568b018079e112a755c094488b0d5b1aa06afbb +F ext/fts5/fts5_index.c 2cef40d6fdd761229dd4127e0b1ddcb61dfd6a4ac7e73653b7fddbe0075e50be +F ext/fts5/fts5_main.c b2ec6bf97fc378906c0e78c61f10ca8e64f15e03237f2521f7d81736983be378 F ext/fts5/fts5_storage.c 337b05e4c66fc822d031e264d65bde807ec2fab08665ca2cc8aaf9c5fa06792c F ext/fts5/fts5_tcl.c 5b16a249962809b2aaaab964bf58838ea72f30b8b12373cafe612f8cc71e2a40 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -214,9 +214,9 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 264b9101721c17d06d1d174feb743fda3ddc89fad41dee980fef821428258e47 F ext/fts5/test/fts5optimize2.test 795d4ae5f66a7239cf8d5aef4c2ea96aeb8bcd907bd9be0cfe22064fc71a44ed F ext/fts5/test/fts5optimize3.test 1653029284e10e0715246819893ba30565c4ead0d0fc470adae92c353ea857d3 -F ext/fts5/test/fts5origintext.test 63d5b0dc00f0104add8960da0705a70bffd4d86b6feb6ddbb38bff21141d42f0 +F ext/fts5/test/fts5origintext.test 3b73aa036ce5244bb7c5782c5441b979585bdca026accf75d16026a2a8119c09 F ext/fts5/test/fts5origintext2.test f4505ff79bf7369f2b8b10b9cef7476049d844e20b37f29cad3a8b8d5ac6f9ba -F ext/fts5/test/fts5origintext3.test 1f5174a9f4cf42f58f833dbfb314940793ca4723854ec2651e7530ddb35a66a6 +F ext/fts5/test/fts5origintext3.test 4988b6375acc3bbb0515667765f57e389caf449814af9c1095c053f7de2b4223 F ext/fts5/test/fts5origintext4.test 0d3ef0a8038f471dbc83001c34fe5f7ae39b571bfc209670771eb28bc0fc50e8 F ext/fts5/test/fts5origintext5.test ee12b440ec335e5b422d1668aca0051b52ff28b6ee67073e8bbc29f509fd562b F ext/fts5/test/fts5phrase.test bb2554bb61d15f859678c96dc89a7de415cd5fc3b7b54c29b82a0d0ad138091c @@ -2199,8 +2199,8 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bce9a524de6dda87daa90395cd55713d2d3ccfc090e53a947548e434db5eef5e 2dcc465a7090811ddbc210673f37b4c3a4439c501874abefd403efe1e98f5b17 -R 86617b1841d68e4c10c087c197cdd65c +P 790c56d493c66a2136e24d349d169639809d70bfab6996975a403be568a267a5 +R 71ba4975c4c76073cda6dd2f314d94d1 U dan -Z c01ad82d76d71ffe9e5032c32219c470 +Z 95da39a03d7bb4b9bc58c6dbf7b809e5 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 05f1de1961..94e1307210 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -790c56d493c66a2136e24d349d169639809d70bfab6996975a403be568a267a5 +46929ae92b26f02bc70de9931b21a8a7cf9a2453d5fb07f68b712f62e28e9152 -- 2.47.2