From: dan Date: Sat, 2 Nov 2024 19:10:50 +0000 (+0000) Subject: Allow an fts5 table or query to be configured to collect xInstToken data for any... X-Git-Tag: major-relase~109^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=090b8649be3603920327a0ae45c2cf9384d8615b;p=thirdparty%2Fsqlite.git Allow an fts5 table or query to be configured to collect xInstToken data for any prefix terms as part of the first parse of the main index, if any. FossilOrigin-Name: 46929ae92b26f02bc70de9931b21a8a7cf9a2453d5fb07f68b712f62e28e9152 --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index bb72f45c7b..832f9ad477 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -247,7 +247,8 @@ struct Fts5Config { char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ int bSecureDelete; /* 'secure-delete' */ - int nDeleteMerge; /* 'deletemerge' */ + int nDeleteMerge; /* 'deletemerge' */ + int bPrefixInsttoken; /* 'prefix-insttoken' */ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index a674b44d0b..eea82b046d 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -1026,6 +1026,19 @@ int sqlite3Fts5ConfigSetValue( }else{ pConfig->bSecureDelete = (bVal ? 1 : 0); } + } + + else if( 0==sqlite3_stricmp(zKey, "insttoken") ){ + int bVal = -1; + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + bVal = sqlite3_value_int(pVal); + } + if( bVal<0 ){ + *pbBadkey = 1; + }else{ + pConfig->bPrefixInsttoken = (bVal ? 1 : 0); + } + }else{ *pbBadkey = 1; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 04f0a6740c..e7028e411c 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6260,6 +6260,7 @@ static int fts5VisitEntries( const u8 *pNew = 0; p1->xSetOutputs(p1, pSeg); + if( p->rc ) break; if( bNewTerm ){ nNew = pSeg->term.n; @@ -6275,6 +6276,247 @@ static int fts5VisitEntries( return p->rc; } + +/* +** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an +** array of these for each row it visits (so all iRowid fields are the same). +** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an +** array of these for the entire query (in which case iRowid fields may take +** a variety of values). +** +** Each instance in the array indicates the iterator (and therefore term) +** associated with position iPos of rowid iRowid. This is used by the +** xInstToken() API. +** +** iRowid: +** Rowid for the current entry. +** +** iPos: +** Position of current entry within row. In the usual ((iCol<<32)+iOff) +** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()). +** +** iIter: +** If the Fts5TokenDataIter iterator that the entry is part of is +** actually an iterator (i.e. with nIter>0, not just a container for +** Fts5TokenDataMap structures), then this variable is an index into +** the apIter[] array. The corresponding term is that which the iterator +** at apIter[iIter] currently points to. +** +** Or, if the Fts5TokenDataIter iterator is just a container object +** (nIter==0), then iIter is an index into the term.p[] buffer where +** the term is stored. +** +** nByte: +** In the case where iIter is an index into term.p[], this variable +** is the size of the term in bytes. If iIter is an index into apIter[], +** this variable is unused. +*/ +struct Fts5TokenDataMap { + i64 iRowid; /* Row this token is located in */ + i64 iPos; /* Position of token */ + int iIter; /* Iterator token was read from */ + int nByte; /* Length of token in bytes (or 0) */ +}; + +/* +** An object used to supplement Fts5Iter for tokendata=1 iterators. +** +** This object serves two purposes. The first is as a container for an array +** of Fts5TokenDataMap structures, which are used to find the token required +** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and +** aMap[] variables. +*/ +struct Fts5TokenDataIter { + int nMapAlloc; /* Allocated size of aMap[] in entries */ + int nMap; /* Number of valid entries in aMap[] */ + Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */ + + /* The following are used for prefix-queries only. */ + Fts5Buffer terms; + + /* The following are used for other full-token tokendata queries only. */ + int nIter; + int nIterAlloc; + Fts5PoslistReader *aPoslistReader; + int *aPoslistToIter; + Fts5Iter *apIter[1]; +}; + +/* +** The two input arrays - a1[] and a2[] - are in sorted order. This function +** merges the two arrays together and writes the result to output array +** aOut[]. aOut[] is guaranteed to be large enough to hold the result. +** +** Duplicate entries are copied into the output. So the size of the output +** array is always (n1+n2) entries. +*/ +static void fts5TokendataMerge( + Fts5TokenDataMap *a1, int n1, /* Input array 1 */ + Fts5TokenDataMap *a2, int n2, /* Input array 2 */ + Fts5TokenDataMap *aOut /* Output array */ +){ + int i1 = 0; + int i2 = 0; + + assert( n1>=0 && n2>=0 ); + while( i1=n2 || (i1rc==SQLITE_OK ){ + if( pT->nMap==pT->nMapAlloc ){ + int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; + int nByte = nNew * sizeof(Fts5TokenDataMap); + Fts5TokenDataMap *aNew; + + aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + + pT->aMap = aNew; + pT->nMapAlloc = nNew; + } + + pT->aMap[pT->nMap].iRowid = iRowid; + pT->aMap[pT->nMap].iPos = iPos; + pT->aMap[pT->nMap].iIter = iIter; + pT->aMap[pT->nMap].nByte = nByte; + pT->nMap++; + } +} + +/* +** Sort the contents of the pT->aMap[] array. +** +** The sorting algorithm requries a malloc(). If this fails, an error code +** is left in Fts5Index.rc before returning. +*/ +static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ + Fts5TokenDataMap *aTmp = 0; + int nByte = pT->nMap * sizeof(Fts5TokenDataMap); + + aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte); + if( aTmp ){ + Fts5TokenDataMap *a1 = pT->aMap; + Fts5TokenDataMap *a2 = aTmp; + i64 nHalf; + + for(nHalf=1; nHalfnMap; nHalf=nHalf*2){ + int i1; + for(i1=0; i1nMap; i1+=(nHalf*2)){ + int n1 = MIN(nHalf, pT->nMap-i1); + int n2 = MIN(nHalf, pT->nMap-i1-n1); + fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]); + } + SWAPVAL(Fts5TokenDataMap*, a1, a2); + } + + if( a1!=pT->aMap ){ + memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap)); + } + sqlite3_free(aTmp); + +#ifdef SQLITE_DEBUG + { + int ii; + for(ii=1; iinMap; ii++){ + Fts5TokenDataMap *p1 = &pT->aMap[ii-1]; + Fts5TokenDataMap *p2 = &pT->aMap[ii]; + assert( p1->iRowidiRowid + || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos) + ); + } + } +#endif + } +} + +/* +** Delete an Fts5TokenDataIter structure and its contents. +*/ +static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ + if( pSet ){ + int ii; + for(ii=0; iinIter; ii++){ + fts5MultiIterFree(pSet->apIter[ii]); + } + fts5BufferFree(&pSet->terms); + sqlite3_free(pSet->aPoslistReader); + sqlite3_free(pSet->aMap); + sqlite3_free(pSet); + } +} + + +/* +** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata() +** to pass data to prefixIterSetupTokendataCb(). +*/ +typedef struct TokendataSetupCtx TokendataSetupCtx; +struct TokendataSetupCtx { + Fts5TokenDataIter *pT; /* Object being populated with mappings */ + int iTermOff; /* Offset of current term in terms.p[] */ + int nTermByte; /* Size of current term in bytes */ +}; + +/* +** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This +** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each +** position in the current position-list. It doesn't matter that some of +** these may be out of order - they will be sorted later. +*/ +static void prefixIterSetupTokendataCb( + Fts5Index *p, + void *pCtx, + Fts5Iter *p1, + const u8 *pNew, + int nNew +){ + TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx; + int iPosOff = 0; + i64 iPos = 0; + + if( pNew ){ + pSetup->nTermByte = nNew-1; + pSetup->iTermOff = pSetup->pT->terms.n; + fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1); + } + + while( 0==sqlite3Fts5PoslistNext64( + p1->base.pData, p1->base.nData, &iPosOff, &iPos + ) ){ + fts5TokendataIterAppendMap(p, + pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos + ); + } +} + + /* ** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries(). */ @@ -6287,6 +6529,7 @@ struct PrefixSetupCtx { Fts5Buffer *aBuf; int nBuf; Fts5Buffer doclist; + TokendataSetupCtx *pTokendata; }; /* @@ -6331,6 +6574,10 @@ static void prefixIterSetupCb( ); pSetup->iLastRowid = p1->base.iRowid; } + + if( pSetup->pTokendata ){ + prefixIterSetupTokendataCb(p, (void*)pSetup->pTokendata, p1, pNew, nNew); + } } static void fts5SetupPrefixIter( @@ -6344,11 +6591,21 @@ static void fts5SetupPrefixIter( ){ Fts5Structure *pStruct; PrefixSetupCtx s; + TokendataSetupCtx s2; memset(&s, 0, sizeof(s)); + memset(&s2, 0, sizeof(s2)); + s.nMerge = 1; s.iLastRowid = 0; s.nBuf = 32; + if( iIdx==0 + && p->pConfig->eDetail==FTS5_DETAIL_FULL + && p->pConfig->bPrefixInsttoken + ){ + s.pTokendata = &s2; + s2.pT = (Fts5TokenDataIter*)fts5IdxMalloc(p, sizeof(*s2.pT)); + } if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ s.xMerge = fts5MergeRowidLists; @@ -6400,8 +6657,15 @@ static void fts5SetupPrefixIter( if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n); fts5MultiIterNew2(p, pData, bDesc, ppIter); } + + if( p->rc==SQLITE_OK && s.pTokendata ){ + fts5TokendataIterSortMap(p, s2.pT); + (*ppIter)->pTokenDataIter = s2.pT; + s2.pT = 0; + } } + fts5TokendataIterDelete(s2.pT); fts5BufferFree(&s.doclist); fts5StructureRelease(pStruct); sqlite3_free(s.aBuf); @@ -6658,71 +6922,6 @@ static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ pSeg->pLeaf = 0; } -/* -** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an -** array of these for each row it visits (so all iRowid fields are the same). -** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an -** array of these for the entire query (in which case iRowid fields may take -** a variety of values). -** -** Each instance in the array indicates the iterator (and therefore term) -** associated with position iPos of rowid iRowid. This is used by the -** xInstToken() API. -** -** iRowid: -** Rowid for the current entry. -** -** iPos: -** Position of current entry within row. In the usual ((iCol<<32)+iOff) -** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()). -** -** iIter: -** If the Fts5TokenDataIter iterator that the entry is part of is -** actually an iterator (i.e. with nIter>0, not just a container for -** Fts5TokenDataMap structures), then this variable is an index into -** the apIter[] array. The corresponding term is that which the iterator -** at apIter[iIter] currently points to. -** -** Or, if the Fts5TokenDataIter iterator is just a container object -** (nIter==0), then iIter is an index into the term.p[] buffer where -** the term is stored. -** -** nByte: -** In the case where iIter is an index into term.p[], this variable -** is the size of the term in bytes. If iIter is an index into apIter[], -** this variable is unused. -*/ -struct Fts5TokenDataMap { - i64 iRowid; /* Row this token is located in */ - i64 iPos; /* Position of token */ - int iIter; /* Iterator token was read from */ - int nByte; /* Length of token in bytes (or 0) */ -}; - -/* -** An object used to supplement Fts5Iter for tokendata=1 iterators. -** -** This object serves two purposes. The first is as a container for an array -** of Fts5TokenDataMap structures, which are used to find the token required -** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and -** aMap[] variables. -*/ -struct Fts5TokenDataIter { - int nMapAlloc; /* Allocated size of aMap[] in entries */ - int nMap; /* Number of valid entries in aMap[] */ - Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */ - - /* The following are used for prefix-queries only. */ - Fts5Buffer terms; - - /* The following are used for other full-token tokendata queries only. */ - int nIter; - int nIterAlloc; - Fts5PoslistReader *aPoslistReader; - int *aPoslistToIter; - Fts5Iter *apIter[1]; -}; - /* ** This function appends iterator pAppend to Fts5TokenDataIter pIn and ** returns the result. @@ -6759,57 +6958,6 @@ static Fts5TokenDataIter *fts5AppendTokendataIter( return pRet; } -/* -** Delete an Fts5TokenDataIter structure and its contents. -*/ -static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ - if( pSet ){ - int ii; - for(ii=0; iinIter; ii++){ - fts5MultiIterFree(pSet->apIter[ii]); - } - fts5BufferFree(&pSet->terms); - sqlite3_free(pSet->aPoslistReader); - sqlite3_free(pSet->aMap); - sqlite3_free(pSet); - } -} - -/* -** Append a mapping to the token-map belonging to object pT. -*/ -static void fts5TokendataIterAppendMap( - Fts5Index *p, - Fts5TokenDataIter *pT, - int iIter, - int nByte, - i64 iRowid, - i64 iPos -){ - if( p->rc==SQLITE_OK ){ - if( pT->nMap==pT->nMapAlloc ){ - int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; - int nByte = nNew * sizeof(Fts5TokenDataMap); - Fts5TokenDataMap *aNew; - - aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte); - if( aNew==0 ){ - p->rc = SQLITE_NOMEM; - return; - } - - pT->aMap = aNew; - pT->nMapAlloc = nNew; - } - - pT->aMap[pT->nMap].iRowid = iRowid; - pT->aMap[pT->nMap].iPos = iPos; - pT->aMap[pT->nMap].iIter = iIter; - pT->aMap[pT->nMap].nByte = nByte; - pT->nMap++; - } -} - /* ** The iterator passed as the only argument must be a tokendata=1 iterator ** (pIter->pTokenDataIter!=0). This function sets the iterator output @@ -7285,127 +7433,6 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ return (z ? &z[1] : 0); } -/* -** The two input arrays - a1[] and a2[] - are in sorted order. This function -** merges the two arrays together and writes the result to output array -** aOut[]. aOut[] is guaranteed to be large enough to hold the result. -** -** Duplicate entries are copied into the output. So the size of the output -** array is always (n1+n2) entries. -*/ -static void fts5TokendataMerge( - Fts5TokenDataMap *a1, int n1, /* Input array 1 */ - Fts5TokenDataMap *a2, int n2, /* Input array 2 */ - Fts5TokenDataMap *aOut /* Output array */ -){ - int i1 = 0; - int i2 = 0; - - assert( n1>=0 && n2>=0 ); - while( i1=n2 || (i1aMap[] array. -** -** The sorting algorithm requries a malloc(). If this fails, an error code -** is left in Fts5Index.rc before returning. -*/ -static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ - Fts5TokenDataMap *aTmp = 0; - int nByte = pT->nMap * sizeof(Fts5TokenDataMap); - - aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte); - if( aTmp ){ - Fts5TokenDataMap *a1 = pT->aMap; - Fts5TokenDataMap *a2 = aTmp; - i64 nHalf; - - for(nHalf=1; nHalfnMap; nHalf=nHalf*2){ - int i1; - for(i1=0; i1nMap; i1+=(nHalf*2)){ - int n1 = MIN(nHalf, pT->nMap-i1); - int n2 = MIN(nHalf, pT->nMap-i1-n1); - fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]); - } - SWAPVAL(Fts5TokenDataMap*, a1, a2); - } - - if( a1!=pT->aMap ){ - memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap)); - } - sqlite3_free(aTmp); - -#ifdef SQLITE_DEBUG - { - int ii; - for(ii=1; iinMap; ii++){ - Fts5TokenDataMap *p1 = &pT->aMap[ii-1]; - Fts5TokenDataMap *p2 = &pT->aMap[ii]; - assert( p1->iRowidiRowid - || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos) - ); - } - } -#endif - } -} - -/* -** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata() -** to pass data to prefixIterSetupTokendataCb(). -*/ -typedef struct TokendataSetupCtx TokendataSetupCtx; -struct TokendataSetupCtx { - Fts5TokenDataIter *pT; /* Object being populated with mappings */ - int iTermOff; /* Offset of current term in terms.p[] */ - int nTermByte; /* Size of current term in bytes */ -}; - -/* -** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This -** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each -** position in the current position-list. It doesn't matter that some of -** these may be out of order - they will be sorted later. -*/ -static void prefixIterSetupTokendataCb( - Fts5Index *p, - void *pCtx, - Fts5Iter *p1, - const u8 *pNew, - int nNew -){ - TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx; - int iPosOff = 0; - i64 iPos = 0; - - if( pNew ){ - pSetup->nTermByte = nNew-1; - pSetup->iTermOff = pSetup->pT->terms.n; - fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1); - } - - while( 0==sqlite3Fts5PoslistNext64( - p1->base.pData, p1->base.nData, &iPosOff, &iPos - ) ){ - fts5TokendataIterAppendMap(p, - pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos - ); - } -} - /* ** pIter is a prefix query. This function populates pIter->pTokenDataIter ** with an Fts5TokenDataIter object containing mappings for all rows diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 5713fccdd1..a65750f8e1 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -93,6 +93,7 @@ struct Fts5Global { #define FTS5_LOCALE_HDR_SIZE ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) #define FTS5_LOCALE_HDR(pConfig) ((const u8*)(pConfig->pGlobal->aLocaleHdr)) +#define FTS5_INSTTOKEN_SUBTYPE 73 /* ** Each auxiliary function registered with the FTS5 module is represented @@ -1417,6 +1418,7 @@ static int fts5FilterMethod( sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ int iCol; /* Column on LHS of MATCH operator */ char **pzErrmsg = pConfig->pzErrmsg; + int bPrefixInsttoken = pConfig->bPrefixInsttoken; int i; int iIdxStr = 0; Fts5Expr *pExpr = 0; @@ -1452,6 +1454,9 @@ static int fts5FilterMethod( rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset); if( rc!=SQLITE_OK ) goto filter_out; if( zText==0 ) zText = ""; + if( sqlite3_value_subtype(apVal[i])==FTS5_INSTTOKEN_SUBTYPE ){ + pConfig->bPrefixInsttoken = 1; + } iCol = 0; do{ @@ -1592,6 +1597,7 @@ static int fts5FilterMethod( filter_out: sqlite3Fts5ExprFree(pExpr); pConfig->pzErrmsg = pzErrmsg; + pConfig->bPrefixInsttoken = bPrefixInsttoken; return rc; } @@ -3651,6 +3657,19 @@ static void fts5LocaleFunc( } } +/* +** Implementation of fts5_insttoken() function. +*/ +static void fts5InsttokenFunc( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apArg /* Function arguments */ +){ + assert( nArg==1 ); + sqlite3_result_value(pCtx, apArg[0]); + sqlite3_result_subtype(pCtx, FTS5_INSTTOKEN_SUBTYPE); +} + /* ** Return true if zName is the extension on one of the shadow tables used ** by this module. @@ -3780,10 +3799,17 @@ static int fts5Init(sqlite3 *db){ if( rc==SQLITE_OK ){ rc = sqlite3_create_function( db, "fts5_locale", 2, - SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE, + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE|SQLITE_SUBTYPE, p, fts5LocaleFunc, 0, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + db, "fts5_insttoken", 1, + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE, + p, fts5InsttokenFunc, 0, 0 + ); + } } /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 9741f786e8..be77cbfca5 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -22,34 +22,40 @@ ifcapable !fts5 { } foreach_detail_mode $testprefix { +foreach {tn insttoken} { + 1 0 + 2 1 +} { +reset_db sqlite3_fts5_register_origintext db -do_execsql_test 1.0 { +do_execsql_test $tn.1.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); } -do_execsql_test 1.1 { +do_execsql_test $tn.1.1 { INSERT INTO ft VALUES('Hello world'); } -do_execsql_test 1.2 { +do_execsql_test $tn.1.2 { INSERT INTO ft(ft) VALUES('integrity-check'); } proc b {x} { string map [list "\0" "."] $x } db func b b -do_execsql_test 1.3 { +do_execsql_test $tn.1.3 { select b(term) from vocab; } { hello.Hello world } -do_execsql_test 1.4 { +do_execsql_test $tn.1.4 { SELECT rowid FROM ft('Hello'); } {1} @@ -88,33 +94,34 @@ proc document {} { db func document document sqlite3_fts5_register_origintext db -do_execsql_test 2.0 { +do_execsql_test $tn.2.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); INSERT INTO ft(ft, rank) VALUES('pgsz', 128); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); } -do_test 2.1 { +do_test $tn.2.1 { for {set ii 0} {$ii < 500} {incr ii} { execsql { INSERT INTO ft VALUES( document() ) } } } {} -do_execsql_test 2.2 { +do_execsql_test $tn.2.2 { INSERT INTO ft(ft) VALUES('integrity-check'); } -do_execsql_test 2.3 { +do_execsql_test $tn.2.3 { INSERT INTO ft(ft, rank) VALUES('merge', 16); } -do_execsql_test 2.4 { +do_execsql_test $tn.2.4 { INSERT INTO ft(ft) VALUES('integrity-check'); } -do_execsql_test 2.5 { +do_execsql_test $tn.2.5 { INSERT INTO ft(ft) VALUES('optimize'); } @@ -122,10 +129,11 @@ do_execsql_test 2.5 { reset_db sqlite3_fts5_register_origintext db -do_execsql_test 3.0 { +do_execsql_test $tn.3.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); INSERT INTO ft(rowid, x) VALUES(1, 'hello'); @@ -137,16 +145,17 @@ do_execsql_test 3.0 { #db func b b #execsql_pp { SELECT b(term) FROM vocab } -do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1 -do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2 -do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3 +do_execsql_test $tn.3.1.1 { SELECT rowid FROM ft('hello') } 1 +do_execsql_test $tn.3.1.2 { SELECT rowid FROM ft('Hello') } 2 +do_execsql_test $tn.3.1.3 { SELECT rowid FROM ft('HELLO') } 3 -do_execsql_test 3.2 { +do_execsql_test $tn.3.2 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% ); + INSERT INTO ft2(ft2, rank) VALUES('insttoken', $insttoken); CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance); INSERT INTO ft2(rowid, x) VALUES(1, 'hello'); @@ -160,16 +169,16 @@ do_execsql_test 3.2 { #db func b b #execsql_pp { SELECT b(term) FROM vocab } -do_execsql_test 3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3} -do_execsql_test 3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} -do_execsql_test 3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} +do_execsql_test $tn.3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3} +do_execsql_test $tn.3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} +do_execsql_test $tn.3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} -do_execsql_test 3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} +do_execsql_test $tn.3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} -do_execsql_test 3.3.5.1 { SELECT rowid FROM ft2('HELLO') ORDER BY rowid DESC} { +do_execsql_test $tn.3.3.5.1 { SELECT rowid FROM ft2('HELLO') ORDER BY rowid DESC} { 3 2 1 } -do_execsql_test 3.3.5.2 { SELECT rowid FROM ft2('HELLO') ORDER BY +rowid DESC} { +do_execsql_test $tn.3.3.5.2 { SELECT rowid FROM ft2('HELLO') ORDER BY +rowid DESC} { 3 2 1 } @@ -183,36 +192,37 @@ proc querytoken {cmd iPhrase iToken} { } sqlite3_fts5_create_function db querytoken querytoken -do_execsql_test 4.0 { +do_execsql_test $tn.4.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); INSERT INTO ft VALUES('one two three four'); } -do_execsql_test 4.1 { +do_execsql_test $tn.4.1 { SELECT rowid, querytoken(ft, 0, 0) FROM ft('TwO') } {1 two.TwO} -do_execsql_test 4.2 { +do_execsql_test $tn.4.2 { SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE') } {1 one} -do_execsql_test 4.3 { +do_execsql_test $tn.4.3 { SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE') } {1 two.TWO} if {"%DETAIL%"=="full"} { # Phrase queries are only supported for detail=full. # - do_execsql_test 4.4 { + do_execsql_test $tn.4.4 { SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"') } {1 three.ThreE} - do_catchsql_test 4.5 { + do_catchsql_test $tn.4.5 { SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} - do_catchsql_test 4.6 { + do_catchsql_test $tn.4.6 { SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} - do_catchsql_test 4.7 { + do_catchsql_test $tn.4.7 { SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} } @@ -228,14 +238,15 @@ proc insttoken {cmd iIdx iToken} { sqlite3_fts5_create_function db insttoken insttoken fts5_aux_test_functions db -do_execsql_test 5.0 { +do_execsql_test $tn.5.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL% ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); INSERT INTO ft VALUES('one ONE One oNe oNE one'); } -do_execsql_test 5.1 { +do_execsql_test $tn.5.1 { SELECT insttoken(ft, 0, 0), insttoken(ft, 1, 0), insttoken(ft, 2, 0), @@ -247,13 +258,37 @@ do_execsql_test 5.1 { one one.ONE one.One one.oNe one.oNE one } -do_execsql_test 5.2 { +do_execsql_test $tn.5.2 { + SELECT insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0), + insttoken(ft, 3, 0), + insttoken(ft, 4, 0), + insttoken(ft, 5, 0) + FROM ft('on*'); +} { + one one.ONE one.One one.oNe one.oNE one +} + +do_execsql_test $tn.5.3 { + SELECT insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0), + insttoken(ft, 3, 0), + insttoken(ft, 4, 0), + insttoken(ft, 5, 0) + FROM ft(fts5_insttoken('on*')); +} { + one one.ONE one.One one.oNe one.oNE one +} + +do_execsql_test $tn.5.4 { SELECT insttoken(ft, 1, 0) FROM ft('one'); } { one.ONE } -do_execsql_test 5.3 { +do_execsql_test $tn.5.5 { SELECT fts5_test_poslist(ft) FROM ft('one'); } { {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5} @@ -267,10 +302,11 @@ do_execsql_test 5.3 { # reset_db sqlite3_fts5_register_origintext db -do_execsql_test 6.0 { +do_execsql_test $tn.6.0 { CREATE VIRTUAL TABLE ft USING fts5( x, y, tokenize='origintext unicode61', detail=%DETAIL%, tokendata=0 ); + INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken); INSERT INTO ft VALUES('One Two', 'Three two'); INSERT INTO ft VALUES('three Three', 'one One'); @@ -286,34 +322,35 @@ proc tokens {cmd} { } sqlite3_fts5_create_function db tokens tokens -do_execsql_test 6.1 { +do_execsql_test $tn.6.1 { SELECT rowid, tokens(ft) FROM ft('One'); } {1 one.One 2 one.One} -do_execsql_test 6.2 { +do_execsql_test $tn.6.2 { SELECT rowid, tokens(ft) FROM ft('on*'); } {1 one.One 2 {one one.One}} -do_execsql_test 6.3 { +do_execsql_test $tn.6.3 { SELECT rowid, tokens(ft) FROM ft('Three*'); } {1 three.Three 2 three.Three} fts5_aux_test_functions db -do_catchsql_test 6.4 { +do_catchsql_test $tn.6.4 { SELECT fts5_test_insttoken(ft, -1, 0) FROM ft('one'); } {1 SQLITE_RANGE} -do_catchsql_test 6.5 { +do_catchsql_test $tn.6.5 { SELECT fts5_test_insttoken(ft, 1, 0) FROM ft('one'); } {1 SQLITE_RANGE} -do_catchsql_test 6.6 { +do_catchsql_test $tn.6.6 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokendata=2); } {1 {malformed tokendata=... directive}} -do_catchsql_test 6.7 { +do_catchsql_test $tn.6.7 { CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', tokendata=11); } {1 {malformed tokendata=... directive}} +} } finish_test diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test index a4bca0de9b..351ab1f617 100644 --- a/ext/fts5/test/fts5origintext3.test +++ b/ext/fts5/test/fts5origintext3.test @@ -22,6 +22,11 @@ ifcapable !fts5 { } foreach_detail_mode $testprefix { + foreach {tn insttoken} { + 1 0 + 2 1 + } { + reset_db sqlite3_fts5_register_origintext db @@ -32,21 +37,25 @@ foreach_detail_mode $testprefix { } sqlite3_fts5_create_function db insttoken insttoken - do_execsql_test 1.0 { + do_execsql_test $tn.1.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% ); } + + do_execsql_test $tn.1.0.1 { + INSERT INTO ft(ft, rank) VALUES('insttoken', 1); + } - do_execsql_test 1.1 { + do_execsql_test $tn.1.1 { INSERT INTO ft VALUES('Hello world HELLO WORLD hello'); } - do_execsql_test 1.2 { + do_execsql_test $tn.1.2 { SELECT fts5_test_poslist(ft) FROM ft('hello'); } {{0.0.0 0.0.2 0.0.4}} - do_execsql_test 1.3 { + do_execsql_test $tn.1.3 { SELECT insttoken(ft, 0, 0), insttoken(ft, 1, 0), @@ -54,7 +63,15 @@ foreach_detail_mode $testprefix { FROM ft('hello'); } {hello.Hello hello.HELLO hello} - do_execsql_test 1.4 { + do_execsql_test $tn.1.3.1 { + SELECT + insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0) + FROM ft('hel*'); + } {hello.Hello hello.HELLO hello} + + do_execsql_test $tn.1.4 { SELECT insttoken(ft, 0, 0), insttoken(ft, 1, 0), @@ -62,7 +79,7 @@ foreach_detail_mode $testprefix { FROM ft('hello') ORDER BY rank; } {hello.Hello hello.HELLO hello} - do_execsql_test 1.5 { + do_execsql_test $tn.1.5 { CREATE VIRTUAL TABLE ft2 USING fts5( x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% ); @@ -71,11 +88,11 @@ foreach_detail_mode $testprefix { INSERT INTO ft2(rowid, x) VALUES(3, 'THREE one two three THREE'); } - do_execsql_test 1.6 { + do_execsql_test $tn.1.6 { SELECT insttoken(ft2, 0, 0), rowid FROM ft2('three') ORDER BY rank; } {three.THREE 3 three 1 three 2} - do_execsql_test 1.7 { + do_execsql_test $tn.1.7 { INSERT INTO ft2(rowid, x) VALUES(10, 'aaa bbb BBB'); INSERT INTO ft2(rowid, x) VALUES(12, 'bbb bbb bbb'); INSERT INTO ft2(rowid, x) VALUES(13, 'bbb bbb bbb'); @@ -92,16 +109,16 @@ foreach_detail_mode $testprefix { INSERT INTO ft2(rowid, x) VALUES(24, 'aaa bbb BBB'); } - do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24} - do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24} + do_execsql_test $tn.1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24} + do_execsql_test $tn.1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24} - do_execsql_test 2.0 { + do_execsql_test $tn.2.0 { CREATE VIRTUAL TABLE ft3 USING fts5( x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%, prefix=2 ); } - do_execsql_test 2.1 { + do_execsql_test $tn.2.1 { INSERT INTO ft3(rowid, x) VALUES(1, 'one'); INSERT INTO ft3(rowid, x) VALUES(2, 'ONE'); INSERT INTO ft3(rowid, x) VALUES(3, 'ONT'); @@ -109,15 +126,15 @@ foreach_detail_mode $testprefix { INSERT INTO ft3(rowid, x) VALUES(5, 'On'); } - do_execsql_test 2.2 { + do_execsql_test $tn.2.2 { SELECT rowid FROM ft3('on*'); } {1 2 3 4 5} - do_execsql_test 2.3 { + do_execsql_test $tn.2.3 { SELECT rowid, insttoken(ft3, 0, 0) FROM ft3('on*'); } {1 one 2 one.ONE 3 ont.ONT 4 on 5 on.On} - + } } finish_test diff --git a/manifest b/manifest index 2bd0c9cf8d..603ced6491 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\sinto\sthis\sbranch. -D 2024-11-01T19:41:22.452 +C Allow\san\sfts5\stable\sor\squery\sto\sbe\sconfigured\sto\scollect\sxInstToken\sdata\sfor\sany\sprefix\sterms\sas\spart\sof\sthe\sfirst\sparse\sof\sthe\smain\sindex,\sif\sany. +D 2024-11-02T19:10:50.264 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md c5b4009dca54d127d2d6033c22fd9cc34f53bedb6ef12c7cbaa468381c74ab28 @@ -49,7 +49,7 @@ F autosetup/cc.tcl 7e2fe943ae9d45cf39e9f5b05b6230df8e719415edea5af06c30eb68680bd F autosetup/default.auto 5cdf016de2140e50f1db190a02039dc42fb390af1dda4cc4853e3042a9ef0e82 F autosetup/jimsh0.c 27ea5f221359ef6c58780fc6c185aadbf8d3bee9a021331a3e5de0eba0dc6de6 F autosetup/pkg-config.tcl 4e635bf39022ff65e0d5434339dd41503ea48fc53822c9c5bde88b02d3d952ba -F autosetup/proj.tcl 68362ca12e1a32fe73ece32b59a8e8e02a3983295f2dd82c9c4700507bade02c w autosetup/hwaci-common.tcl +F autosetup/proj.tcl 68362ca12e1a32fe73ece32b59a8e8e02a3983295f2dd82c9c4700507bade02c F autosetup/system.tcl 3a39d6e0b3bfba526fd39afe07c1d0d325e5a31925013a1ba7c671e1128e31bb F autosetup/tmake.auto eaebc74ad538dfdd3c817c27eefc31930c20510c4f3a3704071f6cb0629ed71f F autosetup/tmake.tcl a275793ec1b6f8708179af0acef1f6f10d46c2920739743f7a8720c6d700c7a9 @@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 63db9624ccf70d4887836c320eda93ab552f21008f3be7e F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl 009cf59c77afa86d137b0cca3e3b1a5efbe2264faa2df233f9a7aa8563926d15 F ext/fts5/fts5.h 6b4b92df890965567360db5f1ead24fd13a72cb23b95e4ed2ff58d1d89f7aa42 -F ext/fts5/fts5Int.h a282f33a260ddce09dc2b0334d41d83aab0893b2b1656eb83c595a3d0eec2975 +F ext/fts5/fts5Int.h 6abff7dd770dc5969c994c281e6e77fc277ce414d56cc4a62c145cc7036b0b67 F ext/fts5/fts5_aux.c 65a0468dd177d6093aa9ae1622e6d86b0136b8d267c62c0ad6493ad1e9a3d759 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 -F ext/fts5/fts5_config.c a6633d88596758941c625b526075b85d3d9fd1089d8d9eab5db6e8a71fd347ad +F ext/fts5/fts5_config.c e7d8dd062b44a66cd77e5a0f74f23a2354cd1f3f8575afb967b2773c3384f7f8 F ext/fts5/fts5_expr.c 69b8d976058512c07dfe86e229521b7a871768157bd1607cedf1a5038dfd72c9 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c ee650a838fc0591776f7582de578f414959a76cc0118851e4c1f7d13e7365379 -F ext/fts5/fts5_main.c 50eb059e51d730e8e0c77df4e568b018079e112a755c094488b0d5b1aa06afbb +F ext/fts5/fts5_index.c 2cef40d6fdd761229dd4127e0b1ddcb61dfd6a4ac7e73653b7fddbe0075e50be +F ext/fts5/fts5_main.c b2ec6bf97fc378906c0e78c61f10ca8e64f15e03237f2521f7d81736983be378 F ext/fts5/fts5_storage.c 337b05e4c66fc822d031e264d65bde807ec2fab08665ca2cc8aaf9c5fa06792c F ext/fts5/fts5_tcl.c 5b16a249962809b2aaaab964bf58838ea72f30b8b12373cafe612f8cc71e2a40 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -214,9 +214,9 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 264b9101721c17d06d1d174feb743fda3ddc89fad41dee980fef821428258e47 F ext/fts5/test/fts5optimize2.test 795d4ae5f66a7239cf8d5aef4c2ea96aeb8bcd907bd9be0cfe22064fc71a44ed F ext/fts5/test/fts5optimize3.test 1653029284e10e0715246819893ba30565c4ead0d0fc470adae92c353ea857d3 -F ext/fts5/test/fts5origintext.test 63d5b0dc00f0104add8960da0705a70bffd4d86b6feb6ddbb38bff21141d42f0 +F ext/fts5/test/fts5origintext.test 3b73aa036ce5244bb7c5782c5441b979585bdca026accf75d16026a2a8119c09 F ext/fts5/test/fts5origintext2.test f4505ff79bf7369f2b8b10b9cef7476049d844e20b37f29cad3a8b8d5ac6f9ba -F ext/fts5/test/fts5origintext3.test 1f5174a9f4cf42f58f833dbfb314940793ca4723854ec2651e7530ddb35a66a6 +F ext/fts5/test/fts5origintext3.test 4988b6375acc3bbb0515667765f57e389caf449814af9c1095c053f7de2b4223 F ext/fts5/test/fts5origintext4.test 0d3ef0a8038f471dbc83001c34fe5f7ae39b571bfc209670771eb28bc0fc50e8 F ext/fts5/test/fts5origintext5.test ee12b440ec335e5b422d1668aca0051b52ff28b6ee67073e8bbc29f509fd562b F ext/fts5/test/fts5phrase.test bb2554bb61d15f859678c96dc89a7de415cd5fc3b7b54c29b82a0d0ad138091c @@ -2199,8 +2199,8 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bce9a524de6dda87daa90395cd55713d2d3ccfc090e53a947548e434db5eef5e 2dcc465a7090811ddbc210673f37b4c3a4439c501874abefd403efe1e98f5b17 -R 86617b1841d68e4c10c087c197cdd65c +P 790c56d493c66a2136e24d349d169639809d70bfab6996975a403be568a267a5 +R 71ba4975c4c76073cda6dd2f314d94d1 U dan -Z c01ad82d76d71ffe9e5032c32219c470 +Z 95da39a03d7bb4b9bc58c6dbf7b809e5 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 05f1de1961..94e1307210 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -790c56d493c66a2136e24d349d169639809d70bfab6996975a403be568a267a5 +46929ae92b26f02bc70de9931b21a8a7cf9a2453d5fb07f68b712f62e28e9152