From: dan Date: Thu, 23 Jun 2011 17:09:51 +0000 (+0000) Subject: Fix some of the code issues (missing comments etc.) in the new FTS code. X-Git-Tag: version-3.7.8~38^2~35^2~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=17fb042578f7c2333be771ac6a5f1a4e103283df;p=thirdparty%2Fsqlite.git Fix some of the code issues (missing comments etc.) in the new FTS code. FossilOrigin-Name: 8230d83120e0f4d217fde56e22c6f05aa5adee09 --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 67a0ea2096..40a0297add 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -312,6 +312,11 @@ SQLITE_EXTENSION_INIT1 #endif +static int fts3EvalNext(Fts3Cursor *pCsr); +static int fts3EvalStart(Fts3Cursor *pCsr); +static int fts3TermSegReaderCursor( + Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **); + /* ** Write a 64-bit variable-length integer to memory starting at p[0]. ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. @@ -820,9 +825,23 @@ static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ return zRet; } +/* +** This function interprets the string at (*pp) as a non-negative integer +** value. It reads the integer and sets *pnOut to the value read, then +** sets *pp to point to the byte immediately following the last byte of +** the integer value. +** +** Only decimal digits ('0'..'9') may be part of an integer value. +** +** If *pp does not being with a decimal digit SQLITE_ERROR is returned and +** the output value undefined. Otherwise SQLITE_OK is returned. +** +** This function is used when parsing the "prefix=" FTS4 parameter. +*/ static int fts3GobbleInt(const char **pp, int *pnOut){ - const char *p = *pp; - int nInt = 0; + const char *p = *pp; /* Iterator pointer */ + int nInt = 0; /* Output value */ + for(p=*pp; p[0]>='0' && p[0]<='9'; p++){ nInt = nInt * 10 + (p[0] - '0'); } @@ -832,15 +851,30 @@ static int fts3GobbleInt(const char **pp, int *pnOut){ return SQLITE_OK; } - +/* +** This function is called to allocate an array of Fts3Index structures +** representing the indexes maintained by the current FTS table. FTS tables +** always maintain the main "terms" index, but may also maintain one or +** more "prefix" indexes, depending on the value of the "prefix=" parameter +** (if any) specified as part of the CREATE VIRTUAL TABLE statement. +** +** Argument zParam is passed the value of the "prefix=" option if one was +** specified, or NULL otherwise. +** +** If no error occurs, SQLITE_OK is returned and *apIndex set to point to +** the allocated array. *pnIndex is set to the number of elements in the +** array. If an error does occur, an SQLite error code is returned. +** +** Regardless of whether or not an error is returned, it is the responsibility +** of the caller to call sqlite3_free() on the output array to free it. +*/ static int fts3PrefixParameter( const char *zParam, /* ABC in prefix=ABC parameter to parse */ int *pnIndex, /* OUT: size of *apIndex[] array */ - struct Fts3Index **apIndex, /* OUT: Array of indexes for this table */ - struct Fts3Index **apFree /* OUT: Free this with sqlite3_free() */ + struct Fts3Index **apIndex /* OUT: Array of indexes for this table */ ){ - struct Fts3Index *aIndex; - int nIndex = 1; + struct Fts3Index *aIndex; /* Allocated array */ + int nIndex = 1; /* Number of entries in array */ if( zParam && zParam[0] ){ const char *p; @@ -851,7 +885,7 @@ static int fts3PrefixParameter( } aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); - *apIndex = *apFree = aIndex; + *apIndex = aIndex; *pnIndex = nIndex; if( !aIndex ){ return SQLITE_NOMEM; @@ -908,8 +942,7 @@ static int fts3InitVtab( sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ int nIndex; /* Size of aIndex[] array */ - struct Fts3Index *aIndex; /* Array of indexes for this table */ - struct Fts3Index *aFree = 0; /* Free this before returning */ + struct Fts3Index *aIndex = 0; /* Array of indexes for this table */ /* The results of parsing supported FTS4 key=value options: */ int bNoDocsize = 0; /* True to omit %_docsize table */ @@ -1046,7 +1079,7 @@ static int fts3InitVtab( } assert( pTokenizer ); - rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex, &aFree); + rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex); if( rc==SQLITE_ERROR ){ assert( zPrefix ); *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix); @@ -1133,7 +1166,7 @@ static int fts3InitVtab( fts3_init_out: sqlite3_free(zPrefix); - sqlite3_free(aFree); + sqlite3_free(aIndex); sqlite3_free(zCompress); sqlite3_free(zUncompress); sqlite3_free((void *)aCol); @@ -1724,8 +1757,6 @@ static void fts3PoslistMerge( } /* -** nToken==1 searches for adjacent positions. -** ** This function is used to merge two position lists into one. When it is ** called, *pp1 and *pp2 must both point to position lists. A position-list is ** the part of a doclist that follows each document id. For example, if a row @@ -1745,6 +1776,8 @@ static void fts3PoslistMerge( ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. ** when the *pp1 token appears before the *pp2 token, but not more than nToken ** slots before it. +** +** e.g. nToken==1 searches for adjacent positions. */ static int fts3PoslistPhraseMerge( char **pp, /* IN/OUT: Preallocated output buffer */ @@ -1911,22 +1944,34 @@ static int fts3PoslistNearMerge( } /* -** A pointer to an instance of this structure is used as the context -** argument to sqlite3Fts3SegReaderIterate() +** An instance of this function is used to merge together the (potentially +** large number of) doclists for each term that matches a prefix query. +** See function fts3TermSelectMerge() for details. */ typedef struct TermSelect TermSelect; struct TermSelect { - int isReqPos; - char *aaOutput[16]; /* Malloc'd output buffer */ - int anOutput[16]; /* Size of output in bytes */ + char *aaOutput[16]; /* Malloc'd output buffers */ + int anOutput[16]; /* Size each output buffer in bytes */ }; - +/* +** This function is used to read a single varint from a buffer. Parameter +** pEnd points 1 byte past the end of the buffer. When this function is +** called, if *pp points to pEnd or greater, then the end of the buffer +** has been reached. In this case *pp is set to 0 and the function returns. +** +** If *pp does not point to or past pEnd, then a single varint is read +** from *pp. *pp is then set to point 1 byte past the end of the read varint. +** +** If bDescIdx is false, the value read is added to *pVal before returning. +** If it is true, the value read is subtracted from *pVal before this +** function returns. +*/ static void fts3GetDeltaVarint3( - char **pp, - char *pEnd, - int bDescIdx, - sqlite3_int64 *pVal + char **pp, /* IN/OUT: Point to read varint from */ + char *pEnd, /* End of buffer */ + int bDescIdx, /* True if docids are descending */ + sqlite3_int64 *pVal /* IN/OUT: Integer value */ ){ if( *pp>=pEnd ){ *pp = 0; @@ -1941,6 +1986,21 @@ static void fts3GetDeltaVarint3( } } +/* +** This function is used to write a single varint to a buffer. The varint +** is written to *pp. Before returning, *pp is set to point 1 byte past the +** end of the value written. +** +** If *pbFirst is zero when this function is called, the value written to +** the buffer is that of parameter iVal. +** +** If *pbFirst is non-zero when this function is called, then the value +** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal) +** (if bDescIdx is non-zero). +** +** Before returning, this function always sets *pbFirst to 1 and *piPrev +** to the value of parameter iVal. +*/ static void fts3PutDeltaVarint3( char **pp, /* IN/OUT: Output pointer */ int bDescIdx, /* True for descending docids */ @@ -1961,10 +2021,34 @@ static void fts3PutDeltaVarint3( *pbFirst = 1; } -#define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2)) +/* +** This macro is used by various functions that merge doclists. The two +** arguments are 64-bit docid values. If the value of the stack variable +** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). +** Otherwise, (i2-i1). +** +** Using this makes it easier to write code that can merge doclists that are +** sorted in either ascending or descending order. +*/ +#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2)) + +/* +** This function does an "OR" merge of two doclists (output contains all +** positions contained in either argument doclist). If the docids in the +** input doclists are sorted in ascending order, parameter bDescDoclist +** should be false. If they are sorted in ascending order, it should be +** passed a non-zero value. +** +** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer +** containing the output doclist and SQLITE_OK is returned. In this case +** *pnOut is set to the number of bytes in the output doclist. +** +** If an error occurs, an SQLite error code is returned. The output values +** are undefined in this case. +*/ static int fts3DoclistOrMerge( - int bDescIdx, /* True if arguments are desc */ + int bDescDoclist, /* True if arguments are desc */ char *a1, int n1, /* First doclist */ char *a2, int n2, /* Second doclist */ char **paOut, int *pnOut /* OUT: Malloc'd doclist */ @@ -1989,21 +2073,21 @@ static int fts3DoclistOrMerge( fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); while( p1 || p2 ){ - sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2); + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); if( p2 && p1 && iDiff==0 ){ - fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); fts3PoslistMerge(&p, &p1, &p2); - fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); - fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); }else if( !p2 || (p1 && iDiff<0) ){ - fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); fts3PoslistCopy(&p, &p1); - fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); }else{ - fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i2); + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); fts3PoslistCopy(&p, &p2); - fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } } @@ -2012,8 +2096,20 @@ static int fts3DoclistOrMerge( return SQLITE_OK; } +/* +** This function does a "phrase" merge of two doclists. In a phrase merge, +** the output contains a copy of each position from the right-hand input +** doclist for which there is a position in the left-hand input doclist +** exactly nDist tokens before it. +** +** If the docids in the input doclists are sorted in ascending order, +** parameter bDescDoclist should be false. If they are sorted in ascending +** order, it should be passed a non-zero value. +** +** The right-hand input doclist is overwritten by this function. +*/ static void fts3DoclistPhraseMerge( - int bDescIdx, /* True if arguments are desc */ + int bDescDoclist, /* True if arguments are desc */ int nDist, /* Distance from left to right (1=adjacent) */ char *aLeft, int nLeft, /* Left doclist */ char *aRight, int *pnRight /* IN/OUT: Right/output doclist */ @@ -2036,26 +2132,26 @@ static void fts3DoclistPhraseMerge( fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); while( p1 && p2 ){ - sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2); + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); if( iDiff==0 ){ char *pSave = p; sqlite3_int64 iPrevSave = iPrev; int bFirstOutSave = bFirstOut; - fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ p = pSave; iPrev = iPrevSave; bFirstOut = bFirstOutSave; } - fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); - fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); }else if( iDiff<0 ){ fts3PoslistCopy(0, &p1); - fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); }else{ fts3PoslistCopy(0, &p2); - fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } } @@ -2072,7 +2168,7 @@ static void fts3DoclistPhraseMerge( ** the responsibility of the caller to free any doclists left in the ** TermSelect.aaOutput[] array. */ -static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){ +static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){ char *aOut = 0; int nOut = 0; int i; @@ -2113,24 +2209,25 @@ static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){ } /* -** This function is used as the sqlite3Fts3SegReaderIterate() callback when -** querying the full-text index for a doclist associated with a term or -** term-prefix. +** Merge the doclist aDoclist/nDoclist into the TermSelect object passed +** as the first argument. The merge is an "OR" merge (see function +** fts3DoclistOrMerge() for details). +** +** This function is called with the doclist for each term that matches +** a queried prefix. It merges all these doclists into one, the doclist +** for the specified prefix. Since there can be a very large number of +** doclists to merge, the merging is done pair-wise using the TermSelect +** object. +** +** This function returns SQLITE_OK if the merge is successful, or an +** SQLite error code (SQLITE_NOMEM) if an error occurs. */ -static int fts3TermSelectCb( - Fts3Table *p, /* Virtual table object */ - void *pContext, /* Pointer to TermSelect structure */ - char *zTerm, - int nTerm, - char *aDoclist, - int nDoclist +static int fts3TermSelectMerge( + Fts3Table *p, /* FTS table handle */ + TermSelect *pTS, /* TermSelect object to merge into */ + char *aDoclist, /* Pointer to doclist */ + int nDoclist /* Size of aDoclist in bytes */ ){ - TermSelect *pTS = (TermSelect *)pContext; - - UNUSED_PARAMETER(p); - UNUSED_PARAMETER(zTerm); - UNUSED_PARAMETER(nTerm); - if( pTS->aaOutput[0]==0 ){ /* If this is the first term selected, copy the doclist to the output ** buffer using memcpy(). */ @@ -2201,6 +2298,13 @@ static int fts3SegReaderCursorAppend( return SQLITE_OK; } +/* +** Add seg-reader objects to the Fts3MultiSegReader object passed as the +** 8th argument. +** +** This function returns SQLITE_OK if successful, or an SQLite error code +** otherwise. +*/ static int fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ int iIndex, /* Index to search (from 0 to p->nIndex-1) */ @@ -2209,11 +2313,11 @@ static int fts3SegReaderCursor( int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ - Fts3MultiSegReader *pCsr /* Cursor object to populate */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ - int rc = SQLITE_OK; - int rc2; - sqlite3_stmt *pStmt = 0; + int rc = SQLITE_OK; /* Error code */ + sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */ + int rc2; /* Result of sqlite3_reset() */ /* If iLevel is less than 0 and this is not a scan, include a seg-reader ** for the pending-terms. If this is a scan, then this call must be being @@ -2302,24 +2406,42 @@ int sqlite3Fts3SegReaderCursor( ); } +/* +** In addition to its current configuration, have the Fts3MultiSegReader +** passed as the 4th argument also scan the doclist for term zTerm/nTerm. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ static int fts3SegReaderCursorAddZero( - Fts3Table *p, - const char *zTerm, - int nTerm, - Fts3MultiSegReader *pCsr + Fts3Table *p, /* FTS virtual table handle */ + const char *zTerm, /* Term to scan doclist of */ + int nTerm, /* Number of bytes in zTerm */ + Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */ ){ return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr); } - -int sqlite3Fts3TermSegReaderCursor( +/* +** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or, +** if isPrefix is true, to scan the doclist for all terms for which +** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write +** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return +** an SQLite error code. +** +** It is the responsibility of the caller to free this object by eventually +** passing it to fts3SegReaderCursorFree() +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +** Output parameter *ppSegcsr is set to 0 if an error occurs. +*/ +static int fts3TermSegReaderCursor( Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ){ - Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ + Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ int rc = SQLITE_NOMEM; /* Return code */ pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); @@ -2363,6 +2485,9 @@ int sqlite3Fts3TermSegReaderCursor( return rc; } +/* +** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor(). +*/ static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ sqlite3Fts3SegReaderFinish(pSegcsr); sqlite3_free(pSegcsr); @@ -2370,35 +2495,25 @@ static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ /* ** This function retreives the doclist for the specified term (or term -** prefix) from the database. -** -** The returned doclist may be in one of two formats, depending on the -** value of parameter isReqPos. If isReqPos is zero, then the doclist is -** a sorted list of delta-compressed docids (a bare doclist). If isReqPos -** is non-zero, then the returned list is in the same format as is stored -** in the database without the found length specifier at the start of on-disk -** doclists. +** prefix) from the database. */ static int fts3TermSelect( Fts3Table *p, /* Virtual table handle */ Fts3PhraseToken *pTok, /* Token to query for */ int iColumn, /* Column to query (or -ve for all columns) */ - int isReqPos, /* True to include position lists in output */ int *pnOut, /* OUT: Size of buffer at *ppOut */ char **ppOut /* OUT: Malloced result buffer */ ){ int rc; /* Return code */ - Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ - TermSelect tsc; /* Context object for fts3TermSelectCb() */ + Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ + TermSelect tsc; /* Object for pair-wise doclist merging */ Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); - tsc.isReqPos = isReqPos; - filter.flags = FTS3_SEGMENT_IGNORE_EMPTY + filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) - | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0) | (iColumnnColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; filter.zTerm = pTok->z; @@ -2408,13 +2523,11 @@ static int fts3TermSelect( while( SQLITE_OK==rc && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) ){ - rc = fts3TermSelectCb(p, (void *)&tsc, - pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist - ); + rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist); } if( rc==SQLITE_OK ){ - rc = fts3TermSelectMerge(p, &tsc); + rc = fts3TermSelectFinishMerge(p, &tsc); } if( rc==SQLITE_OK ){ *ppOut = tsc.aaOutput[0]; @@ -2487,7 +2600,7 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ rc = SQLITE_OK; } }else{ - rc = sqlite3Fts3EvalNext((Fts3Cursor *)pCursor); + rc = fts3EvalNext((Fts3Cursor *)pCursor); } assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); return rc; @@ -2564,7 +2677,7 @@ static int fts3FilterMethod( rc = sqlite3Fts3ReadLock(p); if( rc!=SQLITE_OK ) return rc; - rc = sqlite3Fts3EvalStart(pCsr, pCsr->pExpr, 1); + rc = fts3EvalStart(pCsr); sqlite3Fts3SegmentsClose(p); if( rc!=SQLITE_OK ) return rc; @@ -2971,6 +3084,11 @@ static int fts3RenameMethod( return rc; } +/* +** The xSavepoint() method. +** +** Flush the contents of the pending-terms table to disk. +*/ static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ UNUSED_PARAMETER(iSavepoint); assert( ((Fts3Table *)pVtab)->inTransaction ); @@ -2978,6 +3096,12 @@ static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); return fts3SyncMethod(pVtab); } + +/* +** The xRelease() method. +** +** This is a no-op. +*/ static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); UNUSED_PARAMETER(iSavepoint); @@ -2987,6 +3111,12 @@ static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ TESTONLY( p->mxSavepoint = iSavepoint-1 ); return SQLITE_OK; } + +/* +** The xRollbackTo() method. +** +** Discard the contents of the pending terms table. +*/ static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts3Table *p = (Fts3Table*)pVtab; UNUSED_PARAMETER(iSavepoint); @@ -3136,18 +3266,6 @@ int sqlite3Fts3Init(sqlite3 *db){ return rc; } -#if !SQLITE_CORE -int sqlite3_extension_init( - sqlite3 *db, - char **pzErrMsg, - const sqlite3_api_routines *pApi -){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3Fts3Init(db); -} -#endif - - /* ** Allocate an Fts3MultiSegReader for each token in the expression headed ** by pExpr. @@ -3164,11 +3282,11 @@ int sqlite3_extension_init( ** doclist and then traversed. */ static void fts3EvalAllocateReaders( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Allocate readers for this expression */ int *pnToken, /* OUT: Total number of tokens in phrase. */ int *pnOr, /* OUT: Total number of OR nodes in expr. */ - int *pRc + int *pRc /* IN/OUT: Error code */ ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ @@ -3177,7 +3295,7 @@ static void fts3EvalAllocateReaders( *pnToken += nToken; for(i=0; ipPhrase->aToken[i]; - int rc = sqlite3Fts3TermSegReaderCursor(pCsr, + int rc = fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); if( rc!=SQLITE_OK ){ @@ -3195,12 +3313,20 @@ static void fts3EvalAllocateReaders( } } +/* +** Arguments pList/nList contain the doclist for token iToken of phrase p. +** It is merged into the main doclist stored in p->doclist.aAll/nAll. +** +** This function assumes that pList points to a buffer allocated using +** sqlite3_malloc(). This function takes responsibility for eventually +** freeing the buffer. +*/ static void fts3EvalPhraseMergeToken( - Fts3Table *pTab, - Fts3Phrase *p, - int iToken, - char *pList, - int nList + Fts3Table *pTab, /* FTS Table pointer */ + Fts3Phrase *p, /* Phrase to merge pList/nList into */ + int iToken, /* Token pList/nList corresponds to */ + char *pList, /* Pointer to doclist */ + int nList /* Number of bytes in pList */ ){ assert( iToken!=p->iDoclistToken ); @@ -3249,9 +3375,15 @@ static void fts3EvalPhraseMergeToken( if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; } +/* +** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist +** does not take deferred tokens into account. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ static int fts3EvalPhraseLoad( - Fts3Cursor *pCsr, - Fts3Phrase *p + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p /* Phrase object */ ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int iToken; @@ -3264,7 +3396,7 @@ static int fts3EvalPhraseLoad( if( pToken->pSegcsr ){ int nThis = 0; char *pThis = 0; - rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis); + rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis); if( rc==SQLITE_OK ){ fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); } @@ -3275,14 +3407,22 @@ static int fts3EvalPhraseLoad( return rc; } +/* +** This function is called on each phrase after the position lists for +** any deferred tokens have been loaded into memory. It updates the phrases +** current position list to include only those positions that are really +** instances of the phrase (after considering deferred tokens). If this +** means that the phrase does not appear in the current row, doclist.pList +** and doclist.nList are both zeroed. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ - int iToken; - int rc = SQLITE_OK; - - int nMaxUndeferred = pPhrase->iDoclistToken; - char *aPoslist = 0; - int nPoslist = 0; - int iPrev = -1; + int iToken; /* Used to iterate through phrase tokens */ + int rc = SQLITE_OK; /* Return code */ + char *aPoslist = 0; /* Position list for deferred tokens */ + int nPoslist = 0; /* Number of bytes in aPoslist */ + int iPrev = -1; /* Token number of previous deferred token */ assert( pPhrase->doclist.bFreeList==0 ); @@ -3328,6 +3468,7 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ } if( iPrev>=0 ){ + int nMaxUndeferred = pPhrase->iDoclistToken; if( nMaxUndeferred<0 ){ pPhrase->doclist.pList = aPoslist; pPhrase->doclist.nList = nPoslist; @@ -3376,9 +3517,15 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ ** expression to initialize the mechanism for returning rows. Once this ** function has been called successfully on an Fts3Phrase, it may be ** used with fts3EvalPhraseNext() to iterate through the matching docids. +** +** If parameter bOptOk is true, then the phrase may (or may not) use the +** incremental loading strategy. Otherwise, the entire doclist is loaded into +** memory within this call. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ - int rc; + int rc; /* Error code */ Fts3PhraseToken *pFirst = &p->aToken[0]; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; @@ -3406,7 +3553,13 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ /* ** This function is used to iterate backwards (from the end to start) -** through doclists. +** through doclists. It is used by this module to iterate through phrase +** doclists in reverse and by the fts3_write.c module to iterate through +** pending-terms lists when writing to databases with "order=desc". +** +** The doclist may be sorted in ascending (parameter bDescIdx==0) or +** descending (parameter bDescIdx==1) order of docid. Regardless, this +** function iterates from the end of the doclist to the beginning. */ void sqlite3Fts3DoclistPrev( int bDescIdx, /* True if the doclist is desc */ @@ -3471,9 +3624,9 @@ void sqlite3Fts3DoclistPrev( ** successfully advanced, *pbEof is set to 0. */ static int fts3EvalPhraseNext( - Fts3Cursor *pCsr, - Fts3Phrase *p, - u8 *pbEof + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; @@ -3519,10 +3672,10 @@ static int fts3EvalPhraseNext( /* pIter now points just past the 0x00 that terminates the position- ** list for document pDL->iDocid. However, if this position-list was - ** edited in place by fts3EvalNearTrim2(), then pIter may not actually + ** edited in place by fts3EvalNearTrim(), then pIter may not actually ** point to the start of the next docid value. The following line deals ** with this case by advancing pIter past the zero-padding added by - ** fts3EvalNearTrim2(). */ + ** fts3EvalNearTrim(). */ while( pIterpNextDocid = pIter; @@ -3534,11 +3687,27 @@ static int fts3EvalPhraseNext( return rc; } +/* +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, fts3EvalPhraseStart() is called on all phrases within the +** expression. Also the Fts3Expr.bDeferred variable is set to true for any +** expressions for which all descendent tokens are deferred. +** +** If parameter bOptOk is zero, then it is guaranteed that the +** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for +** each phrase in the expression (subject to deferred token processing). +** Or, if bOptOk is non-zero, then one or more tokens within the expression +** may be loaded incrementally, meaning doclist.aAll/nAll is not available. +** +** If an error occurs within this function, *pRc is set to an SQLite error +** code before returning. +*/ static void fts3EvalStartReaders( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, - int bOptOk, - int *pRc + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expression to initialize phrases in */ + int bOptOk, /* True to enable incremental loading */ + int *pRc /* IN/OUT: Error code */ ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ @@ -3557,23 +3726,42 @@ static void fts3EvalStartReaders( } } +/* +** An array of the following structures is assembled as part of the process +** of selecting tokens to defer before the query starts executing (as part +** of the xFilter() method). There is one element in the array for each +** token in the FTS expression. +** +** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong +** to phrases that are connected only by AND and NEAR operators (not OR or +** NOT). When determining tokens to defer, each AND/NEAR cluster is considered +** separately. The root of a tokens AND/NEAR cluster is stored in +** Fts3TokenAndCost.pRoot. +*/ typedef struct Fts3TokenAndCost Fts3TokenAndCost; struct Fts3TokenAndCost { Fts3Phrase *pPhrase; /* The phrase the token belongs to */ int iToken; /* Position of token in phrase */ Fts3PhraseToken *pToken; /* The token itself */ - Fts3Expr *pRoot; - int nOvfl; + Fts3Expr *pRoot; /* Root of NEAR/AND cluster */ + int nOvfl; /* Number of overflow pages to load doclist */ int iCol; /* The column the token must match */ }; +/* +** This function is used to populate an allocated Fts3TokenAndCost array. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, if an error occurs during execution, *pRc is set to an +** SQLite error code. +*/ static void fts3EvalTokenCosts( - Fts3Cursor *pCsr, - Fts3Expr *pRoot, - Fts3Expr *pExpr, - Fts3TokenAndCost **ppTC, - Fts3Expr ***ppOr, - int *pRc + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */ + Fts3Expr *pExpr, /* Expression to consider */ + Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */ + Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */ + int *pRc /* IN/OUT: Error code */ ){ if( *pRc==SQLITE_OK && pExpr ){ if( pExpr->eType==FTSQUERY_PHRASE ){ @@ -3605,19 +3793,30 @@ static void fts3EvalTokenCosts( } } +/* +** Determine the average document (row) size in pages. If successful, +** write this value to *pnPage and return SQLITE_OK. Otherwise, return +** an SQLite error code. +** +** The average document size in pages is calculated by first calculating +** determining the average size in bytes, B. If B is less than the amount +** of data that will fit on a single leaf page of an intkey table in +** this database, then the average docsize is 1. Otherwise, it is 1 plus +** the number of overflow pages consumed by a record B bytes in size. +*/ static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ if( pCsr->nRowAvg==0 ){ /* The average document size, which is required to calculate the cost - ** of each doclist, has not yet been determined. Read the required - ** data from the %_stat table to calculate it. - ** - ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 - ** varints, where nCol is the number of columns in the FTS3 table. - ** The first varint is the number of documents currently stored in - ** the table. The following nCol varints contain the total amount of - ** data stored in all rows of each column of the table, from left - ** to right. - */ + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ int rc; Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; sqlite3_stmt *pStmt; @@ -3652,45 +3851,69 @@ static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ return SQLITE_OK; } +/* +** This function is called to select the tokens (if any) that will be +** deferred. The array aTC[] has already been populated when this is +** called. +** +** This function is called once for each AND/NEAR cluster in the +** expression. Each invocation determines which tokens to defer within +** the cluster with root node pRoot. See comments above the definition +** of struct Fts3TokenAndCost for more details. +** +** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken() +** called on each token to defer. Otherwise, an SQLite error code is +** returned. +*/ static int fts3EvalSelectDeferred( - Fts3Cursor *pCsr, - Fts3Expr *pRoot, - Fts3TokenAndCost *aTC, - int nTC + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Consider tokens with this root node */ + Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */ + int nTC /* Number of entries in aTC[] */ ){ - int nDocSize = 0; - int nDocEst = 0; - int rc = SQLITE_OK; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int ii; - - int nOvfl = 0; - int nTerm = 0; + int nDocSize = 0; /* Number of pages per doc loaded */ + int nDocEst = 0; /* Est. docs if all other tokens deferred */ + int rc = SQLITE_OK; /* Return code */ + int ii; /* Iterator variable for various purposes */ + int nOvfl = 0; /* Total overflow pages used by doclists */ + int nToken = 0; /* Total number of tokens in cluster */ + /* Count the tokens in this AND/NEAR cluster. If none of the doclists + ** associated with the tokens spill onto overflow pages, or if there is + ** only 1 token, exit early. No tokens to defer in this case. */ for(ii=0; ii0 ); - for(ii=0; iinOvfl) + /* Set pTC to point to the cheapest remaining token. */ + for(iTC=0; iTCnOvfl) ){ - pTC = &aTC[jj]; + pTC = &aTC[iTC]; } } assert( pTC ); - /* At this point pTC points to the cheapest remaining token. */ + /* Determine if token pTC should be deferred. If not, update nDocEst. + ** + ** TODO: If there are performance regressions involving deferred tokens, + ** this (the logic that selects the tokens to be deferred) is probably + ** the bit that needs to change. + */ if( ii==0 ){ if( pTC->nOvfl ){ nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; @@ -3698,9 +3921,8 @@ static int fts3EvalSelectDeferred( Fts3PhraseToken *pToken = pTC->pToken; int nList = 0; char *pList = 0; - rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList); + rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); assert( rc==SQLITE_OK || pList==0 ); - if( rc==SQLITE_OK ){ nDocEst = fts3DoclistCountDocids(1, pList, nList); fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList); @@ -3721,36 +3943,29 @@ static int fts3EvalSelectDeferred( return rc; } -int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ +/* +** This function is called from within the xFilter method. It initializes +** the full-text query currently stored in pCsr->pExpr. To iterate through +** the results of a query, the caller does: +** +** fts3EvalStart(pCsr); +** while( 1 ){ +** fts3EvalNext(pCsr); +** if( pCsr->bEof ) break; +** ... return row pCsr->iPrevId to the caller ... +** } +*/ +static int fts3EvalStart(Fts3Cursor *pCsr){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; int nToken = 0; int nOr = 0; /* Allocate a MultiSegReader for each token in the expression. */ - fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc); - - /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO: - ** This call will eventually also be responsible for determining which - ** tokens are 'deferred' until the document text is loaded into memory. - ** - ** Each token in each phrase is dealt with using one of the following - ** three strategies: - ** - ** 1. Entire doclist loaded into memory as part of the - ** fts3EvalStartReaders() call. - ** - ** 2. Doclist loaded into memory incrementally, as part of each - ** sqlite3Fts3EvalNext() call. - ** - ** 3. Token doclist is never loaded. Instead, documents are loaded into - ** memory and scanned for the token as part of the sqlite3Fts3EvalNext() - ** call. This is known as a "deferred" token. - */ + fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc); - /* If bOptOk is true, check if there are any tokens that should be deferred. - */ - if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){ + /* Determine which, if any, tokens in the expression should be deferred. */ + if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){ Fts3TokenAndCost *aTC; Fts3Expr **apOr; aTC = (Fts3TokenAndCost *)sqlite3_malloc( @@ -3766,7 +3981,7 @@ int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ Fts3TokenAndCost *pTC = aTC; Fts3Expr **ppOr = apOr; - fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc); + fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc); nToken = pTC-aTC; nOr = ppOr-apOr; @@ -3781,11 +3996,14 @@ int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ } } - fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); + fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc); return rc; } -static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){ +/* +** Invalidate the current position list for phrase pPhrase. +*/ +static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){ if( pPhrase->doclist.bFreeList ){ sqlite3_free(pPhrase->doclist.pList); } @@ -3794,8 +4012,30 @@ static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){ pPhrase->doclist.bFreeList = 0; } -static int fts3EvalNearTrim2( - int nNear, +/* +** This function is called to edit the position list associated with +** the phrase object passed as the fifth argument according to a NEAR +** condition. For example: +** +** abc NEAR/5 "def ghi" +** +** Parameter nNear is passed the NEAR distance of the expression (5 in +** the example above). When this function is called, *paPoslist points to +** the position list, and *pnToken is the number of phrase tokens in, the +** phrase on the other side of the NEAR operator to pPhrase. For example, +** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to +** the position list associated with phrase "abc". +** +** All positions in the pPhrase position list that are not sufficiently +** close to a position in the *paPoslist position list are removed. If this +** leaves 0 positions, zero is returned. Otherwise, non-zero. +** +** Before returning, *paPoslist is set to point to the position lsit +** associated with pPhrase. And *pnToken is set to the number of tokens in +** pPhrase. +*/ +static int fts3EvalNearTrim( + int nNear, /* NEAR distance. As in "NEAR/nNear". */ char *aTmp, /* Temporary space to use */ char **paPoslist, /* IN/OUT: Position list */ int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ @@ -3827,89 +4067,54 @@ static int fts3EvalNearTrim2( return res; } -static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ - int res = 1; - - /* The following block runs if pExpr is the root of a NEAR query. - ** For example, the query: - ** - ** "w" NEAR "x" NEAR "y" NEAR "z" - ** - ** which is represented in tree form as: - ** - ** | - ** +--NEAR--+ <-- root of NEAR query - ** | | - ** +--NEAR--+ "z" - ** | | - ** +--NEAR--+ "y" - ** | | - ** "w" "x" - ** - ** The right-hand child of a NEAR node is always a phrase. The - ** left-hand child may be either a phrase or a NEAR node. There are - ** no exceptions to this. - */ - if( *pRc==SQLITE_OK - && pExpr->eType==FTSQUERY_NEAR - && pExpr->bEof==0 - && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) - ){ - Fts3Expr *p; - int nTmp = 0; /* Bytes of temp space */ - char *aTmp; /* Temp space for PoslistNearMerge() */ - - /* Allocate temporary working space. */ - for(p=pExpr; p->pLeft; p=p->pLeft){ - nTmp += p->pRight->pPhrase->doclist.nList; - } - nTmp += p->pPhrase->doclist.nList; - aTmp = sqlite3_malloc(nTmp*2); - if( !aTmp ){ - *pRc = SQLITE_NOMEM; - res = 0; - }else{ - char *aPoslist = p->pPhrase->doclist.pList; - int nToken = p->pPhrase->nToken; - - for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ - Fts3Phrase *pPhrase = p->pRight->pPhrase; - int nNear = p->nNear; - res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); - } - - aPoslist = pExpr->pRight->pPhrase->doclist.pList; - nToken = pExpr->pRight->pPhrase->nToken; - for(p=pExpr->pLeft; p && res; p=p->pLeft){ - int nNear = p->pParent->nNear; - Fts3Phrase *pPhrase = ( - p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase - ); - res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); - } - } - - sqlite3_free(aTmp); - } - - return res; -} - /* -** This macro is used by the fts3EvalNext() function. The two arguments are -** 64-bit docid values. If the current query is "ORDER BY docid ASC", then -** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then -** it returns (i2 - i1). This allows the same code to be used for merging -** doclists in ascending or descending order. +** This function is a no-op if *pRc is other than SQLITE_OK when it is called. +** Otherwise, it advances the expression passed as the second argument to +** point to the next matching row in the database. Expressions iterate through +** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero, +** or descending if it is non-zero. +** +** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if +** successful, the following variables in pExpr are set: +** +** Fts3Expr.bEof (non-zero if EOF - there is no next row) +** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row) +** +** If the expression is of type FTSQUERY_PHRASE, and the expression is not +** at EOF, then the following variables are populated with the position list +** for the phrase for the visited row: +** +** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes) +** FTs3Expr.pPhrase->doclist.pList (pointer to position list) +** +** It says above that this function advances the expression to the next +** matching row. This is usually true, but there are the following exceptions: +** +** 1. Deferred tokens are not taken into account. If a phrase consists +** entirely of deferred tokens, it is assumed to match every row in +** the db. In this case the position-list is not populated at all. +** +** Or, if a phrase contains one or more deferred tokens and one or +** more non-deferred tokens, then the expression is advanced to the +** next possible match, considering only non-deferred tokens. In other +** words, if the phrase is "A B C", and "B" is deferred, the expression +** is advanced to the next row that contains an instance of "A * C", +** where "*" may match any single token. The position list in this case +** is populated as for "A * C" before returning. +** +** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is +** advanced to point to the next row that matches "x AND y". +** +** See fts3EvalTestDeferredAndNear() for details on testing if a row is +** really a match, taking into account deferred tokens and NEAR operators. */ -#define DOCID_CMP(i1, i2) ((pCsr->bDesc?-1:1) * (i1-i2)) - -static void fts3EvalNext( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, - int *pRc +static void fts3EvalNextRow( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expr. to advance to next matching row */ + int *pRc /* IN/OUT: Error code */ ){ if( *pRc==SQLITE_OK ){ + int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ assert( pExpr->bEof==0 ); pExpr->bStart = 1; @@ -3919,28 +4124,32 @@ static void fts3EvalNext( Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; assert( !pLeft->bDeferred || !pRight->bDeferred ); + if( pLeft->bDeferred ){ - fts3EvalNext(pCsr, pRight, pRc); + /* LHS is entirely deferred. So we assume it matches every row. + ** Advance the RHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pRight, pRc); pExpr->iDocid = pRight->iDocid; pExpr->bEof = pRight->bEof; }else if( pRight->bDeferred ){ - fts3EvalNext(pCsr, pLeft, pRc); + /* RHS is entirely deferred. So we assume it matches every row. + ** Advance the LHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pLeft, pRc); pExpr->iDocid = pLeft->iDocid; pExpr->bEof = pLeft->bEof; }else{ - fts3EvalNext(pCsr, pLeft, pRc); - fts3EvalNext(pCsr, pRight, pRc); - + /* Neither the RHS or LHS are deferred. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); if( iDiff==0 ) break; if( iDiff<0 ){ - fts3EvalNext(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pLeft, pRc); }else{ - fts3EvalNext(pCsr, pRight, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); } } - pExpr->iDocid = pLeft->iDocid; pExpr->bEof = (pLeft->bEof || pRight->bEof); } @@ -3956,12 +4165,12 @@ static void fts3EvalNext( assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ - fts3EvalNext(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pLeft, pRc); }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ - fts3EvalNext(pCsr, pRight, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); }else{ - fts3EvalNext(pCsr, pLeft, pRc); - fts3EvalNext(pCsr, pRight, pRc); + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); } pExpr->bEof = (pLeft->bEof && pRight->bEof); @@ -3980,17 +4189,17 @@ static void fts3EvalNext( Fts3Expr *pRight = pExpr->pRight; if( pRight->bStart==0 ){ - fts3EvalNext(pCsr, pRight, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); assert( *pRc!=SQLITE_OK || pRight->bStart ); } - fts3EvalNext(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pLeft, pRc); if( pLeft->bEof==0 ){ while( !*pRc && !pRight->bEof && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 ){ - fts3EvalNext(pCsr, pRight, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); } } pExpr->iDocid = pLeft->iDocid; @@ -4000,7 +4209,7 @@ static void fts3EvalNext( default: { Fts3Phrase *pPhrase = pExpr->pPhrase; - fts3EvalZeroPoslist(pPhrase); + fts3EvalInvalidatePoslist(pPhrase); *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); pExpr->iDocid = pPhrase->doclist.iDocid; break; @@ -4009,15 +4218,113 @@ static void fts3EvalNext( } } -static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ - int bHit = 1; +/* +** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR +** cluster, then this function returns 1 immediately. +** +** Otherwise, it checks if the current row really does match the NEAR +** expression, using the data currently stored in the position lists +** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. +** +** If the current row is a match, the position list associated with each +** phrase in the NEAR expression is edited in place to contain only those +** phrase instances sufficiently close to their peers to satisfy all NEAR +** constraints. In this case it returns 1. If the NEAR expression does not +** match the current row, 0 is returned. The position lists may or may not +** be edited if 0 is returned. +*/ +static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ + int res = 1; + + /* The following block runs if pExpr is the root of a NEAR query. + ** For example, the query: + ** + ** "w" NEAR "x" NEAR "y" NEAR "z" + ** + ** which is represented in tree form as: + ** + ** | + ** +--NEAR--+ <-- root of NEAR query + ** | | + ** +--NEAR--+ "z" + ** | | + ** +--NEAR--+ "y" + ** | | + ** "w" "x" + ** + ** The right-hand child of a NEAR node is always a phrase. The + ** left-hand child may be either a phrase or a NEAR node. There are + ** no exceptions to this - it's the way the parser in fts3_expr.c works. + */ + if( *pRc==SQLITE_OK + && pExpr->eType==FTSQUERY_NEAR + && pExpr->bEof==0 + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + int nTmp = 0; /* Bytes of temp space */ + char *aTmp; /* Temp space for PoslistNearMerge() */ + + /* Allocate temporary working space. */ + for(p=pExpr; p->pLeft; p=p->pLeft){ + nTmp += p->pRight->pPhrase->doclist.nList; + } + nTmp += p->pPhrase->doclist.nList; + aTmp = sqlite3_malloc(nTmp*2); + if( !aTmp ){ + *pRc = SQLITE_NOMEM; + res = 0; + }else{ + char *aPoslist = p->pPhrase->doclist.pList; + int nToken = p->pPhrase->nToken; + + for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ + Fts3Phrase *pPhrase = p->pRight->pPhrase; + int nNear = p->nNear; + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + + aPoslist = pExpr->pRight->pPhrase->doclist.pList; + nToken = pExpr->pRight->pPhrase->nToken; + for(p=pExpr->pLeft; p && res; p=p->pLeft){ + int nNear = p->pParent->nNear; + Fts3Phrase *pPhrase = ( + p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase + ); + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + } + + sqlite3_free(aTmp); + } + + return res; +} + +/* +** This function is a helper function for fts3EvalTestDeferredAndNear(). +** Assuming no error occurs or has occurred, It returns non-zero if the +** expression passed as the second argument matches the row that pCsr +** currently points to, or zero if it does not. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** If an error occurs during execution of this function, *pRc is set to +** the appropriate SQLite error code. In this case the returned value is +** undefined. +*/ +static int fts3EvalTestExpr( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Expr to test. May or may not be root. */ + int *pRc /* IN/OUT: Error code */ +){ + int bHit = 1; /* Return value */ if( *pRc==SQLITE_OK ){ switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: bHit = ( - fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) - && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) && fts3EvalNearTest(pExpr, pRc) ); @@ -4043,27 +4350,27 @@ static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ Fts3Expr *p; for(p=pExpr; p->pPhrase==0; p=p->pLeft){ if( p->pRight->iDocid==pCsr->iPrevId ){ - fts3EvalZeroPoslist(p->pRight->pPhrase); + fts3EvalInvalidatePoslist(p->pRight->pPhrase); } } if( p->iDocid==pCsr->iPrevId ){ - fts3EvalZeroPoslist(p->pPhrase); + fts3EvalInvalidatePoslist(p->pPhrase); } } break; case FTSQUERY_OR: { - int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc); - int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc); + int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc); + int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc); bHit = bHit1 || bHit2; break; } case FTSQUERY_NOT: bHit = ( - fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) - && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) ); break; @@ -4074,7 +4381,7 @@ static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ Fts3Phrase *pPhrase = pExpr->pPhrase; assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); if( pExpr->bDeferred ){ - fts3EvalZeroPoslist(pPhrase); + fts3EvalInvalidatePoslist(pPhrase); } *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); bHit = (pPhrase->doclist.pList!=0); @@ -4090,27 +4397,49 @@ static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ } /* -** Return 1 if both of the following are true: +** This function is called as the second part of each xNext operation when +** iterating through the results of a full-text query. At this point the +** cursor points to a row that matches the query expression, with the +** following caveats: +** +** * Up until this point, "NEAR" operators in the expression have been +** treated as "AND". +** +** * Deferred tokens have not yet been considered. +** +** If *pRc is not SQLITE_OK when this function is called, it immediately +** returns 0. Otherwise, it tests whether or not after considering NEAR +** operators and deferred tokens the current row is still a match for the +** expression. It returns 1 if both of the following are true: ** ** 1. *pRc is SQLITE_OK when this function returns, and ** ** 2. After scanning the current FTS table row for the deferred tokens, -** it is determined that the row does not match the query. +** it is determined that the row does *not* match the query. ** ** Or, if no error occurs and it seems the current row does match the FTS ** query, return 0. */ -static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ +static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){ int rc = *pRc; int bMiss = 0; if( rc==SQLITE_OK ){ + + /* If there are one or more deferred tokens, load the current row into + ** memory and scan it to determine the position list for each deferred + ** token. Then, see if this row is really a match, considering deferred + ** tokens and NEAR operators (neither of which were taken into account + ** earlier, by fts3EvalNextRow()). + */ if( pCsr->pDeferred ){ rc = fts3CursorSeek(0, pCsr); if( rc==SQLITE_OK ){ rc = sqlite3Fts3CacheDeferredDoclists(pCsr); } } - bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc)); + bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc)); + + /* Free the position-lists accumulated for each deferred token above. */ sqlite3Fts3FreeDeferredDoclists(pCsr); *pRc = rc; } @@ -4121,7 +4450,7 @@ static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ ** Advance to the next document that matches the FTS expression in ** Fts3Cursor.pExpr. */ -int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){ +static int fts3EvalNext(Fts3Cursor *pCsr){ int rc = SQLITE_OK; /* Return Code */ Fts3Expr *pExpr = pCsr->pExpr; assert( pCsr->isEof==0 ); @@ -4133,19 +4462,19 @@ int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){ sqlite3_reset(pCsr->pStmt); } assert( sqlite3_data_count(pCsr->pStmt)==0 ); - fts3EvalNext(pCsr, pExpr, &rc); + fts3EvalNextRow(pCsr, pExpr, &rc); pCsr->isEof = pExpr->bEof; pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pExpr->iDocid; - }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) ); + }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) ); } return rc; } /* ** Restart interation for expression pExpr so that the next call to -** sqlite3Fts3EvalNext() visits the first row. Do not allow incremental +** fts3EvalNext() visits the first row. Do not allow incremental ** loading or merging of phrase doclists for this iteration. ** ** If *pRc is other than SQLITE_OK when this function is called, it is @@ -4161,7 +4490,7 @@ static void fts3EvalRestart( Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ - fts3EvalZeroPoslist(pPhrase); + fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ assert( pPhrase->nToken==1 ); assert( pPhrase->aToken[0].pSegcsr ); @@ -4277,14 +4606,14 @@ static int fts3EvalGatherStats( assert( sqlite3_data_count(pCsr->pStmt)==0 ); /* Advance to the next document */ - fts3EvalNext(pCsr, pRoot, &rc); + fts3EvalNextRow(pCsr, pRoot, &rc); pCsr->isEof = pRoot->bEof; pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pRoot->iDocid; }while( pCsr->isEof==0 && pRoot->eType==FTSQUERY_NEAR - && fts3EvalLoadDeferred(pCsr, &rc) + && fts3EvalTestDeferredAndNear(pCsr, &rc) ); if( rc==SQLITE_OK && pCsr->isEof==0 ){ @@ -4306,10 +4635,10 @@ static int fts3EvalGatherStats( */ fts3EvalRestart(pCsr, pRoot, &rc); do { - fts3EvalNext(pCsr, pRoot, &rc); + fts3EvalNextRow(pCsr, pRoot, &rc); assert( pRoot->bEof==0 ); }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); - fts3EvalLoadDeferred(pCsr, &rc); + fts3EvalTestDeferredAndNear(pCsr, &rc); } } return rc; @@ -4440,7 +4769,7 @@ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ if( pPhrase ){ int i; sqlite3_free(pPhrase->doclist.aAll); - fts3EvalZeroPoslist(pPhrase); + fts3EvalInvalidatePoslist(pPhrase); memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); for(i=0; inToken; i++){ fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); @@ -4449,4 +4778,18 @@ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ } } +#if !SQLITE_CORE +/* +** Initialize API pointer table, if required. +*/ +int sqlite3_extension_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3Fts3Init(db); +} +#endif + #endif diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 1664379609..694108b5c1 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -500,9 +500,6 @@ int sqlite3Fts3TermSegReaderCursor( void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); -int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int); -int sqlite3Fts3EvalNext(Fts3Cursor *pCsr); - int sqlite3Fts3MsrIncrStart( Fts3Table*, Fts3MultiSegReader*, int, const char*, int); int sqlite3Fts3MsrIncrNext( diff --git a/manifest b/manifest index 674d6dedb7..5f55a7ba07 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\stest\sfor\sticket\s[91e2e8ba6f].\sNo\schanges\sto\scode. -D 2011-06-23T16:40:26.224 +C Fix\ssome\sof\sthe\scode\sissues\s(missing\scomments\setc.)\sin\sthe\snew\sFTS\scode. +D 2011-06-23T17:09:51.936 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -62,9 +62,9 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c ca776037493d0081da70a6afc0df80f5ce347cb7 +F ext/fts3/fts3.c c25723c7e96763fc88652a8716564e4b22c8a327 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h fa493ccbad78a2c99ad1c984f651c0c202e68536 +F ext/fts3/fts3Int.h 7b452eeb592134c7adf87720c9e56d6060d1ed5a F ext/fts3/fts3_aux.c 0ebfa7b86cf8ff6a0861605fcc63b83ec1b70691 F ext/fts3/fts3_expr.c 23791de01b3a5d313d76e02befd2601d4096bc2b F ext/fts3/fts3_hash.c aad95afa01cf2a5ffaa448e4b0ab043880cd1efb @@ -949,7 +949,10 @@ F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00 F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh 347d974d143cf132f953b565fbc03026f19fcb4d -P 0b3174e0b1364ccc31853dce02bce5f7d3d431db -R f6237300c8e94b60ef9cb6b4fc0d6ad4 +P c271f7e88fc081a460dd3f4afb24aa9fb7fa2917 +R b9f14b373682048157818c9e1116d00f +T *branch * fts3-changes +T *sym-fts3-changes * +T -sym-trunk * U dan -Z 66a01206844bd880ddebccebd0a5e317 +Z a7e048a2a0bdbbc293d8f131a4bc43fc diff --git a/manifest.uuid b/manifest.uuid index bffb816668..f18b078c6a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c271f7e88fc081a460dd3f4afb24aa9fb7fa2917 \ No newline at end of file +8230d83120e0f4d217fde56e22c6f05aa5adee09 \ No newline at end of file