SQLITE_EXTENSION_INIT1
#endif
+static int fts3EvalNext(Fts3Cursor *pCsr);
+static int fts3EvalStart(Fts3Cursor *pCsr);
+static int fts3TermSegReaderCursor(
+ Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
+
/*
** Write a 64-bit variable-length integer to memory starting at p[0].
** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
return zRet;
}
+/*
+** This function interprets the string at (*pp) as a non-negative integer
+** value. It reads the integer and sets *pnOut to the value read, then
+** sets *pp to point to the byte immediately following the last byte of
+** the integer value.
+**
+** Only decimal digits ('0'..'9') may be part of an integer value.
+**
+** If *pp does not being with a decimal digit SQLITE_ERROR is returned and
+** the output value undefined. Otherwise SQLITE_OK is returned.
+**
+** This function is used when parsing the "prefix=" FTS4 parameter.
+*/
static int fts3GobbleInt(const char **pp, int *pnOut){
- const char *p = *pp;
- int nInt = 0;
+ const char *p = *pp; /* Iterator pointer */
+ int nInt = 0; /* Output value */
+
for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
nInt = nInt * 10 + (p[0] - '0');
}
return SQLITE_OK;
}
-
+/*
+** This function is called to allocate an array of Fts3Index structures
+** representing the indexes maintained by the current FTS table. FTS tables
+** always maintain the main "terms" index, but may also maintain one or
+** more "prefix" indexes, depending on the value of the "prefix=" parameter
+** (if any) specified as part of the CREATE VIRTUAL TABLE statement.
+**
+** Argument zParam is passed the value of the "prefix=" option if one was
+** specified, or NULL otherwise.
+**
+** If no error occurs, SQLITE_OK is returned and *apIndex set to point to
+** the allocated array. *pnIndex is set to the number of elements in the
+** array. If an error does occur, an SQLite error code is returned.
+**
+** Regardless of whether or not an error is returned, it is the responsibility
+** of the caller to call sqlite3_free() on the output array to free it.
+*/
static int fts3PrefixParameter(
const char *zParam, /* ABC in prefix=ABC parameter to parse */
int *pnIndex, /* OUT: size of *apIndex[] array */
- struct Fts3Index **apIndex, /* OUT: Array of indexes for this table */
- struct Fts3Index **apFree /* OUT: Free this with sqlite3_free() */
+ struct Fts3Index **apIndex /* OUT: Array of indexes for this table */
){
- struct Fts3Index *aIndex;
- int nIndex = 1;
+ struct Fts3Index *aIndex; /* Allocated array */
+ int nIndex = 1; /* Number of entries in array */
if( zParam && zParam[0] ){
const char *p;
}
aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex);
- *apIndex = *apFree = aIndex;
+ *apIndex = aIndex;
*pnIndex = nIndex;
if( !aIndex ){
return SQLITE_NOMEM;
sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */
int nIndex; /* Size of aIndex[] array */
- struct Fts3Index *aIndex; /* Array of indexes for this table */
- struct Fts3Index *aFree = 0; /* Free this before returning */
+ struct Fts3Index *aIndex = 0; /* Array of indexes for this table */
/* The results of parsing supported FTS4 key=value options: */
int bNoDocsize = 0; /* True to omit %_docsize table */
}
assert( pTokenizer );
- rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex, &aFree);
+ rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
if( rc==SQLITE_ERROR ){
assert( zPrefix );
*pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
fts3_init_out:
sqlite3_free(zPrefix);
- sqlite3_free(aFree);
+ sqlite3_free(aIndex);
sqlite3_free(zCompress);
sqlite3_free(zUncompress);
sqlite3_free((void *)aCol);
}
/*
-** nToken==1 searches for adjacent positions.
-**
** This function is used to merge two position lists into one. When it is
** called, *pp1 and *pp2 must both point to position lists. A position-list is
** the part of a doclist that follows each document id. For example, if a row
** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
** when the *pp1 token appears before the *pp2 token, but not more than nToken
** slots before it.
+**
+** e.g. nToken==1 searches for adjacent positions.
*/
static int fts3PoslistPhraseMerge(
char **pp, /* IN/OUT: Preallocated output buffer */
}
/*
-** A pointer to an instance of this structure is used as the context
-** argument to sqlite3Fts3SegReaderIterate()
+** An instance of this function is used to merge together the (potentially
+** large number of) doclists for each term that matches a prefix query.
+** See function fts3TermSelectMerge() for details.
*/
typedef struct TermSelect TermSelect;
struct TermSelect {
- int isReqPos;
- char *aaOutput[16]; /* Malloc'd output buffer */
- int anOutput[16]; /* Size of output in bytes */
+ char *aaOutput[16]; /* Malloc'd output buffers */
+ int anOutput[16]; /* Size each output buffer in bytes */
};
-
+/*
+** This function is used to read a single varint from a buffer. Parameter
+** pEnd points 1 byte past the end of the buffer. When this function is
+** called, if *pp points to pEnd or greater, then the end of the buffer
+** has been reached. In this case *pp is set to 0 and the function returns.
+**
+** If *pp does not point to or past pEnd, then a single varint is read
+** from *pp. *pp is then set to point 1 byte past the end of the read varint.
+**
+** If bDescIdx is false, the value read is added to *pVal before returning.
+** If it is true, the value read is subtracted from *pVal before this
+** function returns.
+*/
static void fts3GetDeltaVarint3(
- char **pp,
- char *pEnd,
- int bDescIdx,
- sqlite3_int64 *pVal
+ char **pp, /* IN/OUT: Point to read varint from */
+ char *pEnd, /* End of buffer */
+ int bDescIdx, /* True if docids are descending */
+ sqlite3_int64 *pVal /* IN/OUT: Integer value */
){
if( *pp>=pEnd ){
*pp = 0;
}
}
+/*
+** This function is used to write a single varint to a buffer. The varint
+** is written to *pp. Before returning, *pp is set to point 1 byte past the
+** end of the value written.
+**
+** If *pbFirst is zero when this function is called, the value written to
+** the buffer is that of parameter iVal.
+**
+** If *pbFirst is non-zero when this function is called, then the value
+** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
+** (if bDescIdx is non-zero).
+**
+** Before returning, this function always sets *pbFirst to 1 and *piPrev
+** to the value of parameter iVal.
+*/
static void fts3PutDeltaVarint3(
char **pp, /* IN/OUT: Output pointer */
int bDescIdx, /* True for descending docids */
*pbFirst = 1;
}
-#define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2))
+/*
+** This macro is used by various functions that merge doclists. The two
+** arguments are 64-bit docid values. If the value of the stack variable
+** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2).
+** Otherwise, (i2-i1).
+**
+** Using this makes it easier to write code that can merge doclists that are
+** sorted in either ascending or descending order.
+*/
+#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2))
+
+/*
+** This function does an "OR" merge of two doclists (output contains all
+** positions contained in either argument doclist). If the docids in the
+** input doclists are sorted in ascending order, parameter bDescDoclist
+** should be false. If they are sorted in ascending order, it should be
+** passed a non-zero value.
+**
+** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
+** containing the output doclist and SQLITE_OK is returned. In this case
+** *pnOut is set to the number of bytes in the output doclist.
+**
+** If an error occurs, an SQLite error code is returned. The output values
+** are undefined in this case.
+*/
static int fts3DoclistOrMerge(
- int bDescIdx, /* True if arguments are desc */
+ int bDescDoclist, /* True if arguments are desc */
char *a1, int n1, /* First doclist */
char *a2, int n2, /* Second doclist */
char **paOut, int *pnOut /* OUT: Malloc'd doclist */
fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
while( p1 || p2 ){
- sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2);
+ sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
if( p2 && p1 && iDiff==0 ){
- fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
+ fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
fts3PoslistMerge(&p, &p1, &p2);
- fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
- fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
+ fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
+ fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
}else if( !p2 || (p1 && iDiff<0) ){
- fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
+ fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
fts3PoslistCopy(&p, &p1);
- fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
+ fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
}else{
- fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i2);
+ fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
fts3PoslistCopy(&p, &p2);
- fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
+ fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
}
}
return SQLITE_OK;
}
+/*
+** This function does a "phrase" merge of two doclists. In a phrase merge,
+** the output contains a copy of each position from the right-hand input
+** doclist for which there is a position in the left-hand input doclist
+** exactly nDist tokens before it.
+**
+** If the docids in the input doclists are sorted in ascending order,
+** parameter bDescDoclist should be false. If they are sorted in ascending
+** order, it should be passed a non-zero value.
+**
+** The right-hand input doclist is overwritten by this function.
+*/
static void fts3DoclistPhraseMerge(
- int bDescIdx, /* True if arguments are desc */
+ int bDescDoclist, /* True if arguments are desc */
int nDist, /* Distance from left to right (1=adjacent) */
char *aLeft, int nLeft, /* Left doclist */
char *aRight, int *pnRight /* IN/OUT: Right/output doclist */
fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
while( p1 && p2 ){
- sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2);
+ sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
if( iDiff==0 ){
char *pSave = p;
sqlite3_int64 iPrevSave = iPrev;
int bFirstOutSave = bFirstOut;
- fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
+ fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
p = pSave;
iPrev = iPrevSave;
bFirstOut = bFirstOutSave;
}
- fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
- fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
+ fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
+ fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
}else if( iDiff<0 ){
fts3PoslistCopy(0, &p1);
- fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
+ fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
}else{
fts3PoslistCopy(0, &p2);
- fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
+ fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
}
}
** the responsibility of the caller to free any doclists left in the
** TermSelect.aaOutput[] array.
*/
-static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){
+static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
char *aOut = 0;
int nOut = 0;
int i;
}
/*
-** This function is used as the sqlite3Fts3SegReaderIterate() callback when
-** querying the full-text index for a doclist associated with a term or
-** term-prefix.
+** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
+** as the first argument. The merge is an "OR" merge (see function
+** fts3DoclistOrMerge() for details).
+**
+** This function is called with the doclist for each term that matches
+** a queried prefix. It merges all these doclists into one, the doclist
+** for the specified prefix. Since there can be a very large number of
+** doclists to merge, the merging is done pair-wise using the TermSelect
+** object.
+**
+** This function returns SQLITE_OK if the merge is successful, or an
+** SQLite error code (SQLITE_NOMEM) if an error occurs.
*/
-static int fts3TermSelectCb(
- Fts3Table *p, /* Virtual table object */
- void *pContext, /* Pointer to TermSelect structure */
- char *zTerm,
- int nTerm,
- char *aDoclist,
- int nDoclist
+static int fts3TermSelectMerge(
+ Fts3Table *p, /* FTS table handle */
+ TermSelect *pTS, /* TermSelect object to merge into */
+ char *aDoclist, /* Pointer to doclist */
+ int nDoclist /* Size of aDoclist in bytes */
){
- TermSelect *pTS = (TermSelect *)pContext;
-
- UNUSED_PARAMETER(p);
- UNUSED_PARAMETER(zTerm);
- UNUSED_PARAMETER(nTerm);
-
if( pTS->aaOutput[0]==0 ){
/* If this is the first term selected, copy the doclist to the output
** buffer using memcpy(). */
return SQLITE_OK;
}
+/*
+** Add seg-reader objects to the Fts3MultiSegReader object passed as the
+** 8th argument.
+**
+** This function returns SQLITE_OK if successful, or an SQLite error code
+** otherwise.
+*/
static int fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int nTerm, /* Size of zTerm in bytes */
int isPrefix, /* True for a prefix search */
int isScan, /* True to scan from zTerm to EOF */
- Fts3MultiSegReader *pCsr /* Cursor object to populate */
+ Fts3MultiSegReader *pCsr /* Cursor object to populate */
){
- int rc = SQLITE_OK;
- int rc2;
- sqlite3_stmt *pStmt = 0;
+ int rc = SQLITE_OK; /* Error code */
+ sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */
+ int rc2; /* Result of sqlite3_reset() */
/* If iLevel is less than 0 and this is not a scan, include a seg-reader
** for the pending-terms. If this is a scan, then this call must be being
);
}
+/*
+** In addition to its current configuration, have the Fts3MultiSegReader
+** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
static int fts3SegReaderCursorAddZero(
- Fts3Table *p,
- const char *zTerm,
- int nTerm,
- Fts3MultiSegReader *pCsr
+ Fts3Table *p, /* FTS virtual table handle */
+ const char *zTerm, /* Term to scan doclist of */
+ int nTerm, /* Number of bytes in zTerm */
+ Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */
){
return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
}
-
-int sqlite3Fts3TermSegReaderCursor(
+/*
+** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
+** if isPrefix is true, to scan the doclist for all terms for which
+** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
+** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
+** an SQLite error code.
+**
+** It is the responsibility of the caller to free this object by eventually
+** passing it to fts3SegReaderCursorFree()
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+** Output parameter *ppSegcsr is set to 0 if an error occurs.
+*/
+static int fts3TermSegReaderCursor(
Fts3Cursor *pCsr, /* Virtual table cursor handle */
const char *zTerm, /* Term to query for */
int nTerm, /* Size of zTerm in bytes */
int isPrefix, /* True for a prefix search */
Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */
){
- Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */
+ Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */
int rc = SQLITE_NOMEM; /* Return code */
pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
return rc;
}
+/*
+** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
+*/
static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
sqlite3Fts3SegReaderFinish(pSegcsr);
sqlite3_free(pSegcsr);
/*
** This function retreives the doclist for the specified term (or term
-** prefix) from the database.
-**
-** The returned doclist may be in one of two formats, depending on the
-** value of parameter isReqPos. If isReqPos is zero, then the doclist is
-** a sorted list of delta-compressed docids (a bare doclist). If isReqPos
-** is non-zero, then the returned list is in the same format as is stored
-** in the database without the found length specifier at the start of on-disk
-** doclists.
+** prefix) from the database.
*/
static int fts3TermSelect(
Fts3Table *p, /* Virtual table handle */
Fts3PhraseToken *pTok, /* Token to query for */
int iColumn, /* Column to query (or -ve for all columns) */
- int isReqPos, /* True to include position lists in output */
int *pnOut, /* OUT: Size of buffer at *ppOut */
char **ppOut /* OUT: Malloced result buffer */
){
int rc; /* Return code */
- Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */
- TermSelect tsc; /* Context object for fts3TermSelectCb() */
+ Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */
+ TermSelect tsc; /* Object for pair-wise doclist merging */
Fts3SegFilter filter; /* Segment term filter configuration */
pSegcsr = pTok->pSegcsr;
memset(&tsc, 0, sizeof(TermSelect));
- tsc.isReqPos = isReqPos;
- filter.flags = FTS3_SEGMENT_IGNORE_EMPTY
+ filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
| (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
- | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
| (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
filter.iCol = iColumn;
filter.zTerm = pTok->z;
while( SQLITE_OK==rc
&& SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr))
){
- rc = fts3TermSelectCb(p, (void *)&tsc,
- pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist
- );
+ rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
}
if( rc==SQLITE_OK ){
- rc = fts3TermSelectMerge(p, &tsc);
+ rc = fts3TermSelectFinishMerge(p, &tsc);
}
if( rc==SQLITE_OK ){
*ppOut = tsc.aaOutput[0];
rc = SQLITE_OK;
}
}else{
- rc = sqlite3Fts3EvalNext((Fts3Cursor *)pCursor);
+ rc = fts3EvalNext((Fts3Cursor *)pCursor);
}
assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
return rc;
rc = sqlite3Fts3ReadLock(p);
if( rc!=SQLITE_OK ) return rc;
- rc = sqlite3Fts3EvalStart(pCsr, pCsr->pExpr, 1);
+ rc = fts3EvalStart(pCsr);
sqlite3Fts3SegmentsClose(p);
if( rc!=SQLITE_OK ) return rc;
return rc;
}
+/*
+** The xSavepoint() method.
+**
+** Flush the contents of the pending-terms table to disk.
+*/
static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
UNUSED_PARAMETER(iSavepoint);
assert( ((Fts3Table *)pVtab)->inTransaction );
TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
return fts3SyncMethod(pVtab);
}
+
+/*
+** The xRelease() method.
+**
+** This is a no-op.
+*/
static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
UNUSED_PARAMETER(iSavepoint);
TESTONLY( p->mxSavepoint = iSavepoint-1 );
return SQLITE_OK;
}
+
+/*
+** The xRollbackTo() method.
+**
+** Discard the contents of the pending terms table.
+*/
static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
Fts3Table *p = (Fts3Table*)pVtab;
UNUSED_PARAMETER(iSavepoint);
return rc;
}
-#if !SQLITE_CORE
-int sqlite3_extension_init(
- sqlite3 *db,
- char **pzErrMsg,
- const sqlite3_api_routines *pApi
-){
- SQLITE_EXTENSION_INIT2(pApi)
- return sqlite3Fts3Init(db);
-}
-#endif
-
-
/*
** Allocate an Fts3MultiSegReader for each token in the expression headed
** by pExpr.
** doclist and then traversed.
*/
static void fts3EvalAllocateReaders(
- Fts3Cursor *pCsr,
- Fts3Expr *pExpr,
+ Fts3Cursor *pCsr, /* FTS cursor handle */
+ Fts3Expr *pExpr, /* Allocate readers for this expression */
int *pnToken, /* OUT: Total number of tokens in phrase. */
int *pnOr, /* OUT: Total number of OR nodes in expr. */
- int *pRc
+ int *pRc /* IN/OUT: Error code */
){
if( pExpr && SQLITE_OK==*pRc ){
if( pExpr->eType==FTSQUERY_PHRASE ){
*pnToken += nToken;
for(i=0; i<nToken; i++){
Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
- int rc = sqlite3Fts3TermSegReaderCursor(pCsr,
+ int rc = fts3TermSegReaderCursor(pCsr,
pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
);
if( rc!=SQLITE_OK ){
}
}
+/*
+** Arguments pList/nList contain the doclist for token iToken of phrase p.
+** It is merged into the main doclist stored in p->doclist.aAll/nAll.
+**
+** This function assumes that pList points to a buffer allocated using
+** sqlite3_malloc(). This function takes responsibility for eventually
+** freeing the buffer.
+*/
static void fts3EvalPhraseMergeToken(
- Fts3Table *pTab,
- Fts3Phrase *p,
- int iToken,
- char *pList,
- int nList
+ Fts3Table *pTab, /* FTS Table pointer */
+ Fts3Phrase *p, /* Phrase to merge pList/nList into */
+ int iToken, /* Token pList/nList corresponds to */
+ char *pList, /* Pointer to doclist */
+ int nList /* Number of bytes in pList */
){
assert( iToken!=p->iDoclistToken );
if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
}
+/*
+** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
+** does not take deferred tokens into account.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
static int fts3EvalPhraseLoad(
- Fts3Cursor *pCsr,
- Fts3Phrase *p
+ Fts3Cursor *pCsr, /* FTS Cursor handle */
+ Fts3Phrase *p /* Phrase object */
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int iToken;
if( pToken->pSegcsr ){
int nThis = 0;
char *pThis = 0;
- rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis);
+ rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
if( rc==SQLITE_OK ){
fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
}
return rc;
}
+/*
+** This function is called on each phrase after the position lists for
+** any deferred tokens have been loaded into memory. It updates the phrases
+** current position list to include only those positions that are really
+** instances of the phrase (after considering deferred tokens). If this
+** means that the phrase does not appear in the current row, doclist.pList
+** and doclist.nList are both zeroed.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
- int iToken;
- int rc = SQLITE_OK;
-
- int nMaxUndeferred = pPhrase->iDoclistToken;
- char *aPoslist = 0;
- int nPoslist = 0;
- int iPrev = -1;
+ int iToken; /* Used to iterate through phrase tokens */
+ int rc = SQLITE_OK; /* Return code */
+ char *aPoslist = 0; /* Position list for deferred tokens */
+ int nPoslist = 0; /* Number of bytes in aPoslist */
+ int iPrev = -1; /* Token number of previous deferred token */
assert( pPhrase->doclist.bFreeList==0 );
}
if( iPrev>=0 ){
+ int nMaxUndeferred = pPhrase->iDoclistToken;
if( nMaxUndeferred<0 ){
pPhrase->doclist.pList = aPoslist;
pPhrase->doclist.nList = nPoslist;
** expression to initialize the mechanism for returning rows. Once this
** function has been called successfully on an Fts3Phrase, it may be
** used with fts3EvalPhraseNext() to iterate through the matching docids.
+**
+** If parameter bOptOk is true, then the phrase may (or may not) use the
+** incremental loading strategy. Otherwise, the entire doclist is loaded into
+** memory within this call.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
- int rc;
+ int rc; /* Error code */
Fts3PhraseToken *pFirst = &p->aToken[0];
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
/*
** This function is used to iterate backwards (from the end to start)
-** through doclists.
+** through doclists. It is used by this module to iterate through phrase
+** doclists in reverse and by the fts3_write.c module to iterate through
+** pending-terms lists when writing to databases with "order=desc".
+**
+** The doclist may be sorted in ascending (parameter bDescIdx==0) or
+** descending (parameter bDescIdx==1) order of docid. Regardless, this
+** function iterates from the end of the doclist to the beginning.
*/
void sqlite3Fts3DoclistPrev(
int bDescIdx, /* True if the doclist is desc */
** successfully advanced, *pbEof is set to 0.
*/
static int fts3EvalPhraseNext(
- Fts3Cursor *pCsr,
- Fts3Phrase *p,
- u8 *pbEof
+ Fts3Cursor *pCsr, /* FTS Cursor handle */
+ Fts3Phrase *p, /* Phrase object to advance to next docid */
+ u8 *pbEof /* OUT: Set to 1 if EOF */
){
int rc = SQLITE_OK;
Fts3Doclist *pDL = &p->doclist;
/* pIter now points just past the 0x00 that terminates the position-
** list for document pDL->iDocid. However, if this position-list was
- ** edited in place by fts3EvalNearTrim2(), then pIter may not actually
+ ** edited in place by fts3EvalNearTrim(), then pIter may not actually
** point to the start of the next docid value. The following line deals
** with this case by advancing pIter past the zero-padding added by
- ** fts3EvalNearTrim2(). */
+ ** fts3EvalNearTrim(). */
while( pIter<pEnd && *pIter==0 ) pIter++;
pDL->pNextDocid = pIter;
return rc;
}
+/*
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, fts3EvalPhraseStart() is called on all phrases within the
+** expression. Also the Fts3Expr.bDeferred variable is set to true for any
+** expressions for which all descendent tokens are deferred.
+**
+** If parameter bOptOk is zero, then it is guaranteed that the
+** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
+** each phrase in the expression (subject to deferred token processing).
+** Or, if bOptOk is non-zero, then one or more tokens within the expression
+** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
+**
+** If an error occurs within this function, *pRc is set to an SQLite error
+** code before returning.
+*/
static void fts3EvalStartReaders(
- Fts3Cursor *pCsr,
- Fts3Expr *pExpr,
- int bOptOk,
- int *pRc
+ Fts3Cursor *pCsr, /* FTS Cursor handle */
+ Fts3Expr *pExpr, /* Expression to initialize phrases in */
+ int bOptOk, /* True to enable incremental loading */
+ int *pRc /* IN/OUT: Error code */
){
if( pExpr && SQLITE_OK==*pRc ){
if( pExpr->eType==FTSQUERY_PHRASE ){
}
}
+/*
+** An array of the following structures is assembled as part of the process
+** of selecting tokens to defer before the query starts executing (as part
+** of the xFilter() method). There is one element in the array for each
+** token in the FTS expression.
+**
+** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
+** to phrases that are connected only by AND and NEAR operators (not OR or
+** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
+** separately. The root of a tokens AND/NEAR cluster is stored in
+** Fts3TokenAndCost.pRoot.
+*/
typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {
Fts3Phrase *pPhrase; /* The phrase the token belongs to */
int iToken; /* Position of token in phrase */
Fts3PhraseToken *pToken; /* The token itself */
- Fts3Expr *pRoot;
- int nOvfl;
+ Fts3Expr *pRoot; /* Root of NEAR/AND cluster */
+ int nOvfl; /* Number of overflow pages to load doclist */
int iCol; /* The column the token must match */
};
+/*
+** This function is used to populate an allocated Fts3TokenAndCost array.
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, if an error occurs during execution, *pRc is set to an
+** SQLite error code.
+*/
static void fts3EvalTokenCosts(
- Fts3Cursor *pCsr,
- Fts3Expr *pRoot,
- Fts3Expr *pExpr,
- Fts3TokenAndCost **ppTC,
- Fts3Expr ***ppOr,
- int *pRc
+ Fts3Cursor *pCsr, /* FTS Cursor handle */
+ Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */
+ Fts3Expr *pExpr, /* Expression to consider */
+ Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */
+ Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */
+ int *pRc /* IN/OUT: Error code */
){
if( *pRc==SQLITE_OK && pExpr ){
if( pExpr->eType==FTSQUERY_PHRASE ){
}
}
+/*
+** Determine the average document (row) size in pages. If successful,
+** write this value to *pnPage and return SQLITE_OK. Otherwise, return
+** an SQLite error code.
+**
+** The average document size in pages is calculated by first calculating
+** determining the average size in bytes, B. If B is less than the amount
+** of data that will fit on a single leaf page of an intkey table in
+** this database, then the average docsize is 1. Otherwise, it is 1 plus
+** the number of overflow pages consumed by a record B bytes in size.
+*/
static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
if( pCsr->nRowAvg==0 ){
/* The average document size, which is required to calculate the cost
- ** of each doclist, has not yet been determined. Read the required
- ** data from the %_stat table to calculate it.
- **
- ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
- ** varints, where nCol is the number of columns in the FTS3 table.
- ** The first varint is the number of documents currently stored in
- ** the table. The following nCol varints contain the total amount of
- ** data stored in all rows of each column of the table, from left
- ** to right.
- */
+ ** of each doclist, has not yet been determined. Read the required
+ ** data from the %_stat table to calculate it.
+ **
+ ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
+ ** varints, where nCol is the number of columns in the FTS3 table.
+ ** The first varint is the number of documents currently stored in
+ ** the table. The following nCol varints contain the total amount of
+ ** data stored in all rows of each column of the table, from left
+ ** to right.
+ */
int rc;
Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
sqlite3_stmt *pStmt;
return SQLITE_OK;
}
+/*
+** This function is called to select the tokens (if any) that will be
+** deferred. The array aTC[] has already been populated when this is
+** called.
+**
+** This function is called once for each AND/NEAR cluster in the
+** expression. Each invocation determines which tokens to defer within
+** the cluster with root node pRoot. See comments above the definition
+** of struct Fts3TokenAndCost for more details.
+**
+** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
+** called on each token to defer. Otherwise, an SQLite error code is
+** returned.
+*/
static int fts3EvalSelectDeferred(
- Fts3Cursor *pCsr,
- Fts3Expr *pRoot,
- Fts3TokenAndCost *aTC,
- int nTC
+ Fts3Cursor *pCsr, /* FTS Cursor handle */
+ Fts3Expr *pRoot, /* Consider tokens with this root node */
+ Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */
+ int nTC /* Number of entries in aTC[] */
){
- int nDocSize = 0;
- int nDocEst = 0;
- int rc = SQLITE_OK;
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int ii;
-
- int nOvfl = 0;
- int nTerm = 0;
+ int nDocSize = 0; /* Number of pages per doc loaded */
+ int nDocEst = 0; /* Est. docs if all other tokens deferred */
+ int rc = SQLITE_OK; /* Return code */
+ int ii; /* Iterator variable for various purposes */
+ int nOvfl = 0; /* Total overflow pages used by doclists */
+ int nToken = 0; /* Total number of tokens in cluster */
+ /* Count the tokens in this AND/NEAR cluster. If none of the doclists
+ ** associated with the tokens spill onto overflow pages, or if there is
+ ** only 1 token, exit early. No tokens to defer in this case. */
for(ii=0; ii<nTC; ii++){
if( aTC[ii].pRoot==pRoot ){
nOvfl += aTC[ii].nOvfl;
- nTerm++;
+ nToken++;
}
}
- if( nOvfl==0 || nTerm<2 ) return SQLITE_OK;
+ if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
+ /* Obtain the average docsize (in pages). */
rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
+ assert( rc!=SQLITE_OK || nDocSize>0 );
- for(ii=0; ii<nTerm && rc==SQLITE_OK; ii++){
- int jj;
- Fts3TokenAndCost *pTC = 0;
+ for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
+ int iTC; /* Used to iterate through aTC[] array. */
+ Fts3TokenAndCost *pTC = 0; /* Set to cheapest remaining token. */
- for(jj=0; jj<nTC; jj++){
- if( aTC[jj].pToken && aTC[jj].pRoot==pRoot
- && (!pTC || aTC[jj].nOvfl<pTC->nOvfl)
+ /* Set pTC to point to the cheapest remaining token. */
+ for(iTC=0; iTC<nTC; iTC++){
+ if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot
+ && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl)
){
- pTC = &aTC[jj];
+ pTC = &aTC[iTC];
}
}
assert( pTC );
- /* At this point pTC points to the cheapest remaining token. */
+ /* Determine if token pTC should be deferred. If not, update nDocEst.
+ **
+ ** TODO: If there are performance regressions involving deferred tokens,
+ ** this (the logic that selects the tokens to be deferred) is probably
+ ** the bit that needs to change.
+ */
if( ii==0 ){
if( pTC->nOvfl ){
nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
Fts3PhraseToken *pToken = pTC->pToken;
int nList = 0;
char *pList = 0;
- rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
+ rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
assert( rc==SQLITE_OK || pList==0 );
-
if( rc==SQLITE_OK ){
nDocEst = fts3DoclistCountDocids(1, pList, nList);
fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
return rc;
}
-int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
+/*
+** This function is called from within the xFilter method. It initializes
+** the full-text query currently stored in pCsr->pExpr. To iterate through
+** the results of a query, the caller does:
+**
+** fts3EvalStart(pCsr);
+** while( 1 ){
+** fts3EvalNext(pCsr);
+** if( pCsr->bEof ) break;
+** ... return row pCsr->iPrevId to the caller ...
+** }
+*/
+static int fts3EvalStart(Fts3Cursor *pCsr){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int rc = SQLITE_OK;
int nToken = 0;
int nOr = 0;
/* Allocate a MultiSegReader for each token in the expression. */
- fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc);
-
- /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO:
- ** This call will eventually also be responsible for determining which
- ** tokens are 'deferred' until the document text is loaded into memory.
- **
- ** Each token in each phrase is dealt with using one of the following
- ** three strategies:
- **
- ** 1. Entire doclist loaded into memory as part of the
- ** fts3EvalStartReaders() call.
- **
- ** 2. Doclist loaded into memory incrementally, as part of each
- ** sqlite3Fts3EvalNext() call.
- **
- ** 3. Token doclist is never loaded. Instead, documents are loaded into
- ** memory and scanned for the token as part of the sqlite3Fts3EvalNext()
- ** call. This is known as a "deferred" token.
- */
+ fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
- /* If bOptOk is true, check if there are any tokens that should be deferred.
- */
- if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){
+ /* Determine which, if any, tokens in the expression should be deferred. */
+ if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){
Fts3TokenAndCost *aTC;
Fts3Expr **apOr;
aTC = (Fts3TokenAndCost *)sqlite3_malloc(
Fts3TokenAndCost *pTC = aTC;
Fts3Expr **ppOr = apOr;
- fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc);
+ fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
nToken = pTC-aTC;
nOr = ppOr-apOr;
}
}
- fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc);
+ fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
return rc;
}
-static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){
+/*
+** Invalidate the current position list for phrase pPhrase.
+*/
+static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
if( pPhrase->doclist.bFreeList ){
sqlite3_free(pPhrase->doclist.pList);
}
pPhrase->doclist.bFreeList = 0;
}
-static int fts3EvalNearTrim2(
- int nNear,
+/*
+** This function is called to edit the position list associated with
+** the phrase object passed as the fifth argument according to a NEAR
+** condition. For example:
+**
+** abc NEAR/5 "def ghi"
+**
+** Parameter nNear is passed the NEAR distance of the expression (5 in
+** the example above). When this function is called, *paPoslist points to
+** the position list, and *pnToken is the number of phrase tokens in, the
+** phrase on the other side of the NEAR operator to pPhrase. For example,
+** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
+** the position list associated with phrase "abc".
+**
+** All positions in the pPhrase position list that are not sufficiently
+** close to a position in the *paPoslist position list are removed. If this
+** leaves 0 positions, zero is returned. Otherwise, non-zero.
+**
+** Before returning, *paPoslist is set to point to the position lsit
+** associated with pPhrase. And *pnToken is set to the number of tokens in
+** pPhrase.
+*/
+static int fts3EvalNearTrim(
+ int nNear, /* NEAR distance. As in "NEAR/nNear". */
char *aTmp, /* Temporary space to use */
char **paPoslist, /* IN/OUT: Position list */
int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */
return res;
}
-static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
- int res = 1;
-
- /* The following block runs if pExpr is the root of a NEAR query.
- ** For example, the query:
- **
- ** "w" NEAR "x" NEAR "y" NEAR "z"
- **
- ** which is represented in tree form as:
- **
- ** |
- ** +--NEAR--+ <-- root of NEAR query
- ** | |
- ** +--NEAR--+ "z"
- ** | |
- ** +--NEAR--+ "y"
- ** | |
- ** "w" "x"
- **
- ** The right-hand child of a NEAR node is always a phrase. The
- ** left-hand child may be either a phrase or a NEAR node. There are
- ** no exceptions to this.
- */
- if( *pRc==SQLITE_OK
- && pExpr->eType==FTSQUERY_NEAR
- && pExpr->bEof==0
- && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
- ){
- Fts3Expr *p;
- int nTmp = 0; /* Bytes of temp space */
- char *aTmp; /* Temp space for PoslistNearMerge() */
-
- /* Allocate temporary working space. */
- for(p=pExpr; p->pLeft; p=p->pLeft){
- nTmp += p->pRight->pPhrase->doclist.nList;
- }
- nTmp += p->pPhrase->doclist.nList;
- aTmp = sqlite3_malloc(nTmp*2);
- if( !aTmp ){
- *pRc = SQLITE_NOMEM;
- res = 0;
- }else{
- char *aPoslist = p->pPhrase->doclist.pList;
- int nToken = p->pPhrase->nToken;
-
- for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
- Fts3Phrase *pPhrase = p->pRight->pPhrase;
- int nNear = p->nNear;
- res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
- }
-
- aPoslist = pExpr->pRight->pPhrase->doclist.pList;
- nToken = pExpr->pRight->pPhrase->nToken;
- for(p=pExpr->pLeft; p && res; p=p->pLeft){
- int nNear = p->pParent->nNear;
- Fts3Phrase *pPhrase = (
- p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
- );
- res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
- }
- }
-
- sqlite3_free(aTmp);
- }
-
- return res;
-}
-
/*
-** This macro is used by the fts3EvalNext() function. The two arguments are
-** 64-bit docid values. If the current query is "ORDER BY docid ASC", then
-** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then
-** it returns (i2 - i1). This allows the same code to be used for merging
-** doclists in ascending or descending order.
+** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
+** Otherwise, it advances the expression passed as the second argument to
+** point to the next matching row in the database. Expressions iterate through
+** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
+** or descending if it is non-zero.
+**
+** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
+** successful, the following variables in pExpr are set:
+**
+** Fts3Expr.bEof (non-zero if EOF - there is no next row)
+** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row)
+**
+** If the expression is of type FTSQUERY_PHRASE, and the expression is not
+** at EOF, then the following variables are populated with the position list
+** for the phrase for the visited row:
+**
+** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes)
+** FTs3Expr.pPhrase->doclist.pList (pointer to position list)
+**
+** It says above that this function advances the expression to the next
+** matching row. This is usually true, but there are the following exceptions:
+**
+** 1. Deferred tokens are not taken into account. If a phrase consists
+** entirely of deferred tokens, it is assumed to match every row in
+** the db. In this case the position-list is not populated at all.
+**
+** Or, if a phrase contains one or more deferred tokens and one or
+** more non-deferred tokens, then the expression is advanced to the
+** next possible match, considering only non-deferred tokens. In other
+** words, if the phrase is "A B C", and "B" is deferred, the expression
+** is advanced to the next row that contains an instance of "A * C",
+** where "*" may match any single token. The position list in this case
+** is populated as for "A * C" before returning.
+**
+** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is
+** advanced to point to the next row that matches "x AND y".
+**
+** See fts3EvalTestDeferredAndNear() for details on testing if a row is
+** really a match, taking into account deferred tokens and NEAR operators.
*/
-#define DOCID_CMP(i1, i2) ((pCsr->bDesc?-1:1) * (i1-i2))
-
-static void fts3EvalNext(
- Fts3Cursor *pCsr,
- Fts3Expr *pExpr,
- int *pRc
+static void fts3EvalNextRow(
+ Fts3Cursor *pCsr, /* FTS Cursor handle */
+ Fts3Expr *pExpr, /* Expr. to advance to next matching row */
+ int *pRc /* IN/OUT: Error code */
){
if( *pRc==SQLITE_OK ){
+ int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */
assert( pExpr->bEof==0 );
pExpr->bStart = 1;
Fts3Expr *pLeft = pExpr->pLeft;
Fts3Expr *pRight = pExpr->pRight;
assert( !pLeft->bDeferred || !pRight->bDeferred );
+
if( pLeft->bDeferred ){
- fts3EvalNext(pCsr, pRight, pRc);
+ /* LHS is entirely deferred. So we assume it matches every row.
+ ** Advance the RHS iterator to find the next row visited. */
+ fts3EvalNextRow(pCsr, pRight, pRc);
pExpr->iDocid = pRight->iDocid;
pExpr->bEof = pRight->bEof;
}else if( pRight->bDeferred ){
- fts3EvalNext(pCsr, pLeft, pRc);
+ /* RHS is entirely deferred. So we assume it matches every row.
+ ** Advance the LHS iterator to find the next row visited. */
+ fts3EvalNextRow(pCsr, pLeft, pRc);
pExpr->iDocid = pLeft->iDocid;
pExpr->bEof = pLeft->bEof;
}else{
- fts3EvalNext(pCsr, pLeft, pRc);
- fts3EvalNext(pCsr, pRight, pRc);
-
+ /* Neither the RHS or LHS are deferred. */
+ fts3EvalNextRow(pCsr, pLeft, pRc);
+ fts3EvalNextRow(pCsr, pRight, pRc);
while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
if( iDiff==0 ) break;
if( iDiff<0 ){
- fts3EvalNext(pCsr, pLeft, pRc);
+ fts3EvalNextRow(pCsr, pLeft, pRc);
}else{
- fts3EvalNext(pCsr, pRight, pRc);
+ fts3EvalNextRow(pCsr, pRight, pRc);
}
}
-
pExpr->iDocid = pLeft->iDocid;
pExpr->bEof = (pLeft->bEof || pRight->bEof);
}
assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
- fts3EvalNext(pCsr, pLeft, pRc);
+ fts3EvalNextRow(pCsr, pLeft, pRc);
}else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
- fts3EvalNext(pCsr, pRight, pRc);
+ fts3EvalNextRow(pCsr, pRight, pRc);
}else{
- fts3EvalNext(pCsr, pLeft, pRc);
- fts3EvalNext(pCsr, pRight, pRc);
+ fts3EvalNextRow(pCsr, pLeft, pRc);
+ fts3EvalNextRow(pCsr, pRight, pRc);
}
pExpr->bEof = (pLeft->bEof && pRight->bEof);
Fts3Expr *pRight = pExpr->pRight;
if( pRight->bStart==0 ){
- fts3EvalNext(pCsr, pRight, pRc);
+ fts3EvalNextRow(pCsr, pRight, pRc);
assert( *pRc!=SQLITE_OK || pRight->bStart );
}
- fts3EvalNext(pCsr, pLeft, pRc);
+ fts3EvalNextRow(pCsr, pLeft, pRc);
if( pLeft->bEof==0 ){
while( !*pRc
&& !pRight->bEof
&& DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0
){
- fts3EvalNext(pCsr, pRight, pRc);
+ fts3EvalNextRow(pCsr, pRight, pRc);
}
}
pExpr->iDocid = pLeft->iDocid;
default: {
Fts3Phrase *pPhrase = pExpr->pPhrase;
- fts3EvalZeroPoslist(pPhrase);
+ fts3EvalInvalidatePoslist(pPhrase);
*pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
pExpr->iDocid = pPhrase->doclist.iDocid;
break;
}
}
-static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
- int bHit = 1;
+/*
+** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
+** cluster, then this function returns 1 immediately.
+**
+** Otherwise, it checks if the current row really does match the NEAR
+** expression, using the data currently stored in the position lists
+** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression.
+**
+** If the current row is a match, the position list associated with each
+** phrase in the NEAR expression is edited in place to contain only those
+** phrase instances sufficiently close to their peers to satisfy all NEAR
+** constraints. In this case it returns 1. If the NEAR expression does not
+** match the current row, 0 is returned. The position lists may or may not
+** be edited if 0 is returned.
+*/
+static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
+ int res = 1;
+
+ /* The following block runs if pExpr is the root of a NEAR query.
+ ** For example, the query:
+ **
+ ** "w" NEAR "x" NEAR "y" NEAR "z"
+ **
+ ** which is represented in tree form as:
+ **
+ ** |
+ ** +--NEAR--+ <-- root of NEAR query
+ ** | |
+ ** +--NEAR--+ "z"
+ ** | |
+ ** +--NEAR--+ "y"
+ ** | |
+ ** "w" "x"
+ **
+ ** The right-hand child of a NEAR node is always a phrase. The
+ ** left-hand child may be either a phrase or a NEAR node. There are
+ ** no exceptions to this - it's the way the parser in fts3_expr.c works.
+ */
+ if( *pRc==SQLITE_OK
+ && pExpr->eType==FTSQUERY_NEAR
+ && pExpr->bEof==0
+ && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
+ ){
+ Fts3Expr *p;
+ int nTmp = 0; /* Bytes of temp space */
+ char *aTmp; /* Temp space for PoslistNearMerge() */
+
+ /* Allocate temporary working space. */
+ for(p=pExpr; p->pLeft; p=p->pLeft){
+ nTmp += p->pRight->pPhrase->doclist.nList;
+ }
+ nTmp += p->pPhrase->doclist.nList;
+ aTmp = sqlite3_malloc(nTmp*2);
+ if( !aTmp ){
+ *pRc = SQLITE_NOMEM;
+ res = 0;
+ }else{
+ char *aPoslist = p->pPhrase->doclist.pList;
+ int nToken = p->pPhrase->nToken;
+
+ for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
+ Fts3Phrase *pPhrase = p->pRight->pPhrase;
+ int nNear = p->nNear;
+ res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+ }
+
+ aPoslist = pExpr->pRight->pPhrase->doclist.pList;
+ nToken = pExpr->pRight->pPhrase->nToken;
+ for(p=pExpr->pLeft; p && res; p=p->pLeft){
+ int nNear = p->pParent->nNear;
+ Fts3Phrase *pPhrase = (
+ p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
+ );
+ res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+ }
+ }
+
+ sqlite3_free(aTmp);
+ }
+
+ return res;
+}
+
+/*
+** This function is a helper function for fts3EvalTestDeferredAndNear().
+** Assuming no error occurs or has occurred, It returns non-zero if the
+** expression passed as the second argument matches the row that pCsr
+** currently points to, or zero if it does not.
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** If an error occurs during execution of this function, *pRc is set to
+** the appropriate SQLite error code. In this case the returned value is
+** undefined.
+*/
+static int fts3EvalTestExpr(
+ Fts3Cursor *pCsr, /* FTS cursor handle */
+ Fts3Expr *pExpr, /* Expr to test. May or may not be root. */
+ int *pRc /* IN/OUT: Error code */
+){
+ int bHit = 1; /* Return value */
if( *pRc==SQLITE_OK ){
switch( pExpr->eType ){
case FTSQUERY_NEAR:
case FTSQUERY_AND:
bHit = (
- fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
- && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
+ fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
+ && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
&& fts3EvalNearTest(pExpr, pRc)
);
Fts3Expr *p;
for(p=pExpr; p->pPhrase==0; p=p->pLeft){
if( p->pRight->iDocid==pCsr->iPrevId ){
- fts3EvalZeroPoslist(p->pRight->pPhrase);
+ fts3EvalInvalidatePoslist(p->pRight->pPhrase);
}
}
if( p->iDocid==pCsr->iPrevId ){
- fts3EvalZeroPoslist(p->pPhrase);
+ fts3EvalInvalidatePoslist(p->pPhrase);
}
}
break;
case FTSQUERY_OR: {
- int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc);
- int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc);
+ int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
+ int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
bHit = bHit1 || bHit2;
break;
}
case FTSQUERY_NOT:
bHit = (
- fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
- && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
+ fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
+ && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
);
break;
Fts3Phrase *pPhrase = pExpr->pPhrase;
assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
if( pExpr->bDeferred ){
- fts3EvalZeroPoslist(pPhrase);
+ fts3EvalInvalidatePoslist(pPhrase);
}
*pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
bHit = (pPhrase->doclist.pList!=0);
}
/*
-** Return 1 if both of the following are true:
+** This function is called as the second part of each xNext operation when
+** iterating through the results of a full-text query. At this point the
+** cursor points to a row that matches the query expression, with the
+** following caveats:
+**
+** * Up until this point, "NEAR" operators in the expression have been
+** treated as "AND".
+**
+** * Deferred tokens have not yet been considered.
+**
+** If *pRc is not SQLITE_OK when this function is called, it immediately
+** returns 0. Otherwise, it tests whether or not after considering NEAR
+** operators and deferred tokens the current row is still a match for the
+** expression. It returns 1 if both of the following are true:
**
** 1. *pRc is SQLITE_OK when this function returns, and
**
** 2. After scanning the current FTS table row for the deferred tokens,
-** it is determined that the row does not match the query.
+** it is determined that the row does *not* match the query.
**
** Or, if no error occurs and it seems the current row does match the FTS
** query, return 0.
*/
-static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){
+static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
int rc = *pRc;
int bMiss = 0;
if( rc==SQLITE_OK ){
+
+ /* If there are one or more deferred tokens, load the current row into
+ ** memory and scan it to determine the position list for each deferred
+ ** token. Then, see if this row is really a match, considering deferred
+ ** tokens and NEAR operators (neither of which were taken into account
+ ** earlier, by fts3EvalNextRow()).
+ */
if( pCsr->pDeferred ){
rc = fts3CursorSeek(0, pCsr);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
}
}
- bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc));
+ bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc));
+
+ /* Free the position-lists accumulated for each deferred token above. */
sqlite3Fts3FreeDeferredDoclists(pCsr);
*pRc = rc;
}
** Advance to the next document that matches the FTS expression in
** Fts3Cursor.pExpr.
*/
-int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){
+static int fts3EvalNext(Fts3Cursor *pCsr){
int rc = SQLITE_OK; /* Return Code */
Fts3Expr *pExpr = pCsr->pExpr;
assert( pCsr->isEof==0 );
sqlite3_reset(pCsr->pStmt);
}
assert( sqlite3_data_count(pCsr->pStmt)==0 );
- fts3EvalNext(pCsr, pExpr, &rc);
+ fts3EvalNextRow(pCsr, pExpr, &rc);
pCsr->isEof = pExpr->bEof;
pCsr->isRequireSeek = 1;
pCsr->isMatchinfoNeeded = 1;
pCsr->iPrevId = pExpr->iDocid;
- }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) );
+ }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
}
return rc;
}
/*
** Restart interation for expression pExpr so that the next call to
-** sqlite3Fts3EvalNext() visits the first row. Do not allow incremental
+** fts3EvalNext() visits the first row. Do not allow incremental
** loading or merging of phrase doclists for this iteration.
**
** If *pRc is other than SQLITE_OK when this function is called, it is
Fts3Phrase *pPhrase = pExpr->pPhrase;
if( pPhrase ){
- fts3EvalZeroPoslist(pPhrase);
+ fts3EvalInvalidatePoslist(pPhrase);
if( pPhrase->bIncr ){
assert( pPhrase->nToken==1 );
assert( pPhrase->aToken[0].pSegcsr );
assert( sqlite3_data_count(pCsr->pStmt)==0 );
/* Advance to the next document */
- fts3EvalNext(pCsr, pRoot, &rc);
+ fts3EvalNextRow(pCsr, pRoot, &rc);
pCsr->isEof = pRoot->bEof;
pCsr->isRequireSeek = 1;
pCsr->isMatchinfoNeeded = 1;
pCsr->iPrevId = pRoot->iDocid;
}while( pCsr->isEof==0
&& pRoot->eType==FTSQUERY_NEAR
- && fts3EvalLoadDeferred(pCsr, &rc)
+ && fts3EvalTestDeferredAndNear(pCsr, &rc)
);
if( rc==SQLITE_OK && pCsr->isEof==0 ){
*/
fts3EvalRestart(pCsr, pRoot, &rc);
do {
- fts3EvalNext(pCsr, pRoot, &rc);
+ fts3EvalNextRow(pCsr, pRoot, &rc);
assert( pRoot->bEof==0 );
}while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
- fts3EvalLoadDeferred(pCsr, &rc);
+ fts3EvalTestDeferredAndNear(pCsr, &rc);
}
}
return rc;
if( pPhrase ){
int i;
sqlite3_free(pPhrase->doclist.aAll);
- fts3EvalZeroPoslist(pPhrase);
+ fts3EvalInvalidatePoslist(pPhrase);
memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
for(i=0; i<pPhrase->nToken; i++){
fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
}
}
+#if !SQLITE_CORE
+/*
+** Initialize API pointer table, if required.
+*/
+int sqlite3_extension_init(
+ sqlite3 *db,
+ char **pzErrMsg,
+ const sqlite3_api_routines *pApi
+){
+ SQLITE_EXTENSION_INIT2(pApi)
+ return sqlite3Fts3Init(db);
+}
+#endif
+
#endif