static int fts3ReadBlock(
Fts3Table *p,
sqlite3_int64 iBlock,
- char **pzBlock,
+ char const **pzBlock,
int *pnBlock
){
sqlite3_stmt *pStmt;
}
/*
-** The buffer pointed to by argument zNode (size nNode bytes) contains a
-** b-tree segment interior node. This function inspects the sub-tree headed
-** by the node to determine the range of leaf-nodes (if any) that may
-** contain a term that matches the contents of buffer zTerm (size nTerm
-** bytes). If the isPrefix parameter is true, then the range of leaves
-** returned are those that may contain any term for which zTerm/nTerm is
-** a prefix.
-**
-** If successful, SQLITE_OK is returned. The blockid of the first leaf in the
-** selected range is written to piStart before returning. The blockid of the
-** final leaf in the selected range is written to *piEnd.
+** The buffer pointed to by argument zNode (size nNode bytes) contains the
+** root node of a b-tree segment. The segment is guaranteed to be at least
+** one level high (i.e. the root node is not also a leaf). If successful,
+** this function locates the leaf node of the segment that may contain the
+** term specified by arguments zTerm and nTerm and writes its block number
+** to *piLeaf.
+**
+** It is possible that the returned leaf node does not contain the specified
+** term. However, if the segment does contain said term, it is stored on
+** the identified leaf node. Because this function only inspects interior
+** segment nodes (and never loads leaf nodes into memory), it cannot
+** determine whether or not the term is actually present on that leaf.
+**
+** If an error occurs, an error code other than SQLITE_OK is returned.
*/
-static int fts3SelectLeaves(
+static int fts3SelectLeaf(
Fts3Table *p, /* Virtual table handle */
const char *zTerm, /* Term to select leaves for */
int nTerm, /* Size of term zTerm in bytes */
- int isPrefix, /* True for a prefix search */
const char *zNode, /* Buffer containing segment interior node */
int nNode, /* Size of buffer at zNode */
- sqlite3_int64 *piStart, /* First selected leaf */
- sqlite3_int64 *piEnd /* Second selected leaf */
+ sqlite3_int64 *piLeaf /* Selected leaf node */
){
int rc = SQLITE_OK; /* Return code */
const char *zCsr = zNode; /* Cursor to iterate through node */
char *zBuffer = 0; /* Buffer to load terms into */
int nAlloc = 0; /* Size of allocated buffer */
- int iHeight; /* Height of this node in tree */
- sqlite3_int64 iChild;
- sqlite3_int64 iStart = 0;
- sqlite3_int64 iEnd;
-
- zCsr += sqlite3Fts3GetVarint32(zCsr, &iHeight);
- zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
-
- while( zCsr<zEnd ){
- int nSuffix; /* Size of term suffix */
- int nPrefix = 0; /* Size of term prefix */
- int nBuffer; /* Total term size */
- int nMin; /* Minimum of nBuffer and nTerm */
- int cmp; /* Result of comparing term and buffer */
-
- /* Load the next term on the node into zBuffer */
- if( zBuffer ){
- zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
- }
- zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
- if( nPrefix+nSuffix>nAlloc ){
- char *zNew;
- nAlloc = (nPrefix+nSuffix) * 2;
- zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
- if( !zNew ){
- sqlite3_free(zBuffer);
- return SQLITE_NOMEM;
+ while( 1 ){
+ int iHeight; /* Height of this node in tree */
+ sqlite3_int64 iChild; /* Block id of child node to descend to */
+ int nBlock; /* Size of child node in bytes */
+
+ zCsr += sqlite3Fts3GetVarint32(zCsr, &iHeight);
+ zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
+
+ while( zCsr<zEnd ){
+ int nSuffix; /* Size of term suffix */
+ int nPrefix = 0; /* Size of term prefix */
+ int nBuffer; /* Total term size */
+ int nMin; /* Minimum of nBuffer and nTerm */
+
+ /* Load the next term on the node into zBuffer */
+ if( zBuffer ){
+ zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
}
- zBuffer = zNew;
- }
- memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
- nBuffer = nPrefix + nSuffix;
- zCsr += nSuffix;
-
- /* Compare the term we are searching for with the term just loaded from
- ** the interior node. If variable cmp is greater than or equal to zero,
- ** then all terms on the sub-tree headed by node iChild are smaller than
- ** zTerm. No need to search iChild.
- **
- ** If variable cmp is less than zero, then the sub-tree headed by
+ zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
+ if( nPrefix+nSuffix>nAlloc ){
+ char *zNew;
+ nAlloc = (nPrefix+nSuffix) * 2;
+ zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
+ if( !zNew ){
+ sqlite3_free(zBuffer);
+ return SQLITE_NOMEM;
+ }
+ zBuffer = zNew;
+ }
+ memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
+ nBuffer = nPrefix + nSuffix;
+ zCsr += nSuffix;
+
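+ /* Compare the term we are searching for with the term just loaded from
+ ** the interior node. If the specified term is greater than or equal
+ ** to the term from the interior node, then all terms on the sub-tree
+ ** headed by node iChild are smaller than zTerm. No need to search
+ ** iChild.
+ **
+ ** If the interior node term is larger than the specified term, then
+ ** the tree headed by iChild may contain the specified term.
+ **
+ ** Only the first nMin bytes (the length of the shorter term) are
+ ** compared. A search term that is a proper prefix of the interior node
+ ** term therefore compares as equal, and the scan moves on to the next
+ ** child. NOTE(review): confirm this is the intended ordering.
+ */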
+ nMin = (nBuffer>nTerm ? nTerm : nBuffer);
+ if( memcmp(zTerm, zBuffer, nMin)<0 ) break;
+ iChild++;
+ };
+
+ /* If (iHeight==1), the children of this interior node are leaves. The
+ ** specified term may be present on leaf node iChild.
*/
- nMin = (nBuffer>nTerm ? nTerm : nBuffer);
- cmp = memcmp(zTerm, zBuffer, nMin);
- if( isPrefix && cmp==0 && iStart==0 ){
- iStart = iChild;
+ if( iHeight==1 ){
+ *piLeaf = iChild;
+ break;
}
- if( cmp<0 ) break;
- iChild++;
- };
- iEnd = iChild;
- if( iStart==0 ) iStart = iChild;
- sqlite3_free(zBuffer);
- if( iHeight==1 ){
- if( piEnd ) *piEnd = iEnd;
- if( piStart ) *piStart = iStart;
- }else{
- char *zBlock;
- int nBlock;
- if( piEnd ){
- rc = fts3ReadBlock(p, iEnd, &zBlock, &nBlock);
- if( rc==SQLITE_OK ){
- rc = fts3SelectLeaves(p,zTerm,nTerm,isPrefix,zBlock,nBlock,0,piEnd);
- }
- }
- if( piStart && rc==SQLITE_OK ){
- rc = fts3ReadBlock(p, iStart, &zBlock, &nBlock);
- if( rc==SQLITE_OK ){
- rc = fts3SelectLeaves(p,zTerm,nTerm,isPrefix,zBlock,nBlock,piStart,0);
- }
- }
+ /* Descend to interior node iChild. */
+ rc = fts3ReadBlock(p, iChild, &zCsr, &nBlock);
+ if( rc!=SQLITE_OK ) break;
+ zEnd = &zCsr[nBlock];
}
-
+ sqlite3_free(zBuffer);
return rc;
}
return SQLITE_NOMEM;
}
}
- (mergetype==MERGE_NEAR ? 0 : &p);
while( p1 && p2 ){
if( i1==i2 ){
return SQLITE_OK;
}
+/*
+** A pointer to an instance of this structure is used as the context
+** argument to sqlite3Fts3SegReaderIterate() when fts3TermSelectCb() is
+** the callback.
+*/
typedef struct TermSelect TermSelect;
struct TermSelect {
- char const *zTerm;
- int nTerm;
- int isPrefix;
int isReqPos;
char *aOutput; /* Malloc'd output buffer */
int nOutput; /* Size of output in bytes */
};
static int fts3TermSelectCb(
- Fts3Table *p,
- void *pContext,
+ Fts3Table *p, /* Virtual table object */
+ void *pContext, /* Pointer to TermSelect structure */
char *zTerm,
int nTerm,
char *aDoclist,
int nDoclist
){
TermSelect *pTS = (TermSelect *)pContext;
+ int nNew = pTS->nOutput + nDoclist;
- if( (pTS->nTerm==nTerm || (pTS->isPrefix && pTS->nTerm<nTerm))
- && 0==memcmp(zTerm, pTS->zTerm, pTS->nTerm)
- ){
- int nNew = pTS->nOutput + nDoclist;
- char *aNew = sqlite3_malloc(nNew);
- if( !aNew ){
- return SQLITE_NOMEM;
- }
-
- if( pTS->nOutput==0 ){
- /* If this is the first term selected, copy the doclist to the output
- ** buffer using memcpy(). TODO: Add a way to transfer control of the
- ** aDoclist buffer from the caller so as to avoid the memcpy().
- */
- memcpy(aNew, aDoclist, nDoclist);
- }else{
- /* The output buffer is not empty. Merge doclist aDoclist with the
- ** existing output. This can only happen with prefix-searches (as
- ** searches for exact terms return exactly one doclist).
- */
- int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
- assert( pTS->isPrefix );
- fts3DoclistMerge(mergetype, 0, 0,
- aNew, &nNew, pTS->aOutput, pTS->nOutput, aDoclist, nDoclist
- );
- }
+ char *aNew = sqlite3_malloc(nNew);
+ if( !aNew ){
+ return SQLITE_NOMEM;
+ }
- sqlite3_free(pTS->aOutput);
- pTS->aOutput = aNew;
- pTS->nOutput = nNew;
+ if( pTS->nOutput==0 ){
+ /* If this is the first term selected, copy the doclist to the output
+ ** buffer using memcpy(). TODO: Add a way to transfer control of the
+ ** aDoclist buffer from the caller so as to avoid the memcpy().
+ */
+ memcpy(aNew, aDoclist, nDoclist);
+ }else{
+ /* The output buffer is not empty. Merge doclist aDoclist with the
+ ** existing output. This can only happen with prefix-searches (as
+ ** searches for exact terms return exactly one doclist).
+ */
+ int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
+ fts3DoclistMerge(mergetype, 0, 0,
+ aNew, &nNew, pTS->aOutput, pTS->nOutput, aDoclist, nDoclist
+ );
}
+ sqlite3_free(pTS->aOutput);
+ pTS->aOutput = aNew;
+ pTS->nOutput = nNew;
+
return SQLITE_OK;
}
){
int i;
TermSelect tsc;
+ Fts3SegFilter filter; /* Segment term filter configuration */
Fts3SegReader **apSegment = 0; /* Array of segments to read data from */
int nSegment = 0; /* Size of apSegment array */
int nAlloc = 0; /* Allocated size of segment array */
int rc; /* Return code */
sqlite3_stmt *pStmt; /* SQL statement to scan %_segdir table */
int iAge = 0; /* Used to assign ages to segments */
- int flags;
/* Loop through the entire %_segdir table. For each segment, create a
** Fts3SegReader to iterate through the subset of the segment leaves
*/
rc = sqlite3Fts3SegReaderNew(p, iAge, 0, 0, 0, zRoot, nRoot, &pNew);
}else{
- sqlite3_int64 i1, i2;
- rc = fts3SelectLeaves(p, zTerm, nTerm, isPrefix, zRoot, nRoot, &i1, &i2);
+ sqlite3_int64 i1;
+ rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &i1);
if( rc==SQLITE_OK ){
- assert( i1 && i2 );
+ sqlite3_int64 i2 = sqlite3_column_int64(pStmt, 3);
rc = sqlite3Fts3SegReaderNew(p, iAge, i1, i2, 0, 0, 0, &pNew);
}
}
}
memset(&tsc, 0, sizeof(TermSelect));
- tsc.zTerm = zTerm;
- tsc.nTerm = nTerm;
- tsc.isPrefix = isPrefix;
tsc.isReqPos = isReqPos;
- flags = FTS3_SEGMENT_IGNORE_EMPTY
+ filter.flags = FTS3_SEGMENT_IGNORE_EMPTY
+ | (isPrefix ? FTS3_SEGMENT_PREFIX : 0)
| (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
| (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
- rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, flags,
- iColumn, fts3TermSelectCb, (void *)&tsc
+ filter.iCol = iColumn;
+ filter.zTerm = zTerm;
+ filter.nTerm = nTerm;
+ rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter,
+ fts3TermSelectCb, (void *)&tsc
);
if( rc==SQLITE_OK ){
return rc;
}
+/*
+** Compare the term that the Fts3SegReader object passed as the first argument
+** points to with the term specified by arguments zTerm and nTerm.
+**
+** If the pSeg iterator is already at EOF, return 0. Otherwise, return
+** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are
+** equal, or +ve if the pSeg term is greater than zTerm/nTerm.
+*/
+static int fts3SegReaderTermCmp(
+ Fts3SegReader *pSeg, /* Segment reader object */
+ const char *zTerm, /* Term to compare to */
+ int nTerm /* Size of term zTerm in bytes */
+){
+ int res = 0; /* Comparison result; stays 0 when pSeg->aNode is NULL */
+ if( pSeg->aNode ){ /* A NULL aNode is the EOF case: skip the comparison */
+ if( pSeg->nTerm>nTerm ){ /* Compare only as many bytes as the shorter term has */
+ res = memcmp(pSeg->zTerm, zTerm, nTerm);
+ }else{
+ res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm);
+ }
+ if( res==0 ){ /* Common bytes are equal: break the tie on length */
+ res = pSeg->nTerm-nTerm; /* Negative if the pSeg term is shorter, 0 if same length */
+ }
+ }
+ return res;
+}
+
/*
** Argument apSegment is an array of nSegment elements. It is known that
** the final (nSegment-nSuspect) members are already in sorted order
int nList = *pnList;
char *pEnd = &pList[nList];
int iCurrent = 0;
-
char *p = pList;
+
+ assert( iCol>=0 );
while( 1 ){
char c = 0;
while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
Fts3Table *p, /* Virtual table handle */
Fts3SegReader **apSegment, /* Array of Fts3SegReader objects */
int nSegment, /* Size of apSegment array */
- int flags, /* Flags mask */
- int iCol, /* Column to filter for */
+ Fts3SegFilter *pFilter, /* Restrictions on range of iteration */
int (*xFunc)(Fts3Table *, void *, char *, int, char *, int), /* Callback */
void *pContext /* Callback context (2nd argument) */
){
int nAlloc = 0; /* Allocated size of aBuffer buffer */
int rc = SQLITE_OK; /* Return code */
- int isIgnoreEmpty = (flags&FTS3_SEGMENT_IGNORE_EMPTY);
- int isRequirePos = (flags&FTS3_SEGMENT_REQUIRE_POS);
- int isColFilter = (flags&FTS3_SEGMENT_COLUMN_FILTER);
+ int isIgnoreEmpty = (pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
+ int isRequirePos = (pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
+ int isColFilter = (pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
+ int isPrefix = (pFilter->flags & FTS3_SEGMENT_PREFIX);
+
+ /* If the Fts3SegFilter defines a specific term (or term prefix) to search
+ ** for, then advance each segment iterator until it points to a term of
+ ** equal or greater value than the specified term. This prevents many
+ ** unnecessary merge/sort operations for the case where single segment
+ ** b-tree leaf nodes contain more than one term.
+ */
+ if( pFilter->zTerm ){
+ int nTerm = pFilter->nTerm;
+ char *zTerm = pFilter->zTerm;
+ for(i=0; i<nSegment; i++){
+ Fts3SegReader *pSeg = apSegment[i];
+ while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){
+ rc = fts3SegReaderNext(pSeg);
+ if( rc!=SQLITE_OK ) goto finished;
+ }
+ }
+ }
fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
while( apSegment[0]->aNode ){
char *zTerm = apSegment[0]->zTerm;
int nMerge = 1;
+ /* If this is a prefix-search, and if the term that apSegment[0] points
+ ** to does not begin with the prefix pFilter->zTerm/nTerm, then all
+ ** required callbacks have been made. In this case exit early.
+ **
+ ** Similarly, if this is a search for an exact match, and the first term
+ ** of segment apSegment[0] is not a match, exit early.
+ */
+ if( pFilter->zTerm ){
+ if( nTerm<pFilter->nTerm
+ || (!isPrefix && nTerm>pFilter->nTerm)
+ || memcmp(zTerm, pFilter->zTerm, pFilter->nTerm)
+ ){
+ goto finished;
+ }
+ }
+
while( nMerge<nSegment
&& apSegment[nMerge]->aNode
&& apSegment[nMerge]->nTerm==nTerm
j++;
}
- assert( iCol>=0 || isColFilter==0 );
if( isColFilter ){
- fts3ColumnFilter(iCol, &pList, &nList);
+ fts3ColumnFilter(pFilter->iCol, &pList, &nList);
}
if( !isIgnoreEmpty || nList>0 ){
}
}
+ /* If there is a term specified to filter on, and this is not a prefix
+ ** search, return now. The callback that corresponds to the required
+ ** term (if such a term exists in the index) has already been made.
+ */
+ if( pFilter->zTerm && !isPrefix ){
+ goto finished;
+ }
+
for(i=0; i<nMerge; i++){
rc = fts3SegReaderNext(apSegment[i]);
if( rc!=SQLITE_OK ) goto finished;
SegmentWriter *pWriter = 0;
int nSegment = 0; /* Number of segments being merged */
Fts3SegReader **apSegment = 0; /* Array of Segment iterators */
+ Fts3SegFilter filter; /* Segment term filter condition */
if( iLevel<0 ){
/* This call is to merge all segments in the database to a single
pStmt = 0;
if( rc!=SQLITE_OK ) goto finished;
- rc = sqlite3Fts3SegReaderIterate(
- p, apSegment, nSegment,
- (iLevel<0 ? FTS3_SEGMENT_IGNORE_EMPTY : 0)|FTS3_SEGMENT_REQUIRE_POS,
- 0, fts3MergeCallback, (void *)&pWriter
+ memset(&filter, 0, sizeof(Fts3SegFilter));
+ filter.flags = FTS3_SEGMENT_REQUIRE_POS;
+ filter.flags |= (iLevel<0 ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
+ rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment,
+ &filter, fts3MergeCallback, (void *)&pWriter
);
if( rc!=SQLITE_OK ) goto finished;
-C Add\sa\sfew\sextra\scoverage\stest\scases\sfor\sfts3.
-D 2009-11-16T16:36:23
+C Improvements\sto\sthe\sway\sfts3\sreads\sthe\sfull-text\sindex.
+D 2009-11-17T12:52:10
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in 53f3dfa49f28ab5b80cb083fb7c9051e596bcfa1
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c 4d701e31cb32cfb9b535edcc33271d8bcb2fa76f
+F ext/fts3/fts3.c a72c19fa6270b5f88ad9b1215c821f7082164655
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
-F ext/fts3/fts3Int.h f8419da445790c0666d4b4d72dc15a07dd7ae93e
+F ext/fts3/fts3Int.h 5c040c0fb47ed81aaba589a55a7455c980592bea
F ext/fts3/fts3_expr.c bdf11f3602f62f36f0e42823680bf22033dae0de
F ext/fts3/fts3_hash.c 1af1833a4d581ee8d668bb71f5a500f7a0104982
F ext/fts3/fts3_hash.h 39524725425078bf9e814e9569c74a8e5a21b9fb
F ext/fts3/fts3_tokenizer.c 185a212670a9bbdeb5cad6942305e681bce5c87b
F ext/fts3/fts3_tokenizer.h 7ff73caa3327589bf6550f60d93ebdd1f6a0fb5c
F ext/fts3/fts3_tokenizer1.c 0a5bcc579f35de5d24a9345d7908dc25ae403ee7
-F ext/fts3/fts3_write.c 4285a2804ef308ed2eef946dae20d9d0361554d0
+F ext/fts3/fts3_write.c edf123f978fca3d26707452a380fa169849eb655
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33
F ext/icu/icu.c 12e763d288d23b5a49de37caa30737b971a2f1e2
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
F test/fts3expr.test 05dab77387801e4900009917bb18f556037d82da
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
-F test/fts3malloc.test ed11f188d03560d7f44bd5c126ac004f011cc719
+F test/fts3malloc.test cda1b22d8c86c3e434d93b63f2fc7a0191fb6d30
F test/fts3near.test dc196dd17b4606f440c580d45b3d23aa975fd077
F test/func.test af106ed834001738246d276659406823e35cde7b
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P d3cae986ee1a176b1b015c3cebcd58ff0c3bdf92
-R 4deae89fec5a2f223cc1fc606b6691c8
+P f29c8fcade4aadeae3824975cf59f306c11c906b
+R a419bf1bcdd810be07b904f21b0a1ef0
U dan
-Z 34f2a8a40095974f35e0328293748f2b
+Z 4e40dbade582f444c0b93ffcad88c385