for(i=0; i<SizeofArray(p->aStmt); i++){
sqlite3_finalize(p->aStmt[i]);
}
- for(i=0; i<p->nLeavesStmt; i++){
- sqlite3_finalize(p->aLeavesStmt[i]);
- }
- sqlite3_free(p->zSelectLeaves);
- sqlite3_free(p->aLeavesStmt);
+ sqlite3_free(p->zSegmentsTbl);
/* Invoke the tokenizer destructor to free the tokenizer. */
p->pTokenizer->pModule->xDestroy(p->pTokenizer);
/*
** Construct one or more SQL statements from the format string given
-** and then evaluate those statements. The success code is writting
+** and then evaluate those statements. The success code is written
** into *pRc.
**
** If *pRc is initially non-zero then this routine is a no-op.
** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table
** passed as the first argument. This is done as part of the xConnect()
** and xCreate() methods.
+**
+** If *pRc is non-zero when this function is called, it is a no-op.
+** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
+** before returning.
*/
-static int fts3DeclareVtab(Fts3Table *p){
- int i; /* Iterator variable */
- int rc; /* Return code */
- char *zSql; /* SQL statement passed to declare_vtab() */
- char *zCols; /* List of user defined columns */
-
- /* Create a list of user columns for the virtual table */
- zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
- for(i=1; zCols && i<p->nColumn; i++){
- zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
- }
+static void fts3DeclareVtab(int *pRc, Fts3Table *p){
+ if( *pRc==SQLITE_OK ){
+ int i; /* Iterator variable */
+ int rc; /* Return code */
+ char *zSql; /* SQL statement passed to declare_vtab() */
+ char *zCols; /* List of user defined columns */
+
+ /* Create a list of user columns for the virtual table */
+ zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
+ for(i=1; zCols && i<p->nColumn; i++){
+ zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
+ }
- /* Create the whole "CREATE TABLE" statement to pass to SQLite */
- zSql = sqlite3_mprintf(
- "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
- );
+ /* Create the whole "CREATE TABLE" statement to pass to SQLite */
+ zSql = sqlite3_mprintf(
+ "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
+ );
+ if( !zCols || !zSql ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = sqlite3_declare_vtab(p->db, zSql);
+ }
- if( !zCols || !zSql ){
- rc = SQLITE_NOMEM;
- }else{
- rc = sqlite3_declare_vtab(p->db, zSql);
+ sqlite3_free(zSql);
+ sqlite3_free(zCols);
+ *pRc = rc;
}
-
- sqlite3_free(zSql);
- sqlite3_free(zCols);
- return rc;
}
/*
if( rc!=SQLITE_ABORT ) *pRc = rc;
}
+/*
+** Store the current database page-size in bytes in p->nPgsz.
+**
+** If *pRc is non-zero when this function is called, it is a no-op.
+** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
+** before returning.
+*/
+static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
+ if( *pRc==SQLITE_OK ){
+ int rc; /* Return code */
+ char *zSql; /* SQL text "PRAGMA %Q.page_size" */
+ sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */
+
+ zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb);
+ if( !zSql ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
+ if( rc==SQLITE_OK ){
+ if( SQLITE_ROW==sqlite3_step(pStmt) ){
+ p->nPgsz = sqlite3_column_int(pStmt, 0);
+ }
+ rc = sqlite3_finalize(pStmt);
+ }
+ }
+ assert( p->nPgsz>0 || rc!=SQLITE_OK );
+ sqlite3_free(zSql);
+ *pRc = rc;
+ }
+}
+
/*
** This function is the implementation of both the xConnect and xCreate
** methods of the FTS3 virtual table.
fts3TableExists(&rc, db, argv[1], argv[2], "_content", &p->bHasContent);
fts3TableExists(&rc, db, argv[1], argv[2], "_docsize", &p->bHasDocsize);
}
- if( rc!=SQLITE_OK ) goto fts3_init_out;
- rc = fts3DeclareVtab(p);
- if( rc!=SQLITE_OK ) goto fts3_init_out;
+ /* Figure out the page-size for the database. This is required in order to
+ ** estimate the cost of loading large doclists from the database (see
+ ** function sqlite3Fts3SegReaderCost() for details).
+ */
+ fts3DatabasePageSize(&rc, p);
- *ppVTab = &p->base;
+ /* Declare the table schema to SQLite. */
+ fts3DeclareVtab(&rc, p);
fts3_init_out:
assert( p || (pTokenizer && rc!=SQLITE_OK) );
}else{
pTokenizer->pModule->xDestroy(pTokenizer);
}
+ }else{
+ *ppVTab = &p->base;
}
return rc;
}
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
sqlite3_finalize(pCsr->pStmt);
sqlite3Fts3ExprFree(pCsr->pExpr);
+ sqlite3Fts3FreeDeferredTokens(pCsr);
sqlite3_free(pCsr->aDoclist);
sqlite3_free(pCsr->aMatchinfo);
sqlite3_free(pCsr);
}
}
-/*
-** Advance the cursor to the next row in the %_content table that
-** matches the search criteria. For a MATCH search, this will be
-** the next row that matches. For a full-table scan, this will be
-** simply the next row in the %_content table. For a docid lookup,
-** this routine simply sets the EOF flag.
-**
-** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
-** even if we reach end-of-file. The fts3EofMethod() will be called
-** subsequently to determine whether or not an EOF was hit.
-*/
-static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
- int rc = SQLITE_OK; /* Return code */
- Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
- if( pCsr->aDoclist==0 ){
- if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
- pCsr->isEof = 1;
- rc = sqlite3_reset(pCsr->pStmt);
- }
- }else if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){
- pCsr->isEof = 1;
- }else{
- sqlite3_reset(pCsr->pStmt);
- fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId);
- pCsr->isRequireSeek = 1;
- pCsr->isMatchinfoNeeded = 1;
- }
- return rc;
-}
/*
/*
** nToken==1 searches for adjacent positions.
+**
+** This function is used to merge two position lists into one. When it is
+** called, *pp1 and *pp2 must both point to position lists. A position-list is
+** the part of a doclist that follows each document id. For example, if a row
+** contains:
+**
+** 'a b c'|'x y z'|'a b b a'
+**
+** Then the position list for this row for token 'b' would consist of:
+**
+** 0x02 0x01 0x02 0x03 0x03 0x00
+**
+** When this function returns, both *pp1 and *pp2 are left pointing to the
+** byte following the 0x00 terminator of their respective position lists.
+**
+** If isSaveLeft is 0, an entry is added to the output position list for
+** each position in *pp2 for which there exists one or more positions in
+** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
+** when the *pp1 token appears before the *pp2 token, but not more than nToken
+** slots before it.
*/
static int fts3PoslistPhraseMerge(
- char **pp, /* Output buffer */
+ char **pp, /* IN/OUT: Preallocated output buffer */
int nToken, /* Maximum difference in token positions */
int isSaveLeft, /* Save the left position */
- char **pp1, /* Left input list */
- char **pp2 /* Right input list */
+ int isExact, /* If *pp1 is exactly nTokens before *pp2 */
+ char **pp1, /* IN/OUT: Left input list */
+ char **pp2 /* IN/OUT: Right input list */
){
char *p = (pp ? *pp : 0);
char *p1 = *pp1;
char *p2 = *pp2;
-
int iCol1 = 0;
int iCol2 = 0;
+
+ /* Never set both isSaveLeft and isExact for the same invocation. */
+ assert( isSaveLeft==0 || isExact==0 );
+
assert( *p1!=0 && *p2!=0 );
if( *p1==POS_COLUMN ){
p1++;
fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
while( 1 ){
- if( iPos2>iPos1 && iPos2<=iPos1+nToken ){
+ if( iPos2==iPos1+nToken
+ || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken)
+ ){
sqlite3_int64 iSave;
if( !pp ){
fts3PoslistCopy(0, &p2);
char *p2 = *pp2;
if( !pp ){
- if( fts3PoslistPhraseMerge(0, nRight, 0, pp1, pp2) ) return 1;
+ if( fts3PoslistPhraseMerge(0, nRight, 0, 0, pp1, pp2) ) return 1;
*pp1 = p1;
*pp2 = p2;
- return fts3PoslistPhraseMerge(0, nLeft, 0, pp2, pp1);
+ return fts3PoslistPhraseMerge(0, nLeft, 0, 0, pp2, pp1);
}else{
char *pTmp1 = aTmp;
char *pTmp2;
char *aTmp2;
int res = 1;
- fts3PoslistPhraseMerge(&pTmp1, nRight, 0, pp1, pp2);
+ fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
aTmp2 = pTmp2 = pTmp1;
*pp1 = p1;
*pp2 = p2;
- fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, pp2, pp1);
+ fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
fts3PoslistMerge(pp, &aTmp, &aTmp2);
}else if( pTmp1!=aTmp ){
char *pSave = p;
sqlite3_int64 iPrevSave = iPrev;
fts3PutDeltaVarint(&p, &iPrev, i1);
- if( 0==fts3PoslistPhraseMerge(ppPos, 1, 0, &p1, &p2) ){
+ if( 0==fts3PoslistPhraseMerge(ppPos, nParam1, 0, 1, &p1, &p2) ){
p = pSave;
iPrev = iPrevSave;
}
return SQLITE_OK;
}
+static int fts3DeferredTermSelect(
+ Fts3DeferredToken *pToken, /* Phrase token */
+ int isTermPos, /* True to include positions */
+ int *pnOut, /* OUT: Size of list */
+ char **ppOut /* OUT: Body of list */
+){
+ char *aSource;
+ int nSource;
+
+ aSource = sqlite3Fts3DeferredDoclist(pToken, &nSource);
+ if( !aSource ){
+ *pnOut = 0;
+ *ppOut = 0;
+ }else if( isTermPos ){
+ *ppOut = sqlite3_malloc(nSource);
+ if( !*ppOut ) return SQLITE_NOMEM;
+ memcpy(*ppOut, aSource, nSource);
+ *pnOut = nSource;
+ }else{
+ sqlite3_int64 docid;
+ *pnOut = sqlite3Fts3GetVarint(aSource, &docid);
+ *ppOut = sqlite3_malloc(*pnOut);
+ if( !*ppOut ) return SQLITE_NOMEM;
+ sqlite3Fts3PutVarint(*ppOut, docid);
+ }
+
+ return SQLITE_OK;
+}
+
/*
-** This function retreives the doclist for the specified term (or term
-** prefix) from the database.
-**
-** The returned doclist may be in one of two formats, depending on the
-** value of parameter isReqPos. If isReqPos is zero, then the doclist is
-** a sorted list of delta-compressed docids (a bare doclist). If isReqPos
-** is non-zero, then the returned list is in the same format as is stored
-** in the database without the found length specifier at the start of on-disk
-** doclists.
+** An Fts3SegReaderArray is used to store an array of Fts3SegReader objects.
+** Elements are added to the array using fts3SegReaderArrayAdd().
*/
-static int fts3TermSelect(
- Fts3Table *p, /* Virtual table handle */
- int iColumn, /* Column to query (or -ve for all columns) */
+struct Fts3SegReaderArray {
+ int nSegment; /* Number of valid entries in apSegment[] */
+ int nAlloc; /* Allocated size of apSegment[] */
+ int nCost; /* The cost of executing SegReaderIterate() */
+ Fts3SegReader *apSegment[1]; /* Array of seg-reader objects */
+};
+
+
+/*
+** Free an Fts3SegReaderArray object. Also free all seg-readers in the
+** array (using sqlite3Fts3SegReaderFree()).
+*/
+static void fts3SegReaderArrayFree(Fts3SegReaderArray *pArray){
+ if( pArray ){
+ int i;
+ for(i=0; i<pArray->nSegment; i++){
+ sqlite3Fts3SegReaderFree(0, pArray->apSegment[i]);
+ }
+ sqlite3_free(pArray);
+ }
+}
+
+static int fts3SegReaderArrayAdd(
+ Fts3SegReaderArray **ppArray,
+ Fts3SegReader *pNew
+){
+ Fts3SegReaderArray *pArray = *ppArray;
+
+ if( !pArray || pArray->nAlloc==pArray->nSegment ){
+ int nNew = (pArray ? pArray->nAlloc+16 : 16);
+ pArray = (Fts3SegReaderArray *)sqlite3_realloc(pArray,
+ sizeof(Fts3SegReaderArray) + (nNew-1) * sizeof(Fts3SegReader*)
+ );
+ if( !pArray ){
+ sqlite3Fts3SegReaderFree(0, pNew);
+ return SQLITE_NOMEM;
+ }
+ if( nNew==16 ){
+ pArray->nSegment = 0;
+ pArray->nCost = 0;
+ }
+ pArray->nAlloc = nNew;
+ *ppArray = pArray;
+ }
+
+ pArray->apSegment[pArray->nSegment++] = pNew;
+ return SQLITE_OK;
+}
+
+static int fts3TermSegReaderArray(
+ Fts3Cursor *pCsr, /* Virtual table cursor handle */
const char *zTerm, /* Term to query for */
int nTerm, /* Size of zTerm in bytes */
int isPrefix, /* True for a prefix search */
- int isReqPos, /* True to include position lists in output */
- int *pnOut, /* OUT: Size of buffer at *ppOut */
- char **ppOut /* OUT: Malloced result buffer */
+ Fts3SegReaderArray **ppArray /* OUT: Allocated seg-reader array */
){
- int i;
- TermSelect tsc;
- Fts3SegFilter filter; /* Segment term filter configuration */
- Fts3SegReader **apSegment; /* Array of segments to read data from */
- int nSegment = 0; /* Size of apSegment array */
- int nAlloc = 16; /* Allocated size of segment array */
+ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
int rc; /* Return code */
+ Fts3SegReaderArray *pArray = 0; /* Array object to build */
+ Fts3SegReader *pReader = 0; /* Seg-reader to add to pArray */
sqlite3_stmt *pStmt = 0; /* SQL statement to scan %_segdir table */
int iAge = 0; /* Used to assign ages to segments */
- apSegment = (Fts3SegReader **)sqlite3_malloc(sizeof(Fts3SegReader*)*nAlloc);
- if( !apSegment ) return SQLITE_NOMEM;
- rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &apSegment[0]);
- if( rc!=SQLITE_OK ) goto finished;
- if( apSegment[0] ){
- nSegment = 1;
+ /* Allocate a seg-reader to scan the pending terms, if any. */
+ rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pReader);
+ if( rc==SQLITE_OK && pReader ) {
+ rc = fts3SegReaderArrayAdd(&pArray, pReader);
}
/* Loop through the entire %_segdir table. For each segment, create a
** that may contain a term that matches zTerm/nTerm. For non-prefix
** searches, this is always a single leaf. For prefix searches, this
** may be a contiguous block of leaves.
- **
- ** The code in this loop does not actually load any leaves into memory
- ** (unless the root node happens to be a leaf). It simply examines the
- ** b-tree structure to determine which leaves need to be inspected.
*/
- rc = sqlite3Fts3AllSegdirs(p, &pStmt);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts3AllSegdirs(p, &pStmt);
+ }
while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
Fts3SegReader *pNew = 0;
int nRoot = sqlite3_column_bytes(pStmt, 4);
}
iAge++;
- /* If a new Fts3SegReader was allocated, add it to the apSegment array. */
+ /* If a new Fts3SegReader was allocated, add it to the array. */
assert( pNew!=0 || rc!=SQLITE_OK );
- if( pNew ){
- if( nSegment==nAlloc ){
- Fts3SegReader **pArray;
- nAlloc += 16;
- pArray = (Fts3SegReader **)sqlite3_realloc(
- apSegment, nAlloc*sizeof(Fts3SegReader *)
- );
- if( !pArray ){
- sqlite3Fts3SegReaderFree(p, pNew);
- rc = SQLITE_NOMEM;
- goto finished;
- }
- apSegment = pArray;
- }
- apSegment[nSegment++] = pNew;
+ if( rc==SQLITE_OK ){
+ rc = fts3SegReaderArrayAdd(&pArray, pNew);
+ }else{
+ sqlite3Fts3SegReaderFree(p, pNew);
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts3SegReaderCost(pCsr, pNew, &pArray->nCost);
}
}
- if( rc!=SQLITE_DONE ){
- assert( rc!=SQLITE_OK );
- goto finished;
+
+ if( rc==SQLITE_DONE ){
+ rc = sqlite3_reset(pStmt);
+ }else{
+ sqlite3_reset(pStmt);
}
+ if( rc!=SQLITE_OK ){
+ fts3SegReaderArrayFree(pArray);
+ pArray = 0;
+ }
+ *ppArray = pArray;
+ return rc;
+}
+
+/*
+** This function retreives the doclist for the specified term (or term
+** prefix) from the database.
+**
+** The returned doclist may be in one of two formats, depending on the
+** value of parameter isReqPos. If isReqPos is zero, then the doclist is
+** a sorted list of delta-compressed docids (a bare doclist). If isReqPos
+** is non-zero, then the returned list is in the same format as is stored
+** in the database without the found length specifier at the start of on-disk
+** doclists.
+*/
+static int fts3TermSelect(
+ Fts3Table *p, /* Virtual table handle */
+ Fts3PhraseToken *pTok, /* Token to query for */
+ int iColumn, /* Column to query (or -ve for all columns) */
+ int isReqPos, /* True to include position lists in output */
+ int *pnOut, /* OUT: Size of buffer at *ppOut */
+ char **ppOut /* OUT: Malloced result buffer */
+){
+ int rc; /* Return code */
+ Fts3SegReaderArray *pArray; /* Seg-reader array for this term */
+ TermSelect tsc; /* Context object for fts3TermSelectCb() */
+ Fts3SegFilter filter; /* Segment term filter configuration */
+ pArray = pTok->pArray;
memset(&tsc, 0, sizeof(TermSelect));
tsc.isReqPos = isReqPos;
filter.flags = FTS3_SEGMENT_IGNORE_EMPTY
- | (isPrefix ? FTS3_SEGMENT_PREFIX : 0)
+ | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
| (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
| (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
filter.iCol = iColumn;
- filter.zTerm = zTerm;
- filter.nTerm = nTerm;
+ filter.zTerm = pTok->z;
+ filter.nTerm = pTok->n;
- rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter,
- fts3TermSelectCb, (void *)&tsc
+ rc = sqlite3Fts3SegReaderIterate(p, pArray->apSegment, pArray->nSegment,
+ &filter, fts3TermSelectCb, (void *)&tsc
);
if( rc==SQLITE_OK ){
rc = fts3TermSelectMerge(&tsc);
*ppOut = tsc.aaOutput[0];
*pnOut = tsc.anOutput[0];
}else{
+ int i;
for(i=0; i<SizeofArray(tsc.aaOutput); i++){
sqlite3_free(tsc.aaOutput[i]);
}
}
-finished:
- sqlite3_reset(pStmt);
- for(i=0; i<nSegment; i++){
- sqlite3Fts3SegReaderFree(p, apSegment[i]);
+ fts3SegReaderArrayFree(pArray);
+ pTok->pArray = 0;
+ return rc;
+}
+
+static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){
+ int nDoc = 0; /* Return value */
+ if( aList ){
+ char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */
+ char *p = aList; /* Cursor */
+ sqlite3_int64 dummy; /* For Fts3GetVarint() */
+
+ while( p<aEnd ){
+ nDoc++;
+ p += sqlite3Fts3GetVarint(p, &dummy);
+ if( isPoslist ) fts3PoslistCopy(0, &p);
+ }
+ }
+
+ return nDoc;
+}
+
+/*
+** Call sqlite3Fts3DeferToken() for each token in the expression pExpr.
+*/
+static int fts3DeferExpression(Fts3Cursor *pCsr, Fts3Expr *pExpr){
+ int rc = SQLITE_OK;
+ if( pExpr ){
+ rc = fts3DeferExpression(pCsr, pExpr->pLeft);
+ if( rc==SQLITE_OK ){
+ rc = fts3DeferExpression(pCsr, pExpr->pRight);
+ }
+ if( pExpr->eType==FTSQUERY_PHRASE ){
+ int iCol = pExpr->pPhrase->iColumn;
+ int i;
+ pExpr->bDeferred = 1;
+ for(i=0; rc==SQLITE_OK && i<pExpr->pPhrase->nToken; i++){
+ Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
+ if( pToken->pDeferred==0 ){
+ rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol);
+ }
+ }
+ }
}
- sqlite3_free(apSegment);
return rc;
}
+static void fts3DoclistStripPositions(char *aList, int *pnList){
+ if( aList ){
+ char *aEnd = &aList[*pnList]; /* Pointer to one byte after EOF */
+ char *p = aList; /* Input cursor */
+ char *pOut = aList; /* Output cursor */
+ sqlite3_int64 iPrev = 0;
+
+ while( p<aEnd ){
+ sqlite3_int64 delta;
+ p += sqlite3Fts3GetVarint(p, &delta);
+ fts3PoslistCopy(0, &p);
+ pOut += sqlite3Fts3PutVarint(pOut, delta);
+ }
+
+ *pnList = (pOut - aList);
+ }
+}
/*
** Return a DocList corresponding to the phrase *pPhrase.
+**
+** If this function returns SQLITE_OK, but *pnOut is set to a negative value,
+** then no tokens in the phrase were looked up in the full-text index. This
+** is only possible when this function is called from within xFilter(). The
+** caller should assume that all documents match the phrase. The actual
+** filtering will take place in xNext().
*/
static int fts3PhraseSelect(
- Fts3Table *p, /* Virtual table handle */
+ Fts3Cursor *pCsr, /* Virtual table cursor handle */
Fts3Phrase *pPhrase, /* Phrase to return a doclist for */
int isReqPos, /* True if output should contain positions */
char **paOut, /* OUT: Pointer to malloc'd result buffer */
int ii;
int iCol = pPhrase->iColumn;
int isTermPos = (pPhrase->nToken>1 || isReqPos);
+ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+
+ int iPrevTok = 0;
+ int nDoc = 0;
+
+ /* If this is an xFilter() evaluation, create a segment-reader for each
+ ** phrase token. Or, if this is an xNest() or snippet/offsets/matchinfo
+ ** evaluation, only create segment-readers if there are no Fts3DeferredToken
+ ** objects attached to the phrase-tokens.
+ */
+ for(ii=0; ii<pPhrase->nToken; ii++){
+ Fts3PhraseToken *pTok = &pPhrase->aToken[ii];
+ if( pTok->pArray==0 && (pCsr->doDeferred==0 || pTok->pDeferred==0) ){
+ rc = fts3TermSegReaderArray(
+ pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray
+ );
+ if( rc!=SQLITE_OK ) return rc;
+ }
+ }
for(ii=0; ii<pPhrase->nToken; ii++){
- struct PhraseToken *pTok = &pPhrase->aToken[ii];
- char *z = pTok->z; /* Next token of the phrase */
- int n = pTok->n; /* Size of z in bytes */
- int isPrefix = pTok->isPrefix;/* True if token is a prefix */
+ Fts3PhraseToken *pTok; /* Token to find doclist for */
+ int iTok; /* The token being queried this iteration */
char *pList; /* Pointer to token doclist */
int nList; /* Size of buffer at pList */
- rc = fts3TermSelect(p, iCol, z, n, isPrefix, isTermPos, &nList, &pList);
+ /* Select a token to process. If this is an xFilter() call, then tokens
+ ** are processed in order from least to most costly. Otherwise, tokens
+ ** are processed in the order in which they occur in the phrase.
+ */
+ if( pCsr->doDeferred || isReqPos ){
+ iTok = ii;
+ pTok = &pPhrase->aToken[iTok];
+ }else{
+ int nMinCost = 0x7FFFFFFF;
+ int jj;
+
+ /* Find the remaining token with the lowest cost. */
+ for(jj=0; jj<pPhrase->nToken; jj++){
+ Fts3SegReaderArray *pArray = pPhrase->aToken[jj].pArray;
+ if( pArray && pArray->nCost<nMinCost ){
+ iTok = jj;
+ nMinCost = pArray->nCost;
+ }
+ }
+ pTok = &pPhrase->aToken[iTok];
+
+ /* This branch is taken if it is determined that loading the doclist
+ ** for the next token would require more IO than loading all documents
+ ** currently identified by doclist pOut/nOut. No further doclists will
+ ** be loaded from the full-text index for this phrase.
+ */
+ if( nMinCost>nDoc && ii>0 ){
+ rc = fts3DeferExpression(pCsr, pCsr->pExpr);
+ break;
+ }
+ }
+
+ if( pCsr->doDeferred && pTok->pDeferred ){
+ rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList);
+ }else{
+ assert( pTok->pArray );
+ rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList);
+ }
+ assert( rc!=SQLITE_OK || pCsr->doDeferred || pTok->pArray==0 );
if( rc!=SQLITE_OK ) break;
if( ii==0 ){
pOut = pList;
nOut = nList;
}else{
- /* Merge the new term list and the current output. If this is the
- ** last term in the phrase, and positions are not required in the
- ** output of this function, the positions can be dropped as part
- ** of this merge. Either way, the result of this merge will be
- ** smaller than nList bytes. The code in fts3DoclistMerge() is written
- ** so that it is safe to use pList as the output as well as an input
- ** in this case.
+ /* Merge the new term list and the current output. */
+ char *aLeft, *aRight;
+ int nLeft, nRight;
+ int nDist;
+ int mt;
+
+ /* If this is the final token of the phrase, and positions were not
+ ** requested by the caller, use MERGE_PHRASE instead of POS_PHRASE.
+ ** This drops the position information from the output list.
*/
- int mergetype = MERGE_POS_PHRASE;
- if( ii==pPhrase->nToken-1 && !isReqPos ){
- mergetype = MERGE_PHRASE;
+ mt = MERGE_POS_PHRASE;
+ if( ii==pPhrase->nToken-1 && !isReqPos ) mt = MERGE_PHRASE;
+
+ assert( iPrevTok!=iTok );
+ if( iPrevTok<iTok ){
+ aLeft = pOut;
+ nLeft = nOut;
+ aRight = pList;
+ nRight = nList;
+ nDist = iTok-iPrevTok;
+ }else{
+ aRight = pOut;
+ nRight = nOut;
+ aLeft = pList;
+ nLeft = nList;
+ nDist = iPrevTok-iTok;
}
- fts3DoclistMerge(mergetype, 0, 0, pList, &nOut, pOut, nOut, pList, nList);
- sqlite3_free(pOut);
- pOut = pList;
+ pOut = aRight;
+
+ fts3DoclistMerge(mt, nDist, 0, pOut, &nOut, aLeft, nLeft, aRight, nRight);
+ sqlite3_free(aLeft);
}
assert( nOut==0 || pOut!=0 );
+
+ iPrevTok = iTok;
+ nDoc = fts3DoclistCountDocids(ii<(pPhrase->nToken-1), pOut, nOut);
}
if( rc==SQLITE_OK ){
+ if( ii!=pPhrase->nToken ){
+ assert( pCsr->doDeferred==0 && isReqPos==0 );
+ fts3DoclistStripPositions(pOut, &nOut);
+ }
*paOut = pOut;
*pnOut = nOut;
}else{
return rc;
}
+typedef struct ExprAndCost ExprAndCost;
+struct ExprAndCost {
+ Fts3Expr *pExpr;
+ int nCost;
+};
+
+int fts3ExprCost(Fts3Expr *pExpr){
+ int nCost; /* Return value */
+ if( pExpr->eType==FTSQUERY_PHRASE ){
+ Fts3Phrase *pPhrase = pExpr->pPhrase;
+ int ii;
+ nCost = 0;
+ for(ii=0; ii<pPhrase->nToken; ii++){
+ nCost += pPhrase->aToken[ii].pArray->nCost;
+ }
+ }else{
+ nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight);
+ }
+ return nCost;
+}
+
+static void fts3ExprAssignCosts(
+ Fts3Expr *pExpr, /* Expression to create seg-readers for */
+ ExprAndCost **ppExprCost /* OUT: Write to *ppExprCost */
+){
+ if( pExpr->eType==FTSQUERY_AND ){
+ fts3ExprAssignCosts(pExpr->pLeft, ppExprCost);
+ fts3ExprAssignCosts(pExpr->pRight, ppExprCost);
+ }else{
+ (*ppExprCost)->pExpr = pExpr;
+ (*ppExprCost)->nCost = fts3ExprCost(pExpr);;
+ (*ppExprCost)++;
+ }
+}
+
+static int fts3ExprAllocateSegReaders(
+ Fts3Cursor *pCsr, /* FTS3 table */
+ Fts3Expr *pExpr, /* Expression to create seg-readers for */
+ int *pnExpr /* OUT: Number of AND'd expressions */
+){
+ int rc = SQLITE_OK; /* Return code */
+
+ if( pCsr->doDeferred ) return SQLITE_OK;
+ if( pnExpr && pExpr->eType!=FTSQUERY_AND ){
+ (*pnExpr)++;
+ pnExpr = 0;
+ }
+
+ if( pExpr->eType==FTSQUERY_PHRASE ){
+ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+ Fts3Phrase *pPhrase = pExpr->pPhrase;
+ int ii;
+
+ for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){
+ Fts3PhraseToken *pTok = &pPhrase->aToken[ii];
+ if( pTok->pArray==0 ){
+ rc = fts3TermSegReaderArray(
+ pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray
+ );
+ }
+ }
+ }else{
+ rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pLeft, pnExpr);
+ if( rc==SQLITE_OK ){
+ rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pRight, pnExpr);
+ }
+ }
+ return rc;
+}
+
+static void fts3ExprFreeSegReaders(Fts3Expr *pExpr){
+ if( pExpr ){
+ Fts3Phrase *pPhrase = pExpr->pPhrase;
+ if( pPhrase ){
+ int kk;
+ for(kk=0; kk<pPhrase->nToken; kk++){
+ fts3SegReaderArrayFree(pPhrase->aToken[kk].pArray);
+ pPhrase->aToken[kk].pArray = 0;
+ }
+ }
+ fts3ExprFreeSegReaders(pExpr->pLeft);
+ fts3ExprFreeSegReaders(pExpr->pRight);
+ }
+}
+
/*
** Evaluate the full-text expression pExpr against fts3 table pTab. Store
** the resulting doclist in *paOut and *pnOut. This routine mallocs for
** the space needed to store the output. The caller is responsible for
** freeing the space when it has finished.
*/
-static int evalFts3Expr(
- Fts3Table *p, /* Virtual table handle */
+static int fts3EvalExpr(
+ Fts3Cursor *p, /* Virtual table cursor handle */
Fts3Expr *pExpr, /* Parsed fts3 expression */
char **paOut, /* OUT: Pointer to malloc'd result buffer */
int *pnOut, /* OUT: Size of buffer at *paOut */
*pnOut = 0;
if( pExpr ){
+ assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR
+ || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT
+ || pExpr->eType==FTSQUERY_PHRASE
+ );
assert( pExpr->eType==FTSQUERY_PHRASE
|| pExpr->eType==FTSQUERY_NEAR
|| isReqPos==0
);
+
if( pExpr->eType==FTSQUERY_PHRASE ){
- rc = fts3PhraseSelect(p, pExpr->pPhrase,
+ rc = fts3PhraseSelect(p, pExpr->pPhrase,
isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR),
paOut, pnOut
);
+ fts3ExprFreeSegReaders(pExpr);
+ }else if( p->doDeferred==0 && pExpr->eType==FTSQUERY_AND ){
+ ExprAndCost *aExpr = 0; /* Array of AND'd expressions and costs */
+ int nExpr = 0; /* Size of aExpr[] */
+ char *aRet = 0; /* Doclist to return to caller */
+ int nRet = 0; /* Length of aRet[] in bytes */
+ int nDoc = 0x7FFFFFFF;
+
+ assert( !isReqPos );
+
+ rc = fts3ExprAllocateSegReaders(p, pExpr, &nExpr);
+ if( rc==SQLITE_OK ){
+ aExpr = sqlite3_malloc(sizeof(ExprAndCost) * nExpr);
+ if( !aExpr ) rc = SQLITE_NOMEM;
+ }
+ if( rc==SQLITE_OK ){
+ int ii; /* Used to iterate through expressions */
+
+ fts3ExprAssignCosts(pExpr, &aExpr);
+ aExpr -= nExpr;
+ for(ii=0; ii<nExpr; ii++){
+ char *aNew;
+ int nNew;
+ int jj;
+ ExprAndCost *pBest = 0;
+
+ for(jj=0; jj<nExpr; jj++){
+ ExprAndCost *pCand = &aExpr[jj];
+ if( pCand->pExpr && (pBest==0 || pCand->nCost<pBest->nCost) ){
+ pBest = pCand;
+ }
+ }
+
+ if( pBest->nCost>nDoc ){
+ rc = fts3DeferExpression(p, p->pExpr);
+ break;
+ }else{
+ rc = fts3EvalExpr(p, pBest->pExpr, &aNew, &nNew, 0);
+ if( rc!=SQLITE_OK ) break;
+ pBest->pExpr = 0;
+ if( ii==0 ){
+ aRet = aNew;
+ nRet = nNew;
+ }else{
+ fts3DoclistMerge(
+ MERGE_AND, 0, 0, aRet, &nRet, aRet, nRet, aNew, nNew
+ );
+ sqlite3_free(aNew);
+ }
+ nDoc = fts3DoclistCountDocids(0, aRet, nRet);
+ }
+ }
+ }
+
+ *paOut = aRet;
+ *pnOut = nRet;
+ sqlite3_free(aExpr);
+ fts3ExprFreeSegReaders(pExpr);
+
}else{
char *aLeft;
char *aRight;
int nLeft;
int nRight;
- if( 0==(rc = evalFts3Expr(p, pExpr->pRight, &aRight, &nRight, isReqPos))
- && 0==(rc = evalFts3Expr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos))
+ assert( pExpr->eType==FTSQUERY_NEAR
+ || pExpr->eType==FTSQUERY_OR
+ || pExpr->eType==FTSQUERY_NOT
+ || (pExpr->eType==FTSQUERY_AND && p->doDeferred)
+ );
+
+ if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos))
+ && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos))
){
- assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR
- || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT
- );
switch( pExpr->eType ){
case FTSQUERY_NEAR: {
Fts3Expr *pLeft;
return rc;
}
+/*
+**
+*/
+static int fts3EvalDeferred(Fts3Cursor *pCsr, int *pbRes){
+ int rc = SQLITE_OK;
+ if( pCsr->pDeferred==0 ){
+ *pbRes = 1;
+ }else{
+ rc = fts3CursorSeek(0, pCsr);
+ if( rc==SQLITE_OK ){
+ sqlite3Fts3FreeDeferredDoclists(pCsr);
+ rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
+ }
+ if( rc==SQLITE_OK ){
+ char *a = 0;
+ int n = 0;
+ pCsr->doDeferred = 1;
+ rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0);
+ pCsr->doDeferred = 0;
+ assert( n>=0 );
+ *pbRes = (n>0);
+ sqlite3_free(a);
+ }
+ }
+ return rc;
+}
+
+/*
+** Advance the cursor to the next row in the %_content table that
+** matches the search criteria. For a MATCH search, this will be
+** the next row that matches. For a full-table scan, this will be
+** simply the next row in the %_content table. For a docid lookup,
+** this routine simply sets the EOF flag.
+**
+** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
+** even if we reach end-of-file. The fts3EofMethod() will be called
+** subsequently to determine whether or not an EOF was hit.
+*/
+static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
+ int res;
+ int rc = SQLITE_OK; /* Return code */
+ Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
+
+ do {
+ if( pCsr->aDoclist==0 ){
+ if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
+ pCsr->isEof = 1;
+ rc = sqlite3_reset(pCsr->pStmt);
+ break;
+ }
+ pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
+ }else{
+ if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){
+ pCsr->isEof = 1;
+ break;
+ }
+ sqlite3_reset(pCsr->pStmt);
+ fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId);
+ pCsr->isRequireSeek = 1;
+ pCsr->isMatchinfoNeeded = 1;
+ }
+ }while( SQLITE_OK==(rc = fts3EvalDeferred(pCsr, &res)) && res==0 );
+
+ return rc;
+}
+
+
/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
sqlite3Fts3ExprFree(pCsr->pExpr);
memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
- /* Compile a SELECT statement for this cursor. For a full-table-scan, the
- ** statement loops through all rows of the %_content table. For a
- ** full-text query or docid lookup, the statement retrieves a single
- ** row by docid.
- */
- zSql = sqlite3_mprintf(azSql[idxNum==FTS3_FULLSCAN_SEARCH], p->zDb, p->zName);
- if( !zSql ){
- rc = SQLITE_NOMEM;
- }else{
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
- sqlite3_free(zSql);
- }
- if( rc!=SQLITE_OK ) return rc;
- pCsr->eSearch = (i16)idxNum;
-
- if( idxNum==FTS3_DOCID_SEARCH ){
- rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
- }else if( idxNum!=FTS3_FULLSCAN_SEARCH ){
+ if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){
int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);
rc = sqlite3Fts3ReadLock(p);
if( rc!=SQLITE_OK ) return rc;
- rc = evalFts3Expr(p, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0);
+ rc = fts3EvalExpr(pCsr, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0);
+ if( rc!=SQLITE_OK ) return rc;
pCsr->pNextId = pCsr->aDoclist;
pCsr->iPrevId = 0;
+ if( pCsr->nDoclist<0 ){
+ assert( pCsr->aDoclist==0 );
+ idxNum = FTS3_FULLSCAN_SEARCH;
+ }
+ }
+
+ /* Compile a SELECT statement for this cursor. For a full-table-scan, the
+ ** statement loops through all rows of the %_content table. For a
+ ** full-text query or docid lookup, the statement retrieves a single
+ ** row by docid.
+ */
+ zSql = sqlite3_mprintf(azSql[idxNum==FTS3_FULLSCAN_SEARCH], p->zDb, p->zName);
+ if( !zSql ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
+ sqlite3_free(zSql);
}
+ if( rc==SQLITE_OK && idxNum==FTS3_DOCID_SEARCH ){
+ rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
+ }
+ pCsr->eSearch = (i16)idxNum;
if( rc!=SQLITE_OK ) return rc;
return fts3NextMethod(pCursor);
** This is used by the matchinfo(), snippet() and offsets() auxillary
** functions.
*/
-int sqlite3Fts3ExprLoadDoclist(Fts3Table *pTab, Fts3Expr *pExpr){
- return evalFts3Expr(pTab, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1);
+int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){
+ int rc;
+ pCsr->doDeferred = 1;
+ rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1);
+ pCsr->doDeferred = 0;
+ return rc;
}
/*
typedef struct Fts3Cursor Fts3Cursor;
typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;
-typedef struct Fts3SegReader Fts3SegReader;
+typedef struct Fts3PhraseToken Fts3PhraseToken;
+
typedef struct Fts3SegFilter Fts3SegFilter;
+typedef struct Fts3DeferredToken Fts3DeferredToken;
+typedef struct Fts3SegReader Fts3SegReader;
+typedef struct Fts3SegReaderArray Fts3SegReaderArray;
/*
** A connection to a fulltext index is an instance of the following
*/
sqlite3_stmt *aStmt[25];
- /* Pointer to string containing the SQL:
- **
- ** "SELECT block FROM %_segments WHERE blockid BETWEEN ? AND ?
- ** ORDER BY blockid"
- */
- char *zSelectLeaves;
- int nLeavesStmt; /* Valid statements in aLeavesStmt */
- int nLeavesTotal; /* Total number of prepared leaves stmts */
- int nLeavesAlloc; /* Allocated size of aLeavesStmt */
- sqlite3_stmt **aLeavesStmt; /* Array of prepared zSelectLeaves stmts */
-
+ char *zSegmentsTbl; /* Name of %_segments table */
+ int nPgsz; /* Page size for host database */
int nNodeSize; /* Soft limit for node size */
u8 bHasContent; /* True if %_content table exists */
u8 bHasDocsize; /* True if %_docsize table exists */
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
Fts3Expr *pExpr; /* Parsed MATCH query string */
+ Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
char *pNextId; /* Pointer into the body of aDoclist */
char *aDoclist; /* List of docids for full-text queries */
int nDoclist; /* Size of buffer at aDoclist */
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
u32 *aMatchinfo; /* Information about most recent match */
+
+ int doDeferred;
+ int nRowAvg; /* Average size of database rows, in pages */
};
/*
/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence. A single token is the base case and the most common case.
-** For a sequence of tokens contained in "...", nToken will be the number
-** of tokens in the string.
+** For a sequence of tokens contained in double-quotes (i.e. "one two three")
+** nToken will be the number of tokens in the string.
*/
+
+struct Fts3PhraseToken {
+ char *z; /* Text of the token */
+ int n; /* Number of bytes in buffer z */
+ int isPrefix; /* True if token ends with a "*" character */
+ Fts3SegReaderArray *pArray;
+ Fts3DeferredToken *pDeferred;
+};
+
struct Fts3Phrase {
int nToken; /* Number of tokens in the phrase */
int iColumn; /* Index of column this phrase must match */
int isNot; /* Phrase prefixed by unary not (-) operator */
- struct PhraseToken {
- char *z; /* Text of the token */
- int n; /* Number of bytes in buffer pointed to by z */
- int isPrefix; /* True if token ends in with a "*" character */
- } aToken[1]; /* One entry for each token in the phrase */
+ Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
};
/*
Fts3Expr *pRight; /* Right operand */
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
+ int bDeferred;
+
int isLoaded; /* True if aDoclist/nDoclist are initialized. */
char *aDoclist; /* Buffer containing doclist */
int nDoclist; /* Size of aDoclist in bytes */
int sqlite3Fts3MatchinfoDocsizeGlobal(Fts3Cursor*, u32*);
int sqlite3Fts3ReadLock(Fts3Table *);
+void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
+int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
+int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
+void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
+char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *);
+
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002
void sqlite3Fts3Dequote(char *);
char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int);
-int sqlite3Fts3ExprLoadDoclist(Fts3Table *, Fts3Expr *);
+int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *);
int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);
/* fts3_tokenizer.c */
rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
if( rc==SQLITE_OK ){
int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
- p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken));
+ p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken));
zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
if( !p || !zTemp ){
goto no_mem;
p->pPhrase = (Fts3Phrase *)&p[1];
p->pPhrase->nToken = ii+1;
p->pPhrase->aToken[ii].n = nToken;
+ p->pPhrase->aToken[ii].pDeferred = 0;
+ p->pPhrase->aToken[ii].pArray = 0;
memcpy(&zTemp[nTemp], zToken, nToken);
nTemp += nToken;
if( iEnd<nInput && zInput[iEnd]=='*' ){
char *zNew = NULL;
int nNew = 0;
int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
- nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(struct PhraseToken);
+ nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken);
p = fts3ReallocOrFree(p, nByte + nTemp);
if( !p ){
goto no_mem;
*/
typedef struct LoadDoclistCtx LoadDoclistCtx;
struct LoadDoclistCtx {
- Fts3Table *pTab; /* FTS3 Table */
+ Fts3Cursor *pCsr; /* FTS3 Cursor */
int nPhrase; /* Number of phrases seen so far */
int nToken; /* Number of tokens seen so far */
};
p->nToken += pExpr->pPhrase->nToken;
if( pExpr->isLoaded==0 ){
- rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
+ rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr);
pExpr->isLoaded = 1;
if( rc==SQLITE_OK ){
rc = fts3ExprNearTrim(pExpr);
){
int rc; /* Return Code */
LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
- sCtx.pTab = (Fts3Table *)pCsr->base.pVtab;
+ sCtx.pCsr = pCsr;
rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx);
if( rc==SQLITE_OK ){
(void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0);
sqlite3_int64 iLastPos;
};
+
+/*
+** Each cursor has a (possibly empty) linked list of the following objects.
+*/
+struct Fts3DeferredToken {
+ Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */
+ int iCol; /* Column token must occur in */
+ Fts3DeferredToken *pNext; /* Next in list of deferred tokens */
+ PendingList *pList; /* Doclist is assembled here */
+};
+
/*
** An instance of this structure is used to iterate through the terms on
** a contiguous set of segment b-tree leaf nodes. Although the details of
**
** sqlite3Fts3SegReaderNew()
** sqlite3Fts3SegReaderFree()
+** sqlite3Fts3SegReaderCost()
** sqlite3Fts3SegReaderIterate()
**
** Methods used to manipulate Fts3SegReader structures:
*/
struct Fts3SegReader {
int iIdx; /* Index within level, or 0x7FFFFFFF for PT */
- sqlite3_int64 iStartBlock;
- sqlite3_int64 iEndBlock;
- sqlite3_stmt *pStmt; /* SQL Statement to access leaf nodes */
+
+ sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */
+ sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */
+ sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */
+ sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */
+ sqlite3_blob *pBlob; /* Blob open on iStartBlock */
+
char *aNode; /* Pointer to node data (or NULL) */
int nNode; /* Size of buffer at aNode (or 0) */
int nTermAlloc; /* Allocated size of zTerm buffer */
};
#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)
+#define fts3SegReaderIsRootOnly(p) ((p)->aNode==(char *)&(p)[1])
/*
** An instance of this structure is used to create a segment b-tree in the
** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
*/
static int fts3PendingTermsAdd(
- Fts3Table *p, /* FTS table into which text will be inserted */
- const char *zText, /* Text of document to be inseted */
- int iCol, /* Column number into which text is inserted */
- u32 *pnWord /* OUT: Number of tokens inserted */
+ Fts3Table *p, /* Table into which text will be inserted */
+ const char *zText, /* Text of document to be inserted */
+ int iCol, /* Column into which text is being inserted */
+ u32 *pnWord /* OUT: Number of tokens inserted */
){
int rc;
int iStart;
return rc;
}
+/*
+** The %_segments table is declared as follows:
+**
+** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB)
+**
+** This function opens a read-only blob handle on the "block" column of
+** row iSegment of the %_segments table associated with FTS3 table p.
+**
+** If all goes well, SQLITE_OK is returned and *ppBlob set to the
+** read-only blob handle. It is the responsibility of the caller to call
+** sqlite3_blob_close() on the blob handle. Or, if an error occurs, an
+** SQLite error code is returned and *ppBlob is either not modified or
+** set to 0.
+*/
+static int fts3OpenSegmentsBlob(
+ Fts3Table *p, /* FTS3 table handle */
+ sqlite3_int64 iSegment, /* Rowid in %_segments table */
+ sqlite3_blob **ppBlob /* OUT: Read-only blob handle */
+){
+ if( 0==p->zSegmentsTbl
+ && 0==(p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName))
+ ) {
+ return SQLITE_NOMEM;
+ }
+ return sqlite3_blob_open(
+ p->db, p->zDb, p->zSegmentsTbl, "block", iSegment, 0, ppBlob
+ );
+}
+
+
/*
** Move the iterator passed as the first argument to the next term in the
** segment. If successful, SQLITE_OK is returned. If there is no next term,
** SQLITE_DONE. Otherwise, an SQLite error code.
*/
-static int fts3SegReaderNext(Fts3SegReader *pReader){
+static int fts3SegReaderNext(Fts3Table *p, Fts3SegReader *pReader){
char *pNext; /* Cursor variable */
int nPrefix; /* Number of bytes in term prefix */
int nSuffix; /* Number of bytes in term suffix */
}
if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){
+ sqlite3_blob *pBlob;
int rc;
+
if( fts3SegReaderIsPending(pReader) ){
Fts3HashElem *pElem = *(pReader->ppNextElem);
if( pElem==0 ){
}
return SQLITE_OK;
}
- if( !pReader->pStmt ){
- pReader->aNode = 0;
+
+ if( !fts3SegReaderIsRootOnly(pReader) ){
+ sqlite3_free(pReader->aNode);
+ }
+ pReader->aNode = 0;
+
+ /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf
+ ** blocks have already been traversed. */
+ if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){
return SQLITE_OK;
}
- rc = sqlite3_step(pReader->pStmt);
- if( rc!=SQLITE_ROW ){
- pReader->aNode = 0;
- return (rc==SQLITE_DONE ? SQLITE_OK : rc);
+
+ rc = fts3OpenSegmentsBlob(p, ++pReader->iCurrentBlock, &pBlob);
+ if( rc==SQLITE_OK ){
+ pReader->nNode = sqlite3_blob_bytes(pBlob);
+ pReader->aNode = (char *)sqlite3_malloc(pReader->nNode);
+ if( pReader->aNode ){
+ rc = sqlite3_blob_read(pBlob, pReader->aNode, pReader->nNode, 0);
+ }else{
+ rc = SQLITE_NOMEM;
+ }
+ sqlite3_blob_close(pBlob);
+ }
+
+ if( rc!=SQLITE_OK ){
+ return rc;
}
- pReader->nNode = sqlite3_column_bytes(pReader->pStmt, 0);
- pReader->aNode = (char *)sqlite3_column_blob(pReader->pStmt, 0);
pNext = pReader->aNode;
}
}
}
+/*
+** This function is called to estimate the amount of data that will be
+** loaded from the disk If SegReaderIterate() is called on this seg-reader,
+** in units of average document size.
+**
+** This can be used as follows: If the caller has a small doclist that
+** contains references to N documents, and is considering merging it with
+** a large doclist (size X "average documents"), it may opt not to load
+** the large doclist if X>N.
+*/
+int sqlite3Fts3SegReaderCost(
+ Fts3Cursor *pCsr, /* FTS3 cursor handle */
+ Fts3SegReader *pReader, /* Segment-reader handle */
+ int *pnCost /* IN/OUT: Number of bytes read */
+){
+ Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
+ int rc = SQLITE_OK; /* Return code */
+ int nCost = 0; /* Cost in bytes to return */
+ sqlite3_int64 iLeaf; /* Used to iterate through required leaves */
+ int pgsz = p->nPgsz; /* Database page size */
+
+ /* If this seg-reader is reading the pending-terms table, or if all data
+ ** for the segment is stored on the root page of the b-tree, then the cost
+ ** is zero. In this case all required data is already in main memory.
+ */
+ if( p->bHasDocsize
+ && !fts3SegReaderIsPending(pReader)
+ && !fts3SegReaderIsRootOnly(pReader)
+ ){
+ sqlite3_blob *pBlob = 0;
+
+ if( pCsr->nRowAvg==0 ){
+ /* The average document size, which is required to calculate the cost
+ ** of each doclist, has not yet been determined. Read the required
+ ** data from the %_stat table to calculate it.
+ **
+ ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
+ ** varints, where nCol is the number of columns in the FTS3 table.
+ ** The first varint is the number of documents currently stored in
+ ** the table. The following nCol varints contain the total amount of
+ ** data stored in all rows of each column of the table, from left
+ ** to right.
+ */
+ sqlite3_stmt *pStmt;
+ rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
+ if( rc ) return rc;
+ if( sqlite3_step(pStmt)==SQLITE_ROW ){
+ sqlite3_int64 nDoc = 0;
+ sqlite3_int64 nByte = 0;
+ const char *a = sqlite3_column_blob(pStmt, 0);
+ if( a ){
+ const char *pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
+ a += sqlite3Fts3GetVarint(a, &nDoc);
+ while( a<pEnd ){
+ sqlite3_int64 nVarint;
+ a += sqlite3Fts3GetVarint(a, &nVarint);
+ nByte += nVarint;
+ }
+ }
+
+ pCsr->nRowAvg = (((nByte / nDoc) + pgsz - 1) / pgsz);
+ }
+ rc = sqlite3_reset(pStmt);
+ if( rc!=SQLITE_OK || pCsr->nRowAvg==0 ) return rc;
+ }
+
+ rc = fts3OpenSegmentsBlob(p, pReader->iStartBlock, &pBlob);
+ if( rc==SQLITE_OK ){
+ /* Assume that a blob flows over onto overflow pages if it is larger
+ ** than (pgsz-35) bytes in size (the file-format documentation
+ ** confirms this).
+ */
+ int nBlob = sqlite3_blob_bytes(pBlob);
+ if( (nBlob+35)>pgsz ){
+ int nOvfl = (nBlob + 34)/pgsz;
+ nCost += ((nOvfl + pCsr->nRowAvg - 1)/pCsr->nRowAvg);
+ }
+ }
+ assert( rc==SQLITE_OK || pBlob==0 );
+ sqlite3_blob_close(pBlob);
+ }
+
+ *pnCost += nCost;
+ return rc;
+}
+
/*
** Free all allocations associated with the iterator passed as the
** second argument.
*/
void sqlite3Fts3SegReaderFree(Fts3Table *p, Fts3SegReader *pReader){
- if( pReader ){
- if( pReader->pStmt ){
- /* Move the leaf-range SELECT statement to the aLeavesStmt[] array,
- ** so that it can be reused when required by another query.
- */
- assert( p->nLeavesStmt<p->nLeavesTotal );
- sqlite3_reset(pReader->pStmt);
- p->aLeavesStmt[p->nLeavesStmt++] = pReader->pStmt;
- }
- if( !fts3SegReaderIsPending(pReader) ){
- sqlite3_free(pReader->zTerm);
+ if( pReader && !fts3SegReaderIsPending(pReader) ){
+ sqlite3_free(pReader->zTerm);
+ if( !fts3SegReaderIsRootOnly(pReader) ){
+ sqlite3_free(pReader->aNode);
}
- sqlite3_free(pReader);
}
+ sqlite3_free(pReader);
}
/*
return SQLITE_NOMEM;
}
memset(pReader, 0, sizeof(Fts3SegReader));
- pReader->iStartBlock = iStartLeaf;
pReader->iIdx = iAge;
+ pReader->iStartBlock = iStartLeaf;
+ pReader->iLeafEndBlock = iEndLeaf;
pReader->iEndBlock = iEndBlock;
if( nExtra ){
pReader->nNode = nRoot;
memcpy(pReader->aNode, zRoot, nRoot);
}else{
- /* If the text of the SQL statement to iterate through a contiguous
- ** set of entries in the %_segments table has not yet been composed,
- ** compose it now.
- */
- if( !p->zSelectLeaves ){
- p->zSelectLeaves = sqlite3_mprintf(
- "SELECT block FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ? "
- "ORDER BY blockid", p->zDb, p->zName
- );
- if( !p->zSelectLeaves ){
- rc = SQLITE_NOMEM;
- goto finished;
- }
- }
-
- /* If there are no free statements in the aLeavesStmt[] array, prepare
- ** a new statement now. Otherwise, reuse a prepared statement from
- ** aLeavesStmt[].
- */
- if( p->nLeavesStmt==0 ){
- if( p->nLeavesTotal==p->nLeavesAlloc ){
- int nNew = p->nLeavesAlloc + 16;
- sqlite3_stmt **aNew = (sqlite3_stmt **)sqlite3_realloc(
- p->aLeavesStmt, nNew*sizeof(sqlite3_stmt *)
- );
- if( !aNew ){
- rc = SQLITE_NOMEM;
- goto finished;
- }
- p->nLeavesAlloc = nNew;
- p->aLeavesStmt = aNew;
- }
- rc = sqlite3_prepare_v2(p->db, p->zSelectLeaves, -1, &pReader->pStmt, 0);
- if( rc!=SQLITE_OK ){
- goto finished;
- }
- p->nLeavesTotal++;
- }else{
- pReader->pStmt = p->aLeavesStmt[--p->nLeavesStmt];
- }
-
- /* Bind the start and end leaf blockids to the prepared SQL statement. */
- sqlite3_bind_int64(pReader->pStmt, 1, iStartLeaf);
- sqlite3_bind_int64(pReader->pStmt, 2, iEndLeaf);
+ pReader->iCurrentBlock = iStartLeaf-1;
}
- rc = fts3SegReaderNext(pReader);
+ rc = fts3SegReaderNext(p, pReader);
finished:
if( rc==SQLITE_OK ){
pReader->iIdx = 0x7FFFFFFF;
pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
- fts3SegReaderNext(pReader);
+ fts3SegReaderNext(p, pReader);
}
}
for(i=0; i<nSegment; i++){
Fts3SegReader *pSeg = apSegment[i];
while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){
- rc = fts3SegReaderNext(pSeg);
+ rc = fts3SegReaderNext(p, pSeg);
if( rc!=SQLITE_OK ) goto finished; }
}
}
}
for(i=0; i<nMerge; i++){
- rc = fts3SegReaderNext(apSegment[i]);
+ rc = fts3SegReaderNext(p, apSegment[i]);
if( rc!=SQLITE_OK ) goto finished;
}
fts3SegReaderSort(apSegment, nSegment, nMerge, fts3SegReaderCmp);
return rc;
}
+/*
+** Return the deferred doclist associated with deferred token pDeferred.
+** This function assumes that sqlite3Fts3CacheDeferredDoclists() has already
+** been called to allocate and populate the doclist.
+*/
+char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *pDeferred, int *pnByte){
+ if( pDeferred->pList ){
+ *pnByte = pDeferred->pList->nData;
+ return pDeferred->pList->aData;
+ }
+ *pnByte = 0;
+ return 0;
+}
+
+/*
+** Helper fucntion for FreeDeferredDoclists(). This function removes all
+** references to deferred doclists from within the tree of Fts3Expr
+** structures headed by
+*/
+static void fts3DeferredDoclistClear(Fts3Expr *pExpr){
+ if( pExpr ){
+ fts3DeferredDoclistClear(pExpr->pLeft);
+ fts3DeferredDoclistClear(pExpr->pRight);
+ if( pExpr->bDeferred && pExpr->isLoaded ){
+ sqlite3_free(pExpr->aDoclist);
+ pExpr->isLoaded = 0;
+ pExpr->aDoclist = 0;
+ pExpr->nDoclist = 0;
+ pExpr->pCurrent = 0;
+ pExpr->iCurrent = 0;
+ }
+ }
+}
+
+/*
+** Delete all cached deferred doclists. Deferred doclists are cached
+** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
+*/
+void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){
+ Fts3DeferredToken *pDef;
+ for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){
+ sqlite3_free(pDef->pList);
+ pDef->pList = 0;
+ }
+ fts3DeferredDoclistClear(pCsr->pExpr);
+}
+
+/*
+** Free all entries in the pCsr->pDeffered list. Entries are added to
+** this list using sqlite3Fts3DeferToken().
+*/
+void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){
+ Fts3DeferredToken *pDef;
+ Fts3DeferredToken *pNext;
+ for(pDef=pCsr->pDeferred; pDef; pDef=pNext){
+ pNext = pDef->pNext;
+ sqlite3_free(pDef->pList);
+ sqlite3_free(pDef);
+ }
+ pCsr->pDeferred = 0;
+}
+
+/*
+** Generate deferred-doclists for all tokens in the pCsr->pDeferred list
+** based on the row that pCsr currently points to.
+**
+** A deferred-doclist is like any other doclist with position information
+** included, except that it only contains entries for a single row of the
+** table, not for all rows.
+*/
+int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
+ int rc = SQLITE_OK; /* Return code */
+ if( pCsr->pDeferred ){
+ int i; /* Used to iterate through table columns */
+ sqlite3_int64 iDocid; /* Docid of the row pCsr points to */
+ Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */
+
+ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+ sqlite3_tokenizer *pT = p->pTokenizer;
+ sqlite3_tokenizer_module const *pModule = pT->pModule;
+
+ assert( pCsr->isRequireSeek==0 );
+ iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
+
+ for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
+ const char *zText = sqlite3_column_text(pCsr->pStmt, i+1);
+ sqlite3_tokenizer_cursor *pTC = 0;
+
+ rc = pModule->xOpen(pT, zText, -1, &pTC);
+ while( rc==SQLITE_OK ){
+ char const *zToken; /* Buffer containing token */
+ int nToken; /* Number of bytes in token */
+ int iDum1, iDum2; /* Dummy variables */
+ int iPos; /* Position of token in zText */
+
+ pTC->pTokenizer = pT;
+ rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
+ for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
+ Fts3PhraseToken *pPT = pDef->pToken;
+ if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
+ && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
+ && (0==memcmp(zToken, pPT->z, pPT->n))
+ ){
+ fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
+ }
+ }
+ }
+ if( pTC ) pModule->xClose(pTC);
+ if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+ }
+
+ for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
+ if( pDef->pList ){
+ rc = fts3PendingListAppendVarint(&pDef->pList, 0);
+ }
+ }
+ }
+
+ return rc;
+}
+
+/*
+** Add an entry for token pToken to the pCsr->pDeferred list.
+*/
+int sqlite3Fts3DeferToken(
+ Fts3Cursor *pCsr, /* Fts3 table cursor */
+ Fts3PhraseToken *pToken, /* Token to defer */
+ int iCol /* Column that token must appear in (or -1) */
+){
+ Fts3DeferredToken *pDeferred;
+ pDeferred = sqlite3_malloc(sizeof(*pDeferred));
+ if( !pDeferred ){
+ return SQLITE_NOMEM;
+ }
+ memset(pDeferred, 0, sizeof(*pDeferred));
+ pDeferred->pToken = pToken;
+ pDeferred->pNext = pCsr->pDeferred;
+ pDeferred->iCol = iCol;
+ pCsr->pDeferred = pDeferred;
+
+ assert( pToken->pDeferred==0 );
+ pToken->pDeferred = pDeferred;
+
+ return SQLITE_OK;
+}
+
+
/*
** This function does the work for the xUpdate method of FTS3 virtual
** tables.
--- /dev/null
+
+
+#--------------------------------------------------------------------------
+# This script contains several sub-programs used to test FTS3/FTS4
+# performance. It does not run the queries directly, but generates SQL
+# scripts that can be run using the shell tool.
+#
+# The following cases are tested:
+#
+# 1. Inserting documents into an FTS3 table.
+# 2. Optimizing an FTS3 table (i.e. "INSERT INTO t1 VALUES('optimize')").
+# 3. Deleting documents from an FTS3 table.
+# 4. Querying FTS3 tables.
+#
+
+# Number of tokens in vocabulary. And number of tokens in each document.
+#
+set VOCAB_SIZE 2000
+set DOC_SIZE 100
+
+set NUM_INSERTS 1000
+set NUM_SELECTS 1000
+
+# Force everything in this script to be deterministic.
+#
+expr {srand(0)}
+
+proc usage {} {
+ puts stderr "Usage: $::argv0 <rows> <selects>"
+ exit -1
+}
+
+proc sql {sql} {
+ puts $::fd $sql
+}
+
+
+# Return a list of $nWord randomly generated tokens each between 2 and 10
+# characters in length.
+#
+proc build_vocab {nWord} {
+ set ret [list]
+ set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
+ for {set i 0} {$i<$nWord} {incr i} {
+ set len [expr {int((rand()*9.0)+2)}]
+ set term ""
+ for {set j 0} {$j<$len} {incr j} {
+ append term [lindex $chars [expr {int(rand()*[llength $chars])}]]
+ }
+ lappend ret $term
+ }
+ set ret
+}
+
+proc select_term {} {
+ set n [llength $::vocab]
+ set t [expr int(rand()*$n*3)]
+ if {$t>=2*$n} { set t [expr {($t-2*$n)/100}] }
+ if {$t>=$n} { set t [expr {($t-$n)/10}] }
+ lindex $::vocab $t
+}
+
+proc select_doc {nTerm} {
+ set ret [list]
+ for {set i 0} {$i<$nTerm} {incr i} {
+ lappend ret [select_term]
+ }
+ set ret
+}
+
+proc test_1 {nInsert} {
+ sql "PRAGMA synchronous = OFF;"
+ sql "DROP TABLE IF EXISTS t1;"
+ sql "CREATE VIRTUAL TABLE t1 USING fts4;"
+ for {set i 0} {$i < $nInsert} {incr i} {
+ set doc [select_doc $::DOC_SIZE]
+ #sql "INSERT INTO t1 VALUES('$doc');"
+ sql "\"$doc\""
+ }
+}
+
+proc test_2 {} {
+ sql "INSERT INTO t1(t1) VALUES('optimize');"
+}
+
+proc test_3 {nSelect} {
+ for {set i 0} {$i < $nSelect} {incr i} {
+ sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term]';"
+ }
+}
+
+proc test_4 {nSelect} {
+ for {set i 0} {$i < $nSelect} {incr i} {
+ sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term] [select_term]';"
+ }
+}
+
+if {[llength $argv]!=0} usage
+
+set ::vocab [build_vocab $::VOCAB_SIZE]
+
+set ::fd [open fts3speed_insert.sql w]
+test_1 $NUM_INSERTS
+close $::fd
+
+set ::fd [open fts3speed_select.sql w]
+test_3 $NUM_SELECTS
+close $::fd
+
+set ::fd [open fts3speed_select2.sql w]
+test_4 $NUM_SELECTS
+close $::fd
+
+set ::fd [open fts3speed_optimize.sql w]
+test_2
+close $::fd
+
+puts "Success. Created files:"
+puts " fts3speed_insert.sql"
+puts " fts3speed_select.sql"
+puts " fts3speed_select2.sql"
+puts " fts3speed_optimize.sql"
+
------BEGIN PGP SIGNED MESSAGE-----
-Hash: SHA1
-
-C Avoid\staking\slocks\son\sunused\sdatabase\sconnections\swhen\scommitting\sa\nread\stransaction.
-D 2010-10-14T01:17:30
+C Experimental\schanges\sto\sfts4\sto\stry\sto\sselectively\savoid\sloading\svery\slarge\sdoclists.
+D 2010-10-19T14:08:00
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b01fdfcfecf8a0716c29867a67959f6148b79961
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c 03be86c59ac1a60d448c6eda460a8975ff2f170d
+F ext/fts3/fts3.c 9d4ccf3b7bbfbeeef03dba91377c4d72b757dcb9
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
-F ext/fts3/fts3Int.h b4f0b05ccafe1e5b4be2f052e9840dbd78a0395f
-F ext/fts3/fts3_expr.c 42d5697731cd30fbeabd081bb3e6d3df5531f606
+F ext/fts3/fts3Int.h a640e4fbdb2fcab1457f87993ca3f4ceaa31e776
+F ext/fts3/fts3_expr.c a5aee50edde20e5c9116199bd58be869a3a22c9f
F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c 8df6f6efcc4e9e31f8bf73a4007c2e9abca1dfba
-F ext/fts3/fts3_snippet.c 2c4c921155e4b6befd272041fb903d999ac07d30
+F ext/fts3/fts3_snippet.c 474c11e718610cade73e6009f75ffc173d4c42c5
F ext/fts3/fts3_tokenizer.c b4f2d01c24573852755bc92864816785dae39318
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d
-F ext/fts3/fts3_write.c 97a583b9e1d23d5af4278f3ee3c16a37c3e077f4
+F ext/fts3/fts3_write.c 29b63a98de55d4eb34b7fc6fd90b3224d6cdc7ff
+F ext/fts3/fts3speed.tcl 71b9cdc8f499822124a9eef42003e31a88f26f16
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
F ext/icu/icu.c 850e9a36567bbcce6bd85a4b68243cad8e3c2de2
F test/fts3ae.test ce32a13b34b0260928e4213b4481acf801533bda
F test/fts3af.test d394978c534eabf22dd0837e718b913fd66b499c
F test/fts3ag.test 0b7d303f61ae5d620c4efb5e825713ea34ff9441
-F test/fts3ah.test ba181d6a3dee0c929f0d69df67cac9c47cda6bff
+F test/fts3ah.test 3c5a1bd49979d7b5b5ed9fdbcdd14a7bfe5a5ff9
F test/fts3ai.test d29cee6ed653e30de478066881cec8aa766531b2
F test/fts3aj.test 584facbc9ac4381a7ec624bfde677340ffc2a5a4
F test/fts3ak.test bd14deafe9d1586e8e9bf032411026ac4f8c925d
F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa
F test/fts3b.test e93bbb653e52afde110ad53bbd793f14fe7a8984
F test/fts3c.test fc723a9cf10b397fdfc2b32e73c53c8b1ec02958
-F test/fts3cov.test 3a9d8618a3107166530c447e808f8992372e0415
+F test/fts3cov.test 6f1ff88ff6b5abcfff6979098cb9d0c68a69202e
F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52
+F test/fts3defer.test a9f81bba6e1132dd6a2ad3cf11e4628733975c8c
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
F test/mallocI.test a88c2b9627c8506bf4703d8397420043a786cdb6
F test/mallocJ.test b5d1839da331d96223e5f458856f8ffe1366f62e
F test/mallocK.test d79968641d1b70d88f6c01bdb9a7eb4a55582cc9
-F test/malloc_common.tcl cda732c0d2365a058c2a73778cf6b6da6db54452
+F test/malloc_common.tcl 9dfb33f12173f9a8b029dae0443c569b59b980b6
F test/manydb.test b3d3bc4c25657e7f68d157f031eb4db7b3df0d3c
F test/memdb.test 0825155b2290e900264daaaf0334b6dfe69ea498
F test/memleak.test 10b9c6c57e19fc68c32941495e9ba1c50123f6e2
F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb
F test/pcache.test 4118a183908ecaed343a06fcef3ba82e87e0129d
F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16
-F test/permutations.test ca1c985cf68c692096d0325b33c62f2b576446a5
+F test/permutations.test ec9b2ebd52ff43c5a3bec4723098fab1ef29d944
F test/pragma.test fdfc09067ea104a0c247a1a79d8093b56656f850
F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47
F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P ea8c2f5f8a890dcb422e9e46298ae6ca378c74b7
-R 03d2b3a92f9647a3db71aa9ca75489f4
-U drh
-Z c2cdb52e62b9c570a5e1c8427f4ce5c4
------BEGIN PGP SIGNATURE-----
-Version: GnuPG v1.4.6 (GNU/Linux)
-
-iD8DBQFMtlotoxKgR168RlERAuf0AJ96F+hVDOt4y4GU2wooqTHtO4kKZgCeKIVj
-dUPTQwgWJgqAaN5BweGLucY=
-=QYIr
------END PGP SIGNATURE-----
+P c0ee614fd988f445c4884a37f494479bdd669185
+R 0c4aebbf44d624104504e37bec992917
+T *bgcolor * #c0ffc0
+T *branch * experimental
+T *sym-experimental *
+T -sym-trunk *
+U dan
+Z c2b99a58ccad27405ff8b0fcedef5c33
-c0ee614fd988f445c4884a37f494479bdd669185
\ No newline at end of file
+5ae0ba447a561e3b6637b52f9b83a9fc683d2572
\ No newline at end of file
-# 2006 October 31 (scaaarey)
+# 2006 October 31
#
-# The author disclaims copyright to this source code.
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The focus
-# here is testing correct handling of excessively long terms.
-#
-# $Id: fts3ah.test,v 1.1 2007/08/20 17:38:42 shess Exp $
+# here is testing correct handling of very long terms.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
-# If SQLITE_ENABLE_FTS3 is defined, omit this file.
+# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
finish_test
return
}
-# Generate a term of len copies of char.
-proc bigterm {char len} {
- for {set term ""} {$len>0} {incr len -1} {
- append term $char
- }
- return $term
-}
-
# Generate a document of bigterms based on characters from the list
# chars.
proc bigtermdoc {chars len} {
set doc ""
foreach char $chars {
- append doc " " [bigterm $char $len]
+ append doc " " [string repeat $char $len]
}
return $doc
}
set doc2 [bigtermdoc {b d e f} $len]
set doc3 [bigtermdoc {a c e} $len]
-set aterm [bigterm a $len]
-set bterm [bigterm b $len]
-set xterm [bigterm x $len]
+set aterm [string repeat a $len]
+set bterm [string repeat b $len]
+set xterm [string repeat x $len]
db eval {
CREATE VIRTUAL TABLE t1 USING fts3(content);
INSERT INTO t1 VALUES('And she in the midnight wood will pray');
INSERT INTO t1 VALUES('For the weal of her lover that''s far away.');
COMMIT;
-
+ }
+ execsql {
INSERT INTO t1(t1) VALUES('optimize');
SELECT substr(hex(root), 1, 2) FROM t1_segdir;
}
} {03}
# Test the "missing entry" case:
-do_test fts3cov-2.1 {
+do_test fts3cov-2.2 {
set root [db one {SELECT root FROM t1_segdir}]
read_fts3varint [string range $root 1 end] left_child
execsql { DELETE FROM t1_segments WHERE blockid = $left_child }
} {}
-do_error_test fts3cov-2.2 {
+do_error_test fts3cov-2.3 {
SELECT * FROM t1 WHERE t1 MATCH 'c*'
} {database disk image is malformed}
# Test the "replaced with NULL" case:
-do_test fts3cov-2.3 {
+do_test fts3cov-2.4 {
execsql { INSERT INTO t1_segments VALUES($left_child, NULL) }
} {}
-do_error_test fts3cov-2.4 {
+do_error_test fts3cov-2.5 {
SELECT * FROM t1 WHERE t1 MATCH 'cloud'
} {database disk image is malformed}
--- /dev/null
+# 2010 October 15
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+source $testdir/malloc_common.tcl
+
+ifcapable !fts3 {
+ finish_test
+ return
+}
+
+set ::testprefix fts3defer
+
+#--------------------------------------------------------------------------
+# Test cases fts3defer-1.* are the "warm body" cases. The database contains
+# one row with 15000 instances of the token "a". This makes the doclist for
+# "a" so large that FTS3 will avoid loading it in most cases.
+#
+# To show this, test cases fts3defer-1.2.* execute a bunch of FTS3 queries
+# involving token "a". Then, fts3defer-1.3.* replaces the doclist for token
+# "a" with all zeroes and fts3defer-1.4.* repeats the tests from 1.2. If
+# the tests still work, we can conclude that the doclist for "a" was not
+# used.
+#
+
+set aaa [string repeat "a " 15000]
+
+do_execsql_test 1.1 {
+ CREATE VIRTUAL TABLE t1 USING fts4;
+ BEGIN;
+ INSERT INTO t1 VALUES('this is a dog');
+ INSERT INTO t1 VALUES('an instance of a phrase');
+ INSERT INTO t1 VALUES('an instance of a longer phrase');
+ INSERT INTO t1 VALUES($aaa);
+ COMMIT;
+} {}
+
+set tests {
+ 1 {SELECT rowid FROM t1 WHERE t1 MATCH '"a dog"'} {1}
+ 2 {SELECT rowid FROM t1 WHERE t1 MATCH '"is a dog"'} {1}
+ 3 {SELECT rowid FROM t1 WHERE t1 MATCH '"a longer phrase"'} {3}
+ 4 {SELECT snippet(t1) FROM t1 WHERE t1 MATCH '"a longer phrase"'}
+ {"an instance of <b>a</b> <b>longer</b> <b>phrase</b>"}
+ 5 {SELECT rowid FROM t1 WHERE t1 MATCH 'a dog'} {1}
+}
+
+do_select_tests 1.2 $tests
+
+do_execsql_test 1.3 {
+ SELECT count(*) FROM t1_segments WHERE length(block)>10000;
+ UPDATE t1_segments
+ SET block = zeroblob(length(block))
+ WHERE length(block)>10000;
+} {1}
+
+do_select_tests 1.4 $tests
+
+# Drop the table. It is corrupt now anyhow, so not useful for subsequent tests.
+#
+do_execsql_test 1.5 { DROP TABLE t1 }
+
+#--------------------------------------------------------------------------
+# These tests - fts3defer-2.* - are more rigorous. They test that for a
+# variety of queries, FTS3 and FTS4 return the same results. And that
+# zeroing the very large doclists that FTS4 does not load does not change
+# the results.
+#
+# They use the following pseudo-randomly generated document data. The
+# tokens "zm" and "jk" are especially common in this dataset. Additionally,
+# two documents are added to the pseudo-random data before it is loaded
+# into FTS4 containing 100,000 instances of the "zm" and "jk" tokens. This
+# makes the doclists for those tokens so large that FTS4 avoids loading them
+# into memory if possible.
+#
+set data [list]
+lappend data [string repeat "zm " 100000]
+lappend data [string repeat "jk " 100000]
+lappend data {*}{
+ "zm zm agmckuiu uhzq nsab jk rrkx duszemmzl hyq jk"
+ "jk uhzq zm zm rgpzmlnmd zm zk jk jk zm"
+ "duszemmzl zm jk xldlpy zm jk sbptoa xh jk xldlpy"
+ "zm xh zm xqf azavwm jk jk trqd rgpzmlnmd jk"
+ "zm vwq urvysbnykk ubwrfqnbjf zk lsz jk doiwavhwwo jk jk"
+ "jk xduvfhk orpfawpx zkhdvkw jk mjpavjuhw zm jk duszemmzl zm"
+ "jk igju jk jk zm hmjf xh zm gwdfhwurx zk"
+ "vgsld jk jk zm hrlipdm jn zm zsmhnf vgsld duszemmzl"
+ "gtuiexzsu aayxpmve zm zm zm drir scpgna xh azavwm uhzq"
+ "farlehdhq hkfoudzftq igju duszemmzl xnxhf ewle zm hrlipdm urvysbnykk kn"
+ "xnxhf jk jk agmckuiu duszemmzl jk zm zm jk vgsld"
+ "zm zm zm jk jk urvysbnykk ogttbykvt zm zm jk"
+ "iasrqgqv zm azavwm zidhxhbtv jk jk mjpavjuhw zm zm ajmvcydy"
+ "rgpzmlnmd tmt mjpavjuhw xh igju jk azavwm fibokdry vgsld ofm"
+ "zm jk vgsld jk xh jk csjqxhgj drir jk pmrb"
+ "xh jk jk zm rrkx duszemmzl mjpavjuhw xldlpy igju zm"
+ "jk hkfoudzftq zf rrkx wdmy jupk jk zm urvysbnykk npywgdvgz"
+ "zm jk zm zm zhbrzadb uenvbm aayxpmve urvysbnykk duszemmzl jk"
+ "uenvbm jk zm fxw xh bdilwmjw mjpavjuhw uv jk zm"
+ "nk jk bnhc pahlds jk igju dzadnqzprr jk jk jk"
+ "uhzq uv zm duszemmzl tlqix jk jk xh jk zm"
+ "jk zm agmckuiu urvysbnykk jk jk zm zm jk jk"
+ "azavwm mjpavjuhw lsgshn trqd xldlpy ogyavjvv agmckuiu ryvwwhlbc jk jk"
+ "tmt jk zk zm azavwm ofm acpgim bvgimjik iasrqgqv wuvajhwqz"
+ "igju ogyavjvv xrbdak rrkx fibokdry zf ujfhmrllq jk zm hxgwvib"
+ "zm pahlds jk uenvbm aayxpmve iaf hmjf xph vnlyvtkgx zm"
+ "jk xnxhf igju jk xh jk nvfasfh zm js jk"
+ "zm zm rwaj igju xr rrkx xnxhf nvfasfh skxbsqzvmt xatbxeqq"
+ "vgsld zm ujfhmrllq uhzq ogyavjvv nsab azavwm zm vgsld jmfiqhwnjg"
+ "ymjoym duszemmzl urvysbnykk azavwm jk jmfiqhwnjg bu qcdziqomqk vnlyvtkgx"
+ "zm nbilqcnz dzadnqzprr xh bkfgzsxn urvysbnykk xrujfzxqf zm zf agmckuiu"
+ "jk urvysbnykk nvfasfh zf xh zm zm qcdziqomqk qvxtclg wdmy"
+ "fibokdry jk urvysbnykk jk xr osff zm cvnnsl zm vgsld"
+ "jk mjpavjuhw hkfoudzftq jk zm xh xqf urvysbnykk jk iasrqgqv"
+ "jk csjqxhgj duszemmzl iasrqgqv aayxpmve zm brsuoqww jk qpmhtvl wluvgsw"
+ "jk mj azavwm jk zm jn dzadnqzprr zm jk uhzq"
+ "zk xqf jupk fxw nbilqcnz zm jk jcpiwj tznlvbfcv nvfasfh"
+ "jk jcpiwj zm xnxhf zm mjpavjuhw mj drir pa pvjrjlas"
+ "duszemmzl dzadnqzprr jk swc duszemmzl tmt jk jk pahlds jk"
+ "zk zm jk zm zm eczkjblu zm hi pmrb jk"
+ "azavwm zm iz agmckuiu jk sntk jk duszemmzl duszemmzl zm"
+ "jk zm jk eczkjblu urvysbnykk sk gnl jk ttvgf hmjf"
+ "jk bnhc jjrxpjkb mjpavjuhw fibokdry igju jk zm zm xh"
+ "wxe ogttbykvt uhzq xr iaf zf urvysbnykk aayxpmve oacaxgjoo mjpavjuhw"
+ "gazrt jk ephknonq myjp uenvbm wuvajhwqz jk zm xnxhf nvfasfh"
+ "zm aayxpmve csjqxhgj xnxhf xr jk aayxpmve xnxhf zm zm"
+ "sokcyf zm ogyavjvv jk zm fibokdry zm jk igju igju"
+ "vgsld bvgimjik xuprtlyle jk akmikrqyt jk aayxpmve hkfoudzftq ddjj ithtir"
+ "zm uhzq ovkyevlgv zk uenvbm csjqxhgj jk vgsld pgybs jk"
+ "zm agmckuiu zexh fibokdry jk uhzq bu tugflixoex xnxhf sk"
+ "zm zf uenvbm jk azavwm zm zm agmckuiu zm jk"
+ "rrkx jk zf jt zm oacaxgjoo fibokdry wdmy igju csjqxhgj"
+ "hi igju zm jk zidhxhbtv dzadnqzprr jk jk trqd duszemmzl"
+ "zm zm mjpavjuhw xrbdak qrvbjruc jk qzzqdxq guwq cvnnsl zm"
+ "ithtir jk jk qcdziqomqk zm farlehdhq zm zm xrbdak jk"
+ "ixfipk csjqxhgj azavwm sokcyf ttvgf vgsld jk sk xh zk"
+ "nvfasfh azavwm zm zm zm fxw nvfasfh zk gnl trqd"
+ "zm fibokdry csjqxhgj ofm dzadnqzprr jk akmikrqyt orpfawpx duszemmzl vwq"
+ "csjqxhgj jk jk vgsld urvysbnykk jk nxum jk jk nxum"
+ "zm hkfoudzftq jk ryvwwhlbc mjpavjuhw ephknonq jk zm ogyavjvv zm"
+ "lwa hi xnxhf qdyerbws zk njtc jk uhzq zm jk"
+ "trqd zm dzadnqzprr zm urvysbnykk jk lsz jk mjpavjuhw cmnnkna"
+ "duszemmzl zk jk jk fibokdry jseuhjnzo zm aayxpmve zk jk"
+ "fibokdry jk sviq qvxtclg wdmy jk doiwavhwwo zexh jk zm"
+ "jupk zm xh jk mjpavjuhw zm jk nsab npywgdvgz duszemmzl"
+ "zm igju zm zm nvfasfh eh hkfoudzftq fibokdry fxw xkblf"
+ "jk zm jk jk zm xh zk abthnzcv zf csjqxhgj"
+ "zm zm jk nkaotm urvysbnykk sbptoa bq jk ktxdty ubwrfqnbjf"
+ "nvfasfh aayxpmve xdcuz zm tugflixoex jcpiwj zm mjpavjuhw fibokdry doiwavhwwo"
+ "iaf jk mjpavjuhw zm duszemmzl jk jk uhzq pahlds fibokdry"
+ "ddjj zk azavwm jk swc zm gjtexkv jk xh jk"
+ "igju jk csjqxhgj zm jk dzadnqzprr duszemmzl ulvcbv jk jk"
+ "jk fibokdry zm csjqxhgj jn zm zm zm zf uhzq"
+ "duszemmzl jk xkblf zk hrlipdm aayxpmve uenvbm uhzq jk zf"
+ "dzadnqzprr jk zm zdu nvfasfh zm jk urvysbnykk hmjf jk"
+ "jk aayxpmve aserrdxm acpgim fibokdry jk drir wxe brsuoqww rrkx"
+ "uhzq csjqxhgj nvfasfh jk rrkx qbamok trqd uenvbm sntk zm"
+ "ps azavwm zkhdvkw jk zm jk jk zm csjqxhgj xedlrcfo"
+ "jk jk ogyavjvv jk zm farlehdhq duszemmzl jk agitgxamxe jk"
+ "qzzqdxq rwaj jk jk zm xqf jk uenvbm jk zk"
+ "zm hxgwvib akmikrqyt zf agmckuiu uenvbm bq npywgdvgz azavwm jk"
+ "zf jmfiqhwnjg js igju zm aayxpmve zm mbxnljomiv csjqxhgj nvfasfh"
+ "zm jk jk gazrt jk jk lkc jk nvfasfh jk"
+ "xldlpy orpfawpx zkhdvkw jk zm igju zm urvysbnykk dzadnqzprr mbxnljomiv"
+ "urvysbnykk jk zk igju zm uenvbm jk zm ithtir jk"
+ "zm zk zm zf ofm zm xdcuz dzadnqzprr zm vgsld"
+ "sbptoa jk tugflixoex jk zm zm vgsld zm xh zm"
+ "uhzq jk zk evvivo vgsld vniqnuynvf agmckuiu jk zm zm"
+ "zm nvfasfh zm zm zm abthnzcv uenvbm jk zk dzadnqzprr"
+ "zm azavwm igju qzzqdxq jk xnxhf abthnzcv jk nvfasfh zm"
+ "qbamok fxw vgsld igju cmnnkna xnxhf vniqnuynvf zk xh zm"
+ "nvfasfh zk zm mjpavjuhw dzadnqzprr jk jk duszemmzl xldlpy nvfasfh"
+ "xnxhf sviq nsab npywgdvgz osff vgsld farlehdhq fibokdry wjbkhzsa hhac"
+ "zm azavwm scpgna jk jk bq jk duszemmzl fibokdry ovkyevlgv"
+ "csjqxhgj zm jk jk duszemmzl zk xh zm jk zf"
+ "urvysbnykk dzadnqzprr csjqxhgj mjpavjuhw ubwrfqnbjf nkaotm jk jk zm drir"
+ "nvfasfh xh igju zm wluvgsw jk zm srwwnezqk ewle ovnq"
+ "jk nvfasfh eh ktxdty urvysbnykk vgsld zm jk eh uenvbm"
+ "orpfawpx pahlds jk uhzq hi zm zm zf jk dzadnqzprr"
+ "srwwnezqk csjqxhgj rbwzuf nvfasfh jcpiwj xldlpy nvfasfh jk vgsld wjybxmieki"
+}
+
+
+
+foreach {tn setup} {
+ 1 {
+ set dmt_modes 0
+ execsql { CREATE VIRTUAL TABLE t1 USING FTS3 }
+ foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
+ }
+ 2 {
+ set dmt_modes 0
+ execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
+ foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
+ }
+ 3 {
+ set dmt_modes {0 1 2}
+ execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
+ foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
+ execsql {
+ UPDATE t1_segments
+ SET block = zeroblob(length(block))
+ WHERE length(block)>10000;
+ }
+ }
+} {
+
+ execsql { DROP TABLE IF EXISTS t1 }
+ eval $setup
+ set ::testprefix fts3defer-2.$tn
+ set DO_MALLOC_TEST 0
+
+ do_execsql_test 0 {
+ SELECT count(*) FROM t1_segments WHERE length(block)>10000
+ } {2}
+
+ do_select_test 1.1 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf'
+ } {13 29 40 47 48 52 63 92}
+ do_select_test 1.2 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh'
+ } {100}
+ do_select_test 1.3 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf'
+ } {7 70 98}
+ do_select_test 1.4 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk'
+ } {3 5 8 10 13 18 20 23 32 37 41 43 55 60 65 67 72 74 76 81 94 96 97}
+ do_select_test 1.5 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'ubwrfqnbjf jk'
+ } {7 70 98}
+ do_select_test 1.6 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf jk jk jk jk'
+ } {7 70 98}
+ do_select_test 1.7 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'zm xnxhf'
+ } {12 13 29 30 40 47 48 52 63 92 93}
+ do_select_test 1.8 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'zm eh'
+ } {68 100}
+ do_select_test 1.9 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'zm ubwrfqnbjf'
+ } {7 70 98}
+
+ do_select_test 2.1 {
+ SELECT rowid FROM t1 WHERE t1 MATCH '"zm agmckuiu"'
+ } {3 24 52 53}
+ do_select_test 2.2 {
+ SELECT rowid FROM t1 WHERE t1 MATCH '"zm zf"'
+ } {33 53 75 88 101}
+ do_select_test 2.3 {
+ SELECT rowid FROM t1 WHERE t1 MATCH '"zm aayxpmve"'
+ } {48 65 84}
+ do_select_test 2.4 {
+ SELECT rowid FROM t1 WHERE t1 MATCH '"aayxpmve zm"'
+ } {11 37 84}
+ do_select_test 2.5 {
+ SELECT rowid FROM t1 WHERE t1 MATCH '"jk azavwm"'
+ } {16 53}
+ do_select_test 2.6 {
+ SELECT rowid FROM t1 WHERE t1 MATCH '"xh jk jk"'
+ } {18}
+ do_select_test 2.7 {
+ SELECT rowid FROM t1 WHERE t1 MATCH '"zm jk vgsld"'
+ } {13 17}
+
+ do_select_test 3.1 {
+ SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH '"zm agmckuiu"'
+ } {
+ {zm [zm] [agmckuiu] uhzq nsab jk rrkx duszemmzl hyq jk}
+ {jk [zm] [agmckuiu] urvysbnykk jk jk zm zm jk jk}
+ {[zm] [agmckuiu] zexh fibokdry jk uhzq bu tugflixoex xnxhf sk}
+ {zm zf uenvbm jk azavwm zm [zm] [agmckuiu] zm jk}
+ }
+
+ do_select_test 3.2 {
+ SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'xnxhf jk'
+ } {
+ {[xnxhf] [jk] [jk] agmckuiu duszemmzl [jk] zm zm [jk] vgsld}
+ {[jk] [xnxhf] igju [jk] xh [jk] nvfasfh zm js [jk]}
+ {[jk] jcpiwj zm [xnxhf] zm mjpavjuhw mj drir pa pvjrjlas}
+ {gazrt [jk] ephknonq myjp uenvbm wuvajhwqz [jk] zm [xnxhf] nvfasfh}
+ {zm aayxpmve csjqxhgj [xnxhf] xr [jk] aayxpmve [xnxhf] zm zm}
+ {zm agmckuiu zexh fibokdry [jk] uhzq bu tugflixoex [xnxhf] sk}
+ {lwa hi [xnxhf] qdyerbws zk njtc [jk] uhzq zm [jk]}
+ {zm azavwm igju qzzqdxq [jk] [xnxhf] abthnzcv [jk] nvfasfh zm}
+ }
+
+ do_select_test 4.1 {
+ SELECT offsets(t1) FROM t1 WHERE t1 MATCH '"jk uenvbm"'
+ } {
+ {0 0 10 2 0 1 13 6} {0 0 26 2 0 1 29 6}
+ }
+
+ do_select_test 4.2 {
+ SELECT offsets(t1) FROM t1 WHERE t1 MATCH 'duszemmzl jk fibokdry'
+ } {
+ {0 2 3 8 0 1 36 2 0 0 58 9}
+ {0 0 0 9 0 1 13 2 0 1 16 2 0 2 19 8 0 1 53 2}
+ {0 1 4 2 0 0 20 9 0 1 30 2 0 1 33 2 0 2 48 8}
+ {0 1 17 2 0 1 20 2 0 1 26 2 0 0 29 9 0 2 39 8}
+ }
+
+ # The following block of tests runs normally with FTS3 or FTS4 without the
+ # long doclists zeroed. And with OOM-injection for FTS4 with long doclists
+ # zeroed. Change this by messing with the [set dmt_modes] commands above.
+ #
+ foreach DO_MALLOC_TEST $dmt_modes {
+
+ # Phrase search.
+ do_select_test 5.$DO_MALLOC_TEST.1 {
+ SELECT rowid FROM t1 WHERE t1 MATCH '"jk mjpavjuhw"'
+ } {8 15 36 64 67 72}
+
+ # Multiple tokens search.
+ do_select_test 5.$DO_MALLOC_TEST.2 {
+ SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl zm'
+ } {3 5 8 10 12 13 18 20 23 37 43 55 60 65 67 72 74 81 94 96 97}
+
+ # snippet() function with phrase.
+ do_select_test 5.$DO_MALLOC_TEST.3 {
+ SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH '"zm aayxpmve"'
+ } {
+ {[zm] [aayxpmve] csjqxhgj xnxhf xr jk aayxpmve xnxhf zm zm}
+ {duszemmzl zk jk jk fibokdry jseuhjnzo [zm] [aayxpmve] zk jk}
+ {zf jmfiqhwnjg js igju [zm] [aayxpmve] zm mbxnljomiv csjqxhgj nvfasfh}
+ }
+
+ # snippet() function with multiple tokens.
+ do_select_test 5.$DO_MALLOC_TEST.4 {
+ SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'zm zhbrzadb'
+ } {
+ {[zm] jk [zm] [zm] [zhbrzadb] uenvbm aayxpmve urvysbnykk duszemmzl jk}
+ }
+
+ # snippet() function with phrase.
+ do_select_test 5.$DO_MALLOC_TEST.5 {
+ SELECT offsets(t1) FROM t1 WHERE t1 MATCH '"zm aayxpmve"'
+ } {
+ {0 0 0 2 0 1 3 8} {0 0 38 2 0 1 41 8} {0 0 22 2 0 1 25 8}
+ }
+
+ # snippet() function with multiple tokens.
+ do_select_test 5.$DO_MALLOC_TEST.6 {
+ SELECT offsets(t1) FROM t1 WHERE t1 MATCH 'zm zhbrzadb'
+ } {
+ {0 0 0 2 0 0 6 2 0 0 9 2 0 1 12 8}
+ }
+ }
+}
+
+
+finish_test
# match the expected results passed via parameter $result.
#
proc do_select_test {name sql result} {
- uplevel [list doPassiveTest 0 $name $sql [list 0 $result]]
+ uplevel [list doPassiveTest 0 $name $sql [list 0 [list {*}$result]]]
}
proc do_restart_select_test {name sql result} {
proc doPassiveTest {isRestart name sql catchres} {
if {![info exists ::DO_MALLOC_TEST]} { set ::DO_MALLOC_TEST 1 }
+ if {[info exists ::testprefix]
+ && [string is integer [string range $name 0 0]]
+ } {
+ set name $::testprefix.$name
+ }
+
switch $::DO_MALLOC_TEST {
0 { # No malloc failures.
do_test $name [list set {} [uplevel [list catchsql $sql]]] $catchres
fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test
fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test
fts3e.test fts3expr.test fts3expr2.test fts3near.test
- fts3query.test fts3snippet.test
+ fts3query.test fts3snippet.test fts3defer.test
}