From: dan Date: Tue, 1 Feb 2011 16:34:32 +0000 (+0000) Subject: Add virtual table module "fts4aux", used to inspect the full-text index of an fts4... X-Git-Tag: version-3.7.6~175^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a240fd01b668177b5365226613aa2822b20c1983;p=thirdparty%2Fsqlite.git Add virtual table module "fts4aux", used to inspect the full-text index of an fts4 table directly. Also add the "compress" and "uncompress" fts4 options. FossilOrigin-Name: b010ddcc52889160af2183a33c5f483bb0ae91b9 --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 072e12e2fe..c8f8d6ba0c 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -448,6 +448,8 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ sqlite3_finalize(p->aStmt[i]); } sqlite3_free(p->zSegmentsTbl); + sqlite3_free(p->zReadExprlist); + sqlite3_free(p->zWriteExprlist); /* Invoke the tokenizer destructor to free the tokenizer. */ p->pTokenizer->pModule->xDestroy(p->pTokenizer); @@ -665,6 +667,96 @@ static int fts3IsSpecialColumn( return 1; } +/* +** Append the output of a printf() style formatting to an existing string. +*/ +static void fts3Appendf( + int *pRc, /* IN/OUT: Error code */ + char **pz, /* IN/OUT: Pointer to string buffer */ + const char *zFormat, /* Printf format string to append */ + ... /* Arguments for printf format string */ +){ + if( *pRc==SQLITE_OK ){ + va_list ap; + char *z; + va_start(ap, zFormat); + z = sqlite3_vmprintf(zFormat, ap); + if( z && *pz ){ + char *z2 = sqlite3_mprintf("%s%s", *pz, z); + sqlite3_free(z); + z = z2; + } + if( z==0 ) *pRc = SQLITE_NOMEM; + sqlite3_free(*pz); + *pz = z; + } +} + +/* +** Return a list of comma separated SQL expressions that could be used +** in a SELECT statement such as the following: +** +** SELECT <list of expressions> FROM %_content AS x ... +** +** to return the docid, followed by each column of text data in order +** from left to right. 
If parameter zFunc is not NULL, then instead of +** being returned directly each column of text data is passed to an SQL +** function named zFunc first. For example, if zFunc is "unzip" and the +** table has the three user-defined columns "a", "b", and "c", the following +** string is returned: +** +** "docid,unzip(x.'c0a'),unzip(x.'c1b'),unzip(x.'c2c')" +** +** The pointer returned points to a buffer allocated by sqlite3_malloc(). It +** is the responsibility of the caller to eventually free it. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and +** a NULL pointer is returned). Otherwise, if an OOM error is encountered +** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If +** no error occurs, *pRc is left unmodified. +*/ +static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ + char *zRet = 0; + int i; + if( !zFunc ) zFunc = ""; + fts3Appendf(pRc, &zRet, "docid"); + for(i=0; i<p->nColumn; i++){ + fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunc, i, p->azColumn[i]); + } + return zRet; +} + +/* +** Return a list of N comma separated question marks, where N is the number +** of columns in the %_content table (one for the docid plus one for each +** user-defined text column). +** +** If argument zFunc is not NULL, then all but the first question mark +** is preceded by zFunc and an open bracket, and followed by a closed +** bracket. For example, if zFunc is "zip" and the FTS3 table has three +** user-defined text columns, the following string is returned: +** +** "?,zip(?),zip(?),zip(?)" +** +** The pointer returned points to a buffer allocated by sqlite3_malloc(). It +** is the responsibility of the caller to eventually free it. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and +** a NULL pointer is returned). Otherwise, if an OOM error is encountered +** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If +** no error occurs, *pRc is left unmodified. 
+*/ +static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ + char *zRet = 0; + int i; + if( !zFunc ) zFunc = ""; + fts3Appendf(pRc, &zRet, "?"); + for(i=0; i<p->nColumn; i++){ + fts3Appendf(pRc, &zRet, ",%s(?)", zFunc); + } + return zRet; +} + /* ** This function is the implementation of both the xConnect and xCreate ** methods of the FTS3 virtual table. @@ -701,6 +793,9 @@ static int fts3InitVtab( const char **aCol; /* Array of column names */ sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ + char *zCompress = 0; + char *zUncompress = 0; + assert( strlen(argv[0])==4 ); assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4) @@ -751,6 +846,12 @@ static int fts3InitVtab( *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal); rc = SQLITE_ERROR; } + }else if( nKey==8 && 0==sqlite3_strnicmp(z, "compress", 8) ){ + zCompress = zVal; + zVal = 0; + }else if( nKey==10 && 0==sqlite3_strnicmp(z, "uncompress", 10) ){ + zUncompress = zVal; + zVal = 0; }else{ *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); rc = SQLITE_ERROR; @@ -825,6 +926,15 @@ static int fts3InitVtab( assert( zCsr <= &((char *)p)[nByte] ); } + if( (zCompress==0)!=(zUncompress==0) ){ + char const *zMissing = (zCompress==0 ? "compress" : "uncompress"); + rc = SQLITE_ERROR; + *pzErr = sqlite3_mprintf("missing %s parameter", zMissing); + } + p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc); + p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc); + if( rc!=SQLITE_OK ) goto fts3_init_out; + /* If this is an xCreate call, create the underlying tables in the ** database. TODO: For xConnect(), it could verify that said tables exist. 
*/ @@ -842,7 +952,8 @@ static int fts3InitVtab( fts3DeclareVtab(&rc, p); fts3_init_out: - + sqlite3_free(zCompress); + sqlite3_free(zUncompress); sqlite3_free((void *)aCol); if( rc!=SQLITE_OK ){ if( p ){ @@ -1935,132 +2046,131 @@ static int fts3DeferredTermSelect( return SQLITE_OK; } -/* -** An Fts3SegReaderArray is used to store an array of Fts3SegReader objects. -** Elements are added to the array using fts3SegReaderArrayAdd(). -*/ -struct Fts3SegReaderArray { - int nSegment; /* Number of valid entries in apSegment[] */ - int nAlloc; /* Allocated size of apSegment[] */ - int nCost; /* The cost of executing SegReaderIterate() */ - Fts3SegReader *apSegment[1]; /* Array of seg-reader objects */ -}; - - -/* -** Free an Fts3SegReaderArray object. Also free all seg-readers in the -** array (using sqlite3Fts3SegReaderFree()). -*/ -static void fts3SegReaderArrayFree(Fts3SegReaderArray *pArray){ - if( pArray ){ - int i; - for(i=0; inSegment; i++){ - sqlite3Fts3SegReaderFree(pArray->apSegment[i]); +int sqlite3Fts3SegReaderCursor( + Fts3Table *p, /* FTS3 table handle */ + int iLevel, /* Level of segments to scan */ + const char *zTerm, /* Term to query for */ + int nTerm, /* Size of zTerm in bytes */ + int isPrefix, /* True for a prefix search */ + Fts3SegReaderCursor *pCsr /* Cursor object to populate */ +){ + int rc = SQLITE_OK; + int rc2; + int iAge = 0; + sqlite3_stmt *pStmt = 0; + Fts3SegReader *pPending = 0; + + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 + ); + assert( FTS3_SEGCURSOR_PENDING<0 ); + assert( FTS3_SEGCURSOR_ALL<0 ); + assert( iLevel==FTS3_SEGCURSOR_ALL || (zTerm==0 && isPrefix==1) ); + + memset(pCsr, 0, sizeof(Fts3SegReaderCursor)); + + /* If iLevel is less than 0, include a seg-reader for the pending-terms. 
*/ + if( iLevel<0 ){ + rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pPending); + if( rc==SQLITE_OK && pPending ){ + int nByte = (sizeof(Fts3SegReader *) * 16); + pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); + if( pCsr->apSegment==0 ){ + rc = SQLITE_NOMEM; + }else{ + pCsr->apSegment[0] = pPending; + pCsr->nSegment = 1; + pPending = 0; + } } - sqlite3_free(pArray); } -} - -static int fts3SegReaderArrayAdd( - Fts3SegReaderArray **ppArray, - Fts3SegReader *pNew -){ - Fts3SegReaderArray *pArray = *ppArray; - if( !pArray || pArray->nAlloc==pArray->nSegment ){ - int nNew = (pArray ? pArray->nAlloc+16 : 16); - pArray = (Fts3SegReaderArray *)sqlite3_realloc(pArray, - sizeof(Fts3SegReaderArray) + (nNew-1) * sizeof(Fts3SegReader*) - ); - if( !pArray ){ - sqlite3Fts3SegReaderFree(pNew); - return SQLITE_NOMEM; + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3AllSegdirs(p, iLevel, &pStmt); } - if( nNew==16 ){ - pArray->nSegment = 0; - pArray->nCost = 0; + while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ + + /* Read the values returned by the SELECT into local variables. */ + sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); + sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2); + sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3); + int nRoot = sqlite3_column_bytes(pStmt, 4); + char const *zRoot = sqlite3_column_blob(pStmt, 4); + + /* If nSegment is a multiple of 16 the array needs to be extended. */ + if( (pCsr->nSegment%16)==0 ){ + Fts3SegReader **apNew; + int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); + apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); + if( !apNew ){ + rc = SQLITE_NOMEM; + goto finished; + } + pCsr->apSegment = apNew; + } + + /* If zTerm is not NULL, and this segment is not stored entirely on its + ** root node, the range of leaves scanned can be reduced. Do this. 
*/ + if( iStartBlock && zTerm ){ + sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0); + rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); + if( rc!=SQLITE_OK ) goto finished; + if( isPrefix==0 ) iLeavesEndBlock = iStartBlock; + } + + rc = sqlite3Fts3SegReaderNew(iAge, iStartBlock, iLeavesEndBlock, + iEndBlock, zRoot, nRoot, &pCsr->apSegment[pCsr->nSegment] + ); + if( rc!=SQLITE_OK ) goto finished; + pCsr->nSegment++; + iAge++; } - pArray->nAlloc = nNew; - *ppArray = pArray; } - pArray->apSegment[pArray->nSegment++] = pNew; - return SQLITE_OK; + finished: + rc2 = sqlite3_reset(pStmt); + if( rc==SQLITE_DONE ) rc = rc2; + sqlite3Fts3SegReaderFree(pPending); + + return rc; } -static int fts3TermSegReaderArray( + +static int fts3TermSegReaderCursor( Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ - Fts3SegReaderArray **ppArray /* OUT: Allocated seg-reader array */ + Fts3SegReaderCursor **ppSegcsr /* OUT: Allocated seg-reader cursor */ ){ - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; - int rc; /* Return code */ - Fts3SegReaderArray *pArray = 0; /* Array object to build */ - Fts3SegReader *pReader = 0; /* Seg-reader to add to pArray */ - sqlite3_stmt *pStmt = 0; /* SQL statement to scan %_segdir table */ - int iAge = 0; /* Used to assign ages to segments */ - - /* Allocate a seg-reader to scan the pending terms, if any. */ - rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pReader); - if( rc==SQLITE_OK && pReader ) { - rc = fts3SegReaderArrayAdd(&pArray, pReader); - } - - /* Loop through the entire %_segdir table. For each segment, create a - ** Fts3SegReader to iterate through the subset of the segment leaves - ** that may contain a term that matches zTerm/nTerm. For non-prefix - ** searches, this is always a single leaf. For prefix searches, this - ** may be a contiguous block of leaves. 
- */ - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3AllSegdirs(p, &pStmt); - } - while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ - Fts3SegReader *pNew = 0; - int nRoot = sqlite3_column_bytes(pStmt, 4); - char const *zRoot = sqlite3_column_blob(pStmt, 4); - if( sqlite3_column_int64(pStmt, 1)==0 ){ - /* The entire segment is stored on the root node (which must be a - ** leaf). Do not bother inspecting any data in this case, just - ** create a Fts3SegReader to scan the single leaf. - */ - rc = sqlite3Fts3SegReaderNew(iAge, 0, 0, 0, zRoot, nRoot, &pNew); - }else{ - sqlite3_int64 i1; /* First leaf that may contain zTerm */ - sqlite3_int64 i2; /* Final leaf that may contain zTerm */ - rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &i1, (isPrefix?&i2:0)); - if( isPrefix==0 ) i2 = i1; - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3SegReaderNew(iAge, i1, i2, 0, 0, 0, &pNew); - } - } - assert( (pNew==0)==(rc!=SQLITE_OK) ); + Fts3SegReaderCursor *pSegcsr; /* Object to allocate and return */ + int rc = SQLITE_NOMEM; /* Return code */ - /* If a new Fts3SegReader was allocated, add it to the array. 
*/ - if( rc==SQLITE_OK ){ - rc = fts3SegReaderArrayAdd(&pArray, pNew); - } - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3SegReaderCost(pCsr, pNew, &pArray->nCost); + pSegcsr = sqlite3_malloc(sizeof(Fts3SegReaderCursor)); + if( pSegcsr ){ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + int i; + int nCost = 0; + rc = sqlite3Fts3SegReaderCursor( + p, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, pSegcsr); + + for(i=0; rc==SQLITE_OK && inSegment; i++){ + rc = sqlite3Fts3SegReaderCost(pCsr, pSegcsr->apSegment[i], &nCost); } - iAge++; + pSegcsr->nCost = nCost; } - if( rc==SQLITE_DONE ){ - rc = sqlite3_reset(pStmt); - }else{ - sqlite3_reset(pStmt); - } - if( rc!=SQLITE_OK ){ - fts3SegReaderArrayFree(pArray); - pArray = 0; - } - *ppArray = pArray; + *ppSegcsr = pSegcsr; return rc; } +static void fts3SegReaderCursorFree(Fts3SegReaderCursor *pSegcsr){ + sqlite3Fts3SegReaderFinish(pSegcsr); + sqlite3_free(pSegcsr); +} + /* ** This function retreives the doclist for the specified term (or term ** prefix) from the database. 
@@ -2081,11 +2191,11 @@ static int fts3TermSelect( char **ppOut /* OUT: Malloced result buffer */ ){ int rc; /* Return code */ - Fts3SegReaderArray *pArray; /* Seg-reader array for this term */ - TermSelect tsc; /* Context object for fts3TermSelectCb() */ - Fts3SegFilter filter; /* Segment term filter configuration */ + Fts3SegReaderCursor *pSegcsr; /* Seg-reader cursor for this term */ + TermSelect tsc; /* Context object for fts3TermSelectCb() */ + Fts3SegFilter filter; /* Segment term filter configuration */ - pArray = pTok->pArray; + pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); tsc.isReqPos = isReqPos; @@ -2097,13 +2207,18 @@ static int fts3TermSelect( filter.zTerm = pTok->z; filter.nTerm = pTok->n; - rc = sqlite3Fts3SegReaderIterate(p, pArray->apSegment, pArray->nSegment, - &filter, fts3TermSelectCb, (void *)&tsc - ); + rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); + while( SQLITE_OK==rc + && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) + ){ + rc = fts3TermSelectCb(p, (void *)&tsc, + pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist + ); + } + if( rc==SQLITE_OK ){ rc = fts3TermSelectMerge(&tsc); } - if( rc==SQLITE_OK ){ *ppOut = tsc.aaOutput[0]; *pnOut = tsc.anOutput[0]; @@ -2114,8 +2229,8 @@ static int fts3TermSelect( } } - fts3SegReaderArrayFree(pArray); - pTok->pArray = 0; + fts3SegReaderCursorFree(pSegcsr); + pTok->pSegcsr = 0; return rc; } @@ -2238,13 +2353,13 @@ static int fts3PhraseSelect( */ for(ii=0; iinToken; ii++){ Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; - if( pTok->pArray==0 ){ + if( pTok->pSegcsr==0 ){ if( (pCsr->eEvalmode==FTS3_EVAL_FILTER) || (pCsr->eEvalmode==FTS3_EVAL_NEXT && pCsr->pDeferred==0) || (pCsr->eEvalmode==FTS3_EVAL_MATCHINFO && pTok->bFulltext) ){ - rc = fts3TermSegReaderArray( - pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray + rc = fts3TermSegReaderCursor( + pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr ); if( rc!=SQLITE_OK ) return rc; } @@ -2275,10 
+2390,10 @@ static int fts3PhraseSelect( /* Find the remaining token with the lowest cost. */ for(jj=0; jjnToken; jj++){ - Fts3SegReaderArray *pArray = pPhrase->aToken[jj].pArray; - if( pArray && pArray->nCostaToken[jj].pSegcsr; + if( pSegcsr && pSegcsr->nCostnCost; + nMinCost = pSegcsr->nCost; } } pTok = &pPhrase->aToken[iTok]; @@ -2297,12 +2412,12 @@ static int fts3PhraseSelect( if( pCsr->eEvalmode==FTS3_EVAL_NEXT && pTok->pDeferred ){ rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList); }else{ - if( pTok->pArray ){ + if( pTok->pSegcsr ){ rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); } pTok->bFulltext = 1; } - assert( rc!=SQLITE_OK || pCsr->eEvalmode || pTok->pArray==0 ); + assert( rc!=SQLITE_OK || pCsr->eEvalmode || pTok->pSegcsr==0 ); if( rc!=SQLITE_OK ) break; if( isFirst ){ @@ -2480,9 +2595,9 @@ static int fts3ExprAllocateSegReaders( for(ii=0; rc==SQLITE_OK && iinToken; ii++){ Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; - if( pTok->pArray==0 ){ - rc = fts3TermSegReaderArray( - pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray + if( pTok->pSegcsr==0 ){ + rc = fts3TermSegReaderCursor( + pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr ); } } @@ -2506,8 +2621,8 @@ static void fts3ExprFreeSegReaders(Fts3Expr *pExpr){ if( pPhrase ){ int kk; for(kk=0; kknToken; kk++){ - fts3SegReaderArrayFree(pPhrase->aToken[kk].pArray); - pPhrase->aToken[kk].pArray = 0; + fts3SegReaderCursorFree(pPhrase->aToken[kk].pSegcsr); + pPhrase->aToken[kk].pSegcsr = 0; } } fts3ExprFreeSegReaders(pExpr->pLeft); @@ -2527,10 +2642,8 @@ static int fts3ExprCost(Fts3Expr *pExpr){ int ii; nCost = 0; for(ii=0; iinToken; ii++){ - Fts3SegReaderArray *pArray = pPhrase->aToken[ii].pArray; - if( pArray ){ - nCost += pPhrase->aToken[ii].pArray->nCost; - } + Fts3SegReaderCursor *pSegcsr = pPhrase->aToken[ii].pSegcsr; + if( pSegcsr ) nCost += pSegcsr->nCost; } }else{ nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight); @@ -2872,8 +2985,8 @@ 
static int fts3FilterMethod( sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ const char *azSql[] = { - "SELECT * FROM %Q.'%q_content' WHERE docid = ?", /* non-full-table-scan */ - "SELECT * FROM %Q.'%q_content'", /* full-table-scan */ + "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?", /* non-full-scan */ + "SELECT %s FROM %Q.'%q_content' AS x ", /* full-scan */ }; int rc; /* Return code */ char *zSql; /* SQL statement used to access %_content */ @@ -2928,7 +3041,8 @@ static int fts3FilterMethod( ** full-text query or docid lookup, the statement retrieves a single ** row by docid. */ - zSql = sqlite3_mprintf(azSql[idxNum==FTS3_FULLSCAN_SEARCH], p->zDb, p->zName); + zSql = (char *)azSql[idxNum==FTS3_FULLSCAN_SEARCH]; + zSql = sqlite3_mprintf(zSql, p->zReadExprlist, p->zDb, p->zName); if( !zSql ){ rc = SQLITE_NOMEM; }else{ @@ -3446,6 +3560,9 @@ int sqlite3Fts3Init(sqlite3 *db){ sqlite3Fts3IcuTokenizerModule(&pIcu); #endif + rc = sqlite3Fts3InitAux(db); + if( rc!=SQLITE_OK ) return rc; + sqlite3Fts3SimpleTokenizerModule(&pSimple); sqlite3Fts3PorterTokenizerModule(&pPorter); diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 087544323f..82413e1fd1 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -107,7 +107,7 @@ typedef struct Fts3PhraseToken Fts3PhraseToken; typedef struct Fts3SegFilter Fts3SegFilter; typedef struct Fts3DeferredToken Fts3DeferredToken; typedef struct Fts3SegReader Fts3SegReader; -typedef struct Fts3SegReaderArray Fts3SegReaderArray; +typedef struct Fts3SegReaderCursor Fts3SegReaderCursor; /* ** A connection to a fulltext index is an instance of the following @@ -130,6 +130,9 @@ struct Fts3Table { */ sqlite3_stmt *aStmt[24]; + char *zReadExprlist; + char *zWriteExprlist; + int nNodeSize; /* Soft limit for node size */ u8 bHasStat; /* True if %_stat table exists */ u8 bHasDocsize; /* True if %_docsize table exists */ @@ -217,7 +220,7 @@ struct Fts3PhraseToken { int n; /* Number of bytes in buffer z */ int 
isPrefix; /* True if token ends with a "*" character */ int bFulltext; /* True if full-text index was used */ - Fts3SegReaderArray *pArray; /* Segment-reader for this token */ + Fts3SegReaderCursor *pSegcsr; /* Segment-reader for this token */ Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ }; @@ -285,12 +288,8 @@ int sqlite3Fts3SegReaderNew(int, sqlite3_int64, sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); int sqlite3Fts3SegReaderPending(Fts3Table*,const char*,int,int,Fts3SegReader**); void sqlite3Fts3SegReaderFree(Fts3SegReader *); -int sqlite3Fts3SegReaderIterate( - Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *, - int (*)(Fts3Table *, void *, char *, int, char *, int), void * -); int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *); -int sqlite3Fts3AllSegdirs(Fts3Table*, sqlite3_stmt **); +int sqlite3Fts3AllSegdirs(Fts3Table*, int, sqlite3_stmt **); int sqlite3Fts3ReadLock(Fts3Table *); int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*); @@ -302,9 +301,17 @@ int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); - void sqlite3Fts3SegmentsClose(Fts3Table *); +#define FTS3_SEGCURSOR_PENDING -1 +#define FTS3_SEGCURSOR_ALL -2 + +int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3SegReaderCursor*, Fts3SegFilter*); +int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3SegReaderCursor *); +void sqlite3Fts3SegReaderFinish(Fts3SegReaderCursor *); +int sqlite3Fts3SegReaderCursor( + Fts3Table *, int, const char *, int, int, Fts3SegReaderCursor *); + /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 @@ -319,6 +326,25 @@ struct Fts3SegFilter { int flags; }; +struct Fts3SegReaderCursor { + /* Used internally by 
sqlite3Fts3SegReaderXXX() calls */ + Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ + int nSegment; /* Size of apSegment array */ + int nAdvance; /* How many seg-readers to advance */ + Fts3SegFilter *pFilter; /* Pointer to filter object */ + char *aBuffer; /* Buffer to merge doclists in */ + int nBuffer; /* Allocated size of aBuffer[] in bytes */ + + /* Cost of running this iterator. Used by fts3.c only. */ + int nCost; + + /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ + char *zTerm; /* Pointer to term buffer */ + int nTerm; /* Size of zTerm in bytes */ + char *aDoclist; /* Pointer to doclist buffer */ + int nDoclist; /* Size of aDoclist[] in bytes */ +}; + /* fts3.c */ int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); @@ -355,4 +381,7 @@ void sqlite3Fts3ExprFree(Fts3Expr *); int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); #endif +/* fts3_aux.c */ +int sqlite3Fts3InitAux(sqlite3 *db); + #endif /* _FTSINT_H */ diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 61def9993a..8197ce3f5a 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -212,7 +212,7 @@ static int fts3SqlStmt( /* 4 */ "DELETE FROM %Q.'%q_segdir'", /* 5 */ "DELETE FROM %Q.'%q_docsize'", /* 6 */ "DELETE FROM %Q.'%q_stat'", -/* 7 */ "SELECT * FROM %Q.'%q_content' WHERE rowid=?", +/* 7 */ "SELECT %s FROM %Q.'%q_content' AS x WHERE rowid=?", /* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", /* 9 */ "INSERT INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", @@ -229,7 +229,7 @@ static int fts3SqlStmt( /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? 
AND ?", -/* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%z)", +/* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", /* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", @@ -246,20 +246,9 @@ static int fts3SqlStmt( if( !pStmt ){ char *zSql; if( eStmt==SQL_CONTENT_INSERT ){ - int i; /* Iterator variable */ - char *zVarlist; /* The "?, ?, ..." string */ - zVarlist = (char *)sqlite3_malloc(2*p->nColumn+2); - if( !zVarlist ){ - *pp = 0; - return SQLITE_NOMEM; - } - zVarlist[0] = '?'; - zVarlist[p->nColumn*2+1] = '\0'; - for(i=1; i<=p->nColumn; i++){ - zVarlist[i*2-1] = ','; - zVarlist[i*2] = '?'; - } - zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, zVarlist); + zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); + }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ + zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist, p->zDb, p->zName); }else{ zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); } @@ -401,8 +390,17 @@ int sqlite3Fts3ReadLock(Fts3Table *p){ ** 3: end_block ** 4: root */ -int sqlite3Fts3AllSegdirs(Fts3Table *p, sqlite3_stmt **ppStmt){ - return fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, ppStmt, 0); +int sqlite3Fts3AllSegdirs(Fts3Table *p, int iLevel, sqlite3_stmt **ppStmt){ + int rc; + sqlite3_stmt *pStmt = 0; + if( iLevel<0 ){ + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, &pStmt, 0); + }else{ + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); + if( rc==SQLITE_OK ) sqlite3_bind_int(pStmt, 1, iLevel); + } + *ppStmt = pStmt; + return rc; } @@ -1303,42 +1301,6 @@ int sqlite3Fts3SegReaderPending( return rc; } - -/* -** The second argument to this function is expected to be a statement of -** the form: -** -** SELECT -** idx, -- col 0 -** start_block, -- col 1 -** leaves_end_block, -- col 2 -** end_block, -- col 3 -** root -- col 4 -** FROM %_segdir ... 
-** -** This function allocates and initializes a Fts3SegReader structure to -** iterate through the terms stored in the segment identified by the -** current row that pStmt is pointing to. -** -** If successful, the Fts3SegReader is left pointing to the first term -** in the segment and SQLITE_OK is returned. Otherwise, an SQLite error -** code is returned. -*/ -static int fts3SegReaderNew( - sqlite3_stmt *pStmt, /* See above */ - int iAge, /* Segment "age". */ - Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ -){ - return sqlite3Fts3SegReaderNew(iAge, - sqlite3_column_int64(pStmt, 1), - sqlite3_column_int64(pStmt, 2), - sqlite3_column_int64(pStmt, 3), - sqlite3_column_blob(pStmt, 4), - sqlite3_column_bytes(pStmt, 4), - ppReader - ); -} - /* ** Compare the entries pointed to by two Fts3SegReader structures. ** Comparison is as follows: @@ -1943,25 +1905,6 @@ static int fts3IsEmpty(Fts3Table *p, sqlite3_value **apVal, int *pisEmpty){ return rc; } -/* -** Set *pnSegment to the number of segments of level iLevel in the database. -** -** Return SQLITE_OK if successful, or an SQLite error code if not. -*/ -static int fts3SegmentCount(Fts3Table *p, int iLevel, int *pnSegment){ - sqlite3_stmt *pStmt; - int rc; - - assert( iLevel>=0 ); - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_COUNT, &pStmt, 0); - if( rc!=SQLITE_OK ) return rc; - sqlite3_bind_int(pStmt, 1, iLevel); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *pnSegment = sqlite3_column_int(pStmt, 0); - } - return sqlite3_reset(pStmt); -} - /* ** Set *pnSegment to the total number of segments in the database. 
Set ** *pnMax to the largest segment level in the database (segment levels @@ -2020,15 +1963,18 @@ static int fts3DeleteSegdir( return rc; } - if( iLevel>=0 ){ + if( iLevel==FTS3_SEGCURSOR_ALL ){ + fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); + }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ + sqlite3Fts3PendingTermsClear(p); + }else{ + assert( iLevel>=0 ); rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_BY_LEVEL, &pDelete, 0); if( rc==SQLITE_OK ){ sqlite3_bind_int(pDelete, 1, iLevel); sqlite3_step(pDelete); rc = sqlite3_reset(pDelete); } - }else{ - fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); } return rc; @@ -2077,85 +2023,15 @@ static void fts3ColumnFilter( *pnList = nList; } -/* -** sqlite3Fts3SegReaderIterate() callback used when merging multiple -** segments to create a single, larger segment. -*/ -static int fts3MergeCallback( - Fts3Table *p, /* FTS3 Virtual table handle */ - void *pContext, /* Pointer to SegmentWriter* to write with */ - char *zTerm, /* Term to write to the db */ - int nTerm, /* Number of bytes in zTerm */ - char *aDoclist, /* Doclist associated with zTerm */ - int nDoclist /* Number of bytes in doclist */ -){ - SegmentWriter **ppW = (SegmentWriter **)pContext; - return fts3SegWriterAdd(p, ppW, 1, zTerm, nTerm, aDoclist, nDoclist); -} - -/* -** sqlite3Fts3SegReaderIterate() callback used when flushing the contents -** of the pending-terms hash table to the database. -*/ -static int fts3FlushCallback( - Fts3Table *p, /* FTS3 Virtual table handle */ - void *pContext, /* Pointer to SegmentWriter* to write with */ - char *zTerm, /* Term to write to the db */ - int nTerm, /* Number of bytes in zTerm */ - char *aDoclist, /* Doclist associated with zTerm */ - int nDoclist /* Number of bytes in doclist */ -){ - SegmentWriter **ppW = (SegmentWriter **)pContext; - return fts3SegWriterAdd(p, ppW, 0, zTerm, nTerm, aDoclist, nDoclist); -} - -/* -** This function is used to iterate through a contiguous set of terms -** stored in the full-text index. 
It merges data contained in one or -** more segments to support this. -** -** The second argument is passed an array of pointers to SegReader objects -** allocated with sqlite3Fts3SegReaderNew(). This function merges the range -** of terms selected by each SegReader. If a single term is present in -** more than one segment, the associated doclists are merged. For each -** term and (possibly merged) doclist in the merged range, the callback -** function xFunc is invoked with its arguments set as follows. -** -** arg 0: Copy of 'p' parameter passed to this function -** arg 1: Copy of 'pContext' parameter passed to this function -** arg 2: Pointer to buffer containing term -** arg 3: Size of arg 2 buffer in bytes -** arg 4: Pointer to buffer containing doclist -** arg 5: Size of arg 2 buffer in bytes -** -** The 4th argument to this function is a pointer to a structure of type -** Fts3SegFilter, defined in fts3Int.h. The contents of this structure -** further restrict the range of terms that callbacks are made for and -** modify the behaviour of this function. See comments above structure -** definition for details. 
-*/ -int sqlite3Fts3SegReaderIterate( +int sqlite3Fts3SegReaderStart( Fts3Table *p, /* Virtual table handle */ - Fts3SegReader **apSegment, /* Array of Fts3SegReader objects */ - int nSegment, /* Size of apSegment array */ - Fts3SegFilter *pFilter, /* Restrictions on range of iteration */ - int (*xFunc)(Fts3Table *, void *, char *, int, char *, int), /* Callback */ - void *pContext /* Callback context (2nd argument) */ + Fts3SegReaderCursor *pCsr, /* Cursor object */ + Fts3SegFilter *pFilter /* Restrictions on range of iteration */ ){ - int i; /* Iterator variable */ - char *aBuffer = 0; /* Buffer to merge doclists in */ - int nAlloc = 0; /* Allocated size of aBuffer buffer */ - int rc = SQLITE_OK; /* Return code */ + int i; - int isIgnoreEmpty = (pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); - int isRequirePos = (pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); - int isColFilter = (pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); - int isPrefix = (pFilter->flags & FTS3_SEGMENT_PREFIX); - - /* If there are zero segments, this function is a no-op. This scenario - ** comes about only when reading from an empty database. - */ - if( nSegment==0 ) goto finished; + /* Initialize the cursor object */ + pCsr->pFilter = pFilter; /* If the Fts3SegFilter defines a specific term (or term prefix) to search ** for, then advance each segment iterator until it points to a term of @@ -2163,21 +2039,58 @@ int sqlite3Fts3SegReaderIterate( ** unnecessary merge/sort operations for the case where single segment ** b-tree leaf nodes contain more than one term. 
*/ - for(i=0; inSegment; i++){ int nTerm = pFilter->nTerm; const char *zTerm = pFilter->zTerm; - Fts3SegReader *pSeg = apSegment[i]; + Fts3SegReader *pSeg = pCsr->apSegment[i]; do { - rc = fts3SegReaderNext(p, pSeg); - if( rc!=SQLITE_OK ) goto finished; + int rc = fts3SegReaderNext(p, pSeg); + if( rc!=SQLITE_OK ) return rc; }while( zTerm && fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ); } + fts3SegReaderSort( + pCsr->apSegment, pCsr->nSegment, pCsr->nSegment, fts3SegReaderCmp); + + return SQLITE_OK; +} + +int sqlite3Fts3SegReaderStep( + Fts3Table *p, /* Virtual table handle */ + Fts3SegReaderCursor *pCsr /* Cursor object */ +){ + int rc = SQLITE_OK; + + int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); + int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); + int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); + int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); + + Fts3SegReader **apSegment = pCsr->apSegment; + int nSegment = pCsr->nSegment; + Fts3SegFilter *pFilter = pCsr->pFilter; + + if( pCsr->nSegment==0 ) return SQLITE_OK; + + do { + int nMerge; + int i; + + /* Advance the first pCsr->nAdvance entries in the apSegment[] array + ** forward. Then sort the list in order of current term again. + */ + for(i=0; inAdvance; i++){ + rc = fts3SegReaderNext(p, apSegment[i]); + if( rc!=SQLITE_OK ) return rc; + } + fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); + pCsr->nAdvance = 0; + + /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. 
*/ + assert( rc==SQLITE_OK ); + if( apSegment[0]->aNode==0 ) break; - fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp); - while( apSegment[0]->aNode ){ - int nTerm = apSegment[0]->nTerm; - char *zTerm = apSegment[0]->zTerm; - int nMerge = 1; + pCsr->nTerm = apSegment[0]->nTerm; + pCsr->zTerm = apSegment[0]->zTerm; /* If this is a prefix-search, and if the term that apSegment[0] points ** to does not share a suffix with pFilter->zTerm/nTerm, then all @@ -2187,34 +2100,35 @@ int sqlite3Fts3SegReaderIterate( ** of segment apSegment[0] is not a match, exit early. */ if( pFilter->zTerm ){ - if( nTermnTerm - || (!isPrefix && nTerm>pFilter->nTerm) - || memcmp(zTerm, pFilter->zTerm, pFilter->nTerm) - ){ - goto finished; + if( pCsr->nTermnTerm + || (!isPrefix && pCsr->nTerm>pFilter->nTerm) + || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) + ){ + break; } } + nMerge = 1; while( nMergeaNode - && apSegment[nMerge]->nTerm==nTerm - && 0==memcmp(zTerm, apSegment[nMerge]->zTerm, nTerm) + && apSegment[nMerge]->nTerm==pCsr->nTerm + && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) ){ nMerge++; } assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); if( nMerge==1 && !isIgnoreEmpty ){ - Fts3SegReader *p0 = apSegment[0]; - rc = xFunc(p, pContext, zTerm, nTerm, p0->aDoclist, p0->nDoclist); - if( rc!=SQLITE_OK ) goto finished; + pCsr->aDoclist = apSegment[0]->aDoclist; + pCsr->nDoclist = apSegment[0]->nDoclist; + rc = SQLITE_ROW; }else{ int nDoclist = 0; /* Size of doclist */ sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ /* The current term of the first nMerge entries in the array ** of Fts3SegReader objects is the same. The doclists must be merged - ** and a single term added to the new segment. + ** and a single term returned with the merged doclist. 
*/ for(i=0; i0 ){ nByte = sqlite3Fts3VarintLen(iDocid-iPrev) + (isRequirePos?nList+1:0); - if( nDoclist+nByte>nAlloc ){ + if( nDoclist+nByte>pCsr->nBuffer ){ char *aNew; - nAlloc = (nDoclist+nByte)*2; - aNew = sqlite3_realloc(aBuffer, nAlloc); + pCsr->nBuffer = (nDoclist+nByte)*2; + aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); if( !aNew ){ - rc = SQLITE_NOMEM; - goto finished; + return SQLITE_NOMEM; } - aBuffer = aNew; + pCsr->aBuffer = aNew; } - nDoclist += sqlite3Fts3PutVarint(&aBuffer[nDoclist], iDocid-iPrev); + nDoclist += sqlite3Fts3PutVarint( + &pCsr->aBuffer[nDoclist], iDocid-iPrev + ); iPrev = iDocid; if( isRequirePos ){ - memcpy(&aBuffer[nDoclist], pList, nList); + memcpy(&pCsr->aBuffer[nDoclist], pList, nList); nDoclist += nList; - aBuffer[nDoclist++] = '\0'; + pCsr->aBuffer[nDoclist++] = '\0'; } } fts3SegReaderSort(apSegment, nMerge, j, fts3SegReaderDoclistCmp); } - if( nDoclist>0 ){ - rc = xFunc(p, pContext, zTerm, nTerm, aBuffer, nDoclist); - if( rc!=SQLITE_OK ) goto finished; + pCsr->aDoclist = pCsr->aBuffer; + pCsr->nDoclist = nDoclist; + rc = SQLITE_ROW; } } + pCsr->nAdvance = nMerge; + }while( rc==SQLITE_OK ); - /* If there is a term specified to filter on, and this is not a prefix - ** search, return now. The callback that corresponds to the required - ** term (if such a term exists in the index) has already been made. - */ - if( pFilter->zTerm && !isPrefix ){ - goto finished; - } + return rc; +} - for(i=0; inSegment; i++){ + sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); } - fts3SegReaderSort(apSegment, nSegment, nMerge, fts3SegReaderCmp); - } + sqlite3_free(pCsr->apSegment); + sqlite3_free(pCsr->aBuffer); - finished: - sqlite3_free(aBuffer); - return rc; + pCsr->nSegment = 0; + pCsr->apSegment = 0; + pCsr->aBuffer = 0; + } } /* @@ -2302,100 +2219,61 @@ int sqlite3Fts3SegReaderIterate( ** an SQLite error code is returned. 
*/ static int fts3SegmentMerge(Fts3Table *p, int iLevel){ - int i; /* Iterator variable */ int rc; /* Return code */ int iIdx; /* Index of new segment */ int iNewLevel = 0; /* Level to create new segment at */ - sqlite3_stmt *pStmt = 0; - SegmentWriter *pWriter = 0; - int nSegment = 0; /* Number of segments being merged */ - Fts3SegReader **apSegment = 0; /* Array of Segment iterators */ - Fts3SegReader *pPending = 0; /* Iterator for pending-terms */ + SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ + Fts3SegReaderCursor csr; /* Cursor to iterate through level(s) */ - if( iLevel<0 ){ + rc = sqlite3Fts3SegReaderCursor(p, iLevel, 0, 0, 1, &csr); + if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; + + if( iLevel==FTS3_SEGCURSOR_ALL ){ /* This call is to merge all segments in the database to a single ** segment. The level of the new segment is equal to the the numerically ** greatest segment level currently present in the database. The index - ** of the new segment is always 0. - */ - iIdx = 0; - rc = sqlite3Fts3SegReaderPending(p, 0, 0, 1, &pPending); - if( rc!=SQLITE_OK ) goto finished; - rc = fts3SegmentCountMax(p, &nSegment, &iNewLevel); - if( rc!=SQLITE_OK ) goto finished; - nSegment += (pPending!=0); - if( nSegment<=1 ){ - return SQLITE_DONE; + ** of the new segment is always 0. */ + int nDummy; /* TODO: Remove this */ + if( csr.nSegment==1 ){ + rc = SQLITE_DONE; + goto finished; } + iIdx = 0; + rc = fts3SegmentCountMax(p, &nDummy, &iNewLevel); }else{ /* This call is to merge all segments at level iLevel. Find the next ** available segment index at level iLevel+1. The call to ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to - ** a single iLevel+2 segment if necessary. - */ + ** a single iLevel+2 segment if necessary. 
*/ iNewLevel = iLevel+1; rc = fts3AllocateSegdirIdx(p, iNewLevel, &iIdx); - if( rc!=SQLITE_OK ) goto finished; - rc = fts3SegmentCount(p, iLevel, &nSegment); - if( rc!=SQLITE_OK ) goto finished; } - assert( nSegment>0 ); + if( rc!=SQLITE_OK ) goto finished; + assert( csr.nSegment>0 ); assert( iNewLevel>=0 ); - /* Allocate space for an array of pointers to segment iterators. */ - apSegment = (Fts3SegReader**)sqlite3_malloc(sizeof(Fts3SegReader *)*nSegment); - if( !apSegment ){ - rc = SQLITE_NOMEM; - goto finished; - } - memset(apSegment, 0, sizeof(Fts3SegReader *)*nSegment); + memset(&filter, 0, sizeof(Fts3SegFilter)); + filter.flags = FTS3_SEGMENT_REQUIRE_POS; + filter.flags |= (iLevel==FTS3_SEGCURSOR_ALL ? FTS3_SEGMENT_IGNORE_EMPTY : 0); - /* Allocate a Fts3SegReader structure for each segment being merged. A - ** Fts3SegReader stores the state data required to iterate through all - ** entries on all leaves of a single segment. - */ - assert( SQL_SELECT_LEVEL+1==SQL_SELECT_ALL_LEVEL); - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL+(iLevel<0), &pStmt, 0); - if( rc!=SQLITE_OK ) goto finished; - sqlite3_bind_int(pStmt, 1, iLevel); - for(i=0; SQLITE_ROW==(sqlite3_step(pStmt)); i++){ - rc = fts3SegReaderNew(pStmt, i, &apSegment[i]); - if( rc!=SQLITE_OK ){ - goto finished; - } - } - rc = sqlite3_reset(pStmt); - if( pPending ){ - apSegment[i] = pPending; - pPending = 0; + rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + while( SQLITE_OK==rc ){ + rc = sqlite3Fts3SegReaderStep(p, &csr); + if( rc!=SQLITE_ROW ) break; + rc = fts3SegWriterAdd(p, &pWriter, 1, + csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); } - pStmt = 0; if( rc!=SQLITE_OK ) goto finished; + assert( pWriter ); - memset(&filter, 0, sizeof(Fts3SegFilter)); - filter.flags = FTS3_SEGMENT_REQUIRE_POS; - filter.flags |= (iLevel<0 ? 
FTS3_SEGMENT_IGNORE_EMPTY : 0); - rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, - &filter, fts3MergeCallback, (void *)&pWriter - ); + rc = fts3DeleteSegdir(p, iLevel, csr.apSegment, csr.nSegment); if( rc!=SQLITE_OK ) goto finished; - - rc = fts3DeleteSegdir(p, iLevel, apSegment, nSegment); - if( rc==SQLITE_OK ){ - rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); - } + rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); finished: fts3SegWriterFree(pWriter); - if( apSegment ){ - for(i=0; idb, "SAVEPOINT fts3", 0, 0, 0); if( rc==SQLITE_OK ){ - rc = fts3SegmentMerge(p, -1); + rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL); if( rc==SQLITE_OK ){ rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); if( rc==SQLITE_OK ){ diff --git a/main.mk b/main.mk index 974bbe4740..8646d03df9 100644 --- a/main.mk +++ b/main.mk @@ -53,7 +53,7 @@ TCCX += -I$(TOP)/ext/async LIBOBJ+= alter.o analyze.o attach.o auth.o \ backup.o bitvec.o btmutex.o btree.o build.o \ callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \ - fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \ + fts3.o fts3_aux.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \ fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o fts3_write.o \ func.o global.o hash.o \ icu.o insert.o journal.o legacy.o loadext.o \ @@ -187,6 +187,7 @@ SRC += \ $(TOP)/ext/fts3/fts3.c \ $(TOP)/ext/fts3/fts3.h \ $(TOP)/ext/fts3/fts3Int.h \ + $(TOP)/ext/fts3/fts3_aux.c \ $(TOP)/ext/fts3/fts3_expr.c \ $(TOP)/ext/fts3/fts3_hash.c \ $(TOP)/ext/fts3/fts3_hash.h \ @@ -293,6 +294,7 @@ TESTSRC2 = \ $(TOP)/src/where.c \ parse.c \ $(TOP)/ext/fts3/fts3.c \ + $(TOP)/ext/fts3/fts3_aux.c \ $(TOP)/ext/fts3/fts3_expr.c \ $(TOP)/ext/fts3/fts3_tokenizer.c \ $(TOP)/ext/fts3/fts3_write.c \ @@ -462,6 +464,9 @@ fts2_tokenizer1.o: $(TOP)/ext/fts2/fts2_tokenizer1.c $(HDR) $(EXTHDR) fts3.o: $(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c +fts3_aux.o: $(TOP)/ext/fts3/fts3_aux.c $(HDR) $(EXTHDR) + 
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_aux.c + fts3_expr.o: $(TOP)/ext/fts3/fts3_expr.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_expr.c diff --git a/manifest b/manifest index 0411312b21..76a7ca82f2 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,5 @@ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -C SQLite\sversion\s3.7.5\srelease\scandidate\s2 -D 2011-01-28T17:03:50.592 +C Add\svirtual\stable\smodule\s"fts4aux",\sused\sto\sinspect\sthe\sfull-text\sindex\sof\san\sfts4\stable\sdirectly.\sAlso\sadd\sthe\s"compress"\sand\s"uncompress"\sfts4\soptions. +D 2011-02-01T16:34:32.732 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -64,9 +61,9 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 28ada7d1c700e57b072b2c95d70565b05925fa46 +F ext/fts3/fts3.c 871600c06569af007c9f7d46786a6a8618140862 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h a6c69c1c5e2c8c19172ddff42d262c087dcd7337 +F ext/fts3/fts3Int.h d833c1df0dc9d1b3c9069981001e6db16bf241ce F ext/fts3/fts3_expr.c 5f49e0deaf723724b08100bb3ff40aab02ad0c93 F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec @@ -76,7 +73,7 @@ F ext/fts3/fts3_snippet.c 196c5e6cde57bfc1907c2d60e9c29590e4f93fb6 F ext/fts3/fts3_tokenizer.c 055f3dc7369585350b28db1ee0f3b214dca6724d F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d -F ext/fts3/fts3_write.c 3eea26b9ca4219e1711b0db74fd5a9d448a6afbb +F ext/fts3/fts3_write.c 
ae896d78e45cbbf7d1f37f4aaa4cb7e135c3ddb3 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -102,7 +99,7 @@ F ext/rtree/tkt3363.test 142ab96eded44a3615ec79fba98c7bde7d0f96de F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 -F main.mk 05d0f3475dd331896bd607cfb45c5e21b94589ad +F main.mk 589cd0fdc9d9bf7a8220511ff5db5bd57efe3558 F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac @@ -440,8 +437,10 @@ F test/fts3am.test 218aa6ba0dfc50c7c16b2022aac5c6be593d08d8 F test/fts3an.test a49ccadc07a2f7d646ec1b81bc09da2d85a85b18 F test/fts3ao.test b83f99f70e9eec85f27d75801a974b3f820e01f9 F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa +F test/fts3aux1.test 9cf2e8499a494b92b77aaf1e5e1f50e4c4389549 F test/fts3b.test e93bbb653e52afde110ad53bbd793f14fe7a8984 F test/fts3c.test fc723a9cf10b397fdfc2b32e73c53c8b1ec02958 +F test/fts3comp1.test ef36e5ddf9811c9801f52b2988bca1fce7dc8ce8 F test/fts3corrupt.test d874ba27975aa8e5514bf58bf97b473404de0dbb F test/fts3corrupt2.test 6d96efae2f8a6af3eeaf283aba437e6d0e5447ba F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7 @@ -589,7 +588,7 @@ F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806 F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16 F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16 -F test/permutations.test c0ce0f3b741dd92a6d4c2671dbacba4b92dd81eb +F test/permutations.test e34aacdc97dfcf83e246a08917a98f610fd1340a F test/pragma.test fdfc09067ea104a0c247a1a79d8093b56656f850 F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47 F test/printf.test 
05970cde31b1a9f54bd75af60597be75a5c54fea @@ -790,7 +789,7 @@ F test/tkt3997.test a335fa41ca3985660a139df7b734a26ef53284bd F test/tkt4018.test 7c2c9ba4df489c676a0a7a0e809a1fb9b2185bd1 F test/tokenize.test ce430a7aed48fc98301611429595883fdfcab5d7 F test/trace.test 4b36a41a3e9c7842151af6da5998f5080cdad9e5 -F test/trace2.test 092bc2c5776272700450d60a36919921095bdc21 +F test/trace2.test 0ce11265c83333d8f5beeca19e71ed93a88d386c F test/trans.test 6e1b4c6a42dba31bd65f8fa5e61a2708e08ddde6 F test/trans2.test d5337e61de45e66b1fcbf9db833fa8c82e624b22 F test/trans3.test d728abaa318ca364dc370e06576aa7e5fbed7e97 @@ -901,14 +900,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 682fe41efd3578e8c9abc7138b61f361c3adbe95 -R 7fccaf9f7c013ddef8926255f6aeb773 -U drh -Z 4223173e58f25d45d236e232b9c90989 ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.6 (GNU/Linux) - -iD8DBQFNQvb6oxKgR168RlERAtOTAJ9BaewewKXL3RGZUy5ycaHwjeEmJACeOPbw -/JKqOugR+37RH7HnLCo9DBk= -=kgPx ------END PGP SIGNATURE----- +P ed759d5a9edb3bba5f48f243df47be29e3fe8cd7 +R 6973a45eacae0b4782da387c021f5c55 +U dan +Z 7f4d4e13b48c27d8ce00c35eff8d5880 diff --git a/manifest.uuid b/manifest.uuid index bccc723cb9..e222deedc2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ed759d5a9edb3bba5f48f243df47be29e3fe8cd7 \ No newline at end of file +b010ddcc52889160af2183a33c5f483bb0ae91b9 \ No newline at end of file diff --git a/test/fts3aux1.test b/test/fts3aux1.test new file mode 100644 index 0000000000..68754ce5f0 --- /dev/null +++ b/test/fts3aux1.test @@ -0,0 +1,59 @@ +# 2011 January 27 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. 
+# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS3 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +ifcapable !fts3 { finish_test ; return } +set ::testprefix fts3aux1 + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts4; + INSERT INTO t1 VALUES('one two three four'); + INSERT INTO t1 VALUES('three four five six'); + INSERT INTO t1 VALUES('one three five seven'); + + CREATE VIRTUAL TABLE terms USING fts4aux(t1); + SELECT * FROM terms; +} { + five 2 2 four 2 2 one 2 2 seven 1 1 + six 1 1 three 3 3 two 1 1 +} + +do_execsql_test 1.2 { + INSERT INTO t1 VALUES('one one one three three three'); + SELECT * FROM terms; +} { + five 2 2 four 2 2 one 3 5 seven 1 1 + six 1 1 three 4 6 two 1 1 +} + +do_execsql_test 1.3 { + DELETE FROM t1; + SELECT * FROM terms; +} {} + +do_execsql_test 1.4 { + INSERT INTO t1 VALUES('a b a b a b a'); + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + SELECT * FROM terms; +} {a 256 1024 b 256 768} + +finish_test diff --git a/test/fts3comp1.test b/test/fts3comp1.test new file mode 100644 index 0000000000..69e484e6b7 --- /dev/null +++ b/test/fts3comp1.test @@ -0,0 +1,78 @@ +# 2011 January 27 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. 
The +# focus of this script is testing the FTS3 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +ifcapable !fts3 { finish_test ; return } +set ::testprefix fts3comp1 + +set next_x 0 +proc zip {x} { + incr ::next_x + set ::strings($::next_x) $x + return $::next_x +} +proc unzip {x} { + return $::strings($x) +} + +db func zip zip +db func unzip unzip + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts4( + a, b, + compress=zip, uncompress=unzip + ); + INSERT INTO t1 VALUES('one two three', 'two four six'); +} + +do_execsql_test 1.1 { + SELECT a, b FROM t1; +} {{one two three} {two four six}} + +do_execsql_test 1.2 { + SELECT c0a, c1b FROM t1_content; +} {1 2} + +do_execsql_test 1.3 { + INSERT INTO t1 VALUES('three six nine', 'four eight twelve'); + SELECT a, b FROM t1; +} {{one two three} {two four six} {three six nine} {four eight twelve}} + +do_execsql_test 1.4 { + SELECT c0a, c1b FROM t1_content; +} {1 2 3 4} + +do_execsql_test 1.5 { + CREATE VIRTUAL TABLE terms USING fts4aux(t1); + SELECT * FROM terms; +} { + eight 1 1 four 2 2 nine 1 1 one 1 1 + six 2 2 three 2 2 twelve 1 1 two 1 2 +} + +do_execsql_test 1.6 { + DELETE FROM t1 WHERE docid = 1; + SELECT * FROM terms; +} { + eight 1 1 four 1 1 nine 1 1 + six 1 1 three 1 1 twelve 1 1 +} + +do_execsql_test 1.7 { + SELECT c0a, c1b FROM t1_content; +} {3 4} + +finish_test diff --git a/test/permutations.test b/test/permutations.test index d336a3a19a..0e2480206d 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -181,6 +181,8 @@ test_suite "fts3" -prefix "" -description { fts3near.test fts3query.test fts3shared.test fts3snippet.test fts3fault.test fts3malloc.test fts3matchinfo.test + + fts3aux1.test fts3comp1.test } diff --git a/test/trace2.test b/test/trace2.test index 2cf50d0811..a70dd89d1d 100644 --- a/test/trace2.test +++ b/test/trace2.test @@ -128,7 +128,7 @@ ifcapable fts3 { INSERT INTO x1 VALUES('North northwest wind between 8 and 14 mph'); } { "INSERT INTO x1 
VALUES('North northwest wind between 8 and 14 mph');" - "-- INSERT INTO 'main'.'x1_content' VALUES(?,?)" + "-- INSERT INTO 'main'.'x1_content' VALUES(?,(?))" "-- REPLACE INTO 'main'.'x1_docsize' VALUES(?,?)" "-- SELECT value FROM 'main'.'x1_stat' WHERE id=0" "-- REPLACE INTO 'main'.'x1_stat' VALUES(0,?)" @@ -141,8 +141,8 @@ ifcapable fts3 { INSERT INTO x1(x1) VALUES('optimize'); } { "INSERT INTO x1(x1) VALUES('optimize');" - "-- SELECT count(*), max(level) FROM 'main'.'x1_segdir'" "-- SELECT idx, start_block, leaves_end_block, end_block, root FROM 'main'.'x1_segdir' ORDER BY level DESC, idx ASC" + "-- SELECT count(*), max(level) FROM 'main'.'x1_segdir'" "-- SELECT coalesce((SELECT max(blockid) FROM 'main'.'x1_segments') + 1, 1)" "-- DELETE FROM 'main'.'x1_segdir'" "-- INSERT INTO 'main'.'x1_segdir' VALUES(?,?,?,?,?,?)"