int nName; /* Bytes required to hold table name */
int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
int bNoDocsize = 0; /* True to omit %_docsize table */
+ int bPrefix = 0; /* True to include a prefix-search index */
const char **aCol; /* Array of column names */
sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */
}else if( nKey==10 && 0==sqlite3_strnicmp(z, "uncompress", 10) ){
zUncompress = zVal;
zVal = 0;
+ }else if( nKey==6 && 0==sqlite3_strnicmp(z, "prefix", 6) ){
+ bPrefix = 1;
}else{
*pzErr = sqlite3_mprintf("unrecognized parameter: %s", z);
rc = SQLITE_ERROR;
p->bHasStat = isFts4;
TESTONLY( p->inTransaction = -1 );
TESTONLY( p->mxSavepoint = -1 );
+ p->bPrefix = bPrefix;
fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1);
+ fts3HashInit(&p->pendingPrefixes, FTS3_HASH_STRING, 1);
/* Fill in the zName and zDb fields of the vtab structure. */
zCsr = (char *)&p->azColumn[nCol];
return SQLITE_OK;
}
+/*
+** Append segment reader pNew to the pCsr->apSegment[] array and increment
+** pCsr->nSegment. The array is enlarged by 16 slots whenever nSegment is
+** a multiple of 16 (which includes the first append, when nSegment is 0).
+**
+** Return SQLITE_OK on success. If the array cannot be enlarged, pNew is
+** released with sqlite3Fts3SegReaderFree() and SQLITE_NOMEM is returned;
+** pCsr->apSegment and pCsr->nSegment are not modified in that case.
+*/
+static int fts3SegReaderCursorAppend(
+ Fts3SegReaderCursor *pCsr,
+ Fts3SegReader *pNew
+){
+ if( (pCsr->nSegment%16)==0 ){ /* All allocated slots are in use */
+ Fts3SegReader **apNew;
+ int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*);
+ apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte);
+ if( !apNew ){
+ sqlite3Fts3SegReaderFree(pNew); /* Caller never sees pNew again */
+ return SQLITE_NOMEM;
+ }
+ pCsr->apSegment = apNew;
+ }
+ pCsr->apSegment[pCsr->nSegment++] = pNew;
+ return SQLITE_OK;
+}
+
+/*
+** Set up a cursor object for iterating through the full-text index or
+** a single level therein.
+*/
int sqlite3Fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
int iLevel, /* Level of segments to scan */
int rc2;
int iAge = 0;
sqlite3_stmt *pStmt = 0;
- Fts3SegReader *pPending = 0;
- assert( iLevel==FTS3_SEGCURSOR_ALL
+ assert( iLevel==FTS3_SEGCURSOR_ALL_TERM
|| iLevel==FTS3_SEGCURSOR_PENDING
+ || iLevel==FTS3_SEGCURSOR_PENDING_PREFIX
+ || iLevel==FTS3_SEGCURSOR_ALL_PREFIX
|| iLevel>=0
);
- assert( FTS3_SEGCURSOR_PENDING<0 );
- assert( FTS3_SEGCURSOR_ALL<0 );
- assert( iLevel==FTS3_SEGCURSOR_ALL || (zTerm==0 && isPrefix==1) );
+ assert( 0>FTS3_SEGCURSOR_ALL_TERM
+ && 0>FTS3_SEGCURSOR_PENDING
+ && 0>FTS3_SEGCURSOR_PENDING_PREFIX
+ && 0>FTS3_SEGCURSOR_ALL_PREFIX
+ );
+ assert( iLevel==FTS3_SEGCURSOR_ALL_TERM
+ || iLevel==FTS3_SEGCURSOR_ALL_PREFIX
+ || (zTerm==0 && isPrefix==1)
+ );
assert( isPrefix==0 || isScan==0 );
-
memset(pCsr, 0, sizeof(Fts3SegReaderCursor));
- /* If iLevel is less than 0, include a seg-reader for the pending-terms. */
+ /* "isScan" is only set to true by the fts4aux module, not an ordinary
+ ** full-text table. The pendingTerms and pendingPrefixes tables must be
+ ** empty in this case. */
assert( isScan==0 || fts3HashCount(&p->pendingTerms)==0 );
+ assert( isScan==0 || fts3HashCount(&p->pendingPrefixes)==0 );
+
+ /* If iLevel is less than 0, include a seg-reader for the pending-terms. */
if( iLevel<0 && isScan==0 ){
- rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pPending);
+ int bPrefix = (
+ iLevel==FTS3_SEGCURSOR_PENDING_PREFIX
+ || iLevel==FTS3_SEGCURSOR_ALL_PREFIX
+ );
+ Fts3SegReader *pPending = 0;
+
+ rc = sqlite3Fts3SegReaderPending(p,zTerm,nTerm,isPrefix,bPrefix,&pPending);
if( rc==SQLITE_OK && pPending ){
- int nByte = (sizeof(Fts3SegReader *) * 16);
- pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte);
- if( pCsr->apSegment==0 ){
- rc = SQLITE_NOMEM;
- }else{
- pCsr->apSegment[0] = pPending;
- pCsr->nSegment = 1;
- pPending = 0;
- }
+ rc = fts3SegReaderCursorAppend(pCsr, pPending);
}
}
- if( iLevel!=FTS3_SEGCURSOR_PENDING ){
+ if( iLevel!=FTS3_SEGCURSOR_PENDING && iLevel!=FTS3_SEGCURSOR_PENDING_PREFIX ){
if( rc==SQLITE_OK ){
rc = sqlite3Fts3AllSegdirs(p, iLevel, &pStmt);
}
while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
+ Fts3SegReader *pSeg = 0;
/* Read the values returned by the SELECT into local variables. */
sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
int nRoot = sqlite3_column_bytes(pStmt, 4);
char const *zRoot = sqlite3_column_blob(pStmt, 4);
- /* If nSegment is a multiple of 16 the array needs to be extended. */
- if( (pCsr->nSegment%16)==0 ){
- Fts3SegReader **apNew;
- int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*);
- apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte);
- if( !apNew ){
- rc = SQLITE_NOMEM;
- goto finished;
- }
- pCsr->apSegment = apNew;
- }
-
/* If zTerm is not NULL, and this segment is not stored entirely on its
** root node, the range of leaves scanned can be reduced. Do this. */
if( iStartBlock && zTerm ){
}
rc = sqlite3Fts3SegReaderNew(iAge, iStartBlock, iLeavesEndBlock,
- iEndBlock, zRoot, nRoot, &pCsr->apSegment[pCsr->nSegment]
+ iEndBlock, zRoot, nRoot, &pSeg
);
if( rc!=SQLITE_OK ) goto finished;
- pCsr->nSegment++;
+ rc = fts3SegReaderCursorAppend(pCsr, pSeg);
iAge++;
}
}
finished:
rc2 = sqlite3_reset(pStmt);
if( rc==SQLITE_DONE ) rc = rc2;
- sqlite3Fts3SegReaderFree(pPending);
return rc;
}
pSegcsr = sqlite3_malloc(sizeof(Fts3SegReaderCursor));
if( pSegcsr ){
- Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
int i;
int nCost = 0;
- rc = sqlite3Fts3SegReaderCursor(
- p, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr);
+ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+
+ if( isPrefix && p->bPrefix && nTerm<=FTS3_MAX_PREFIX ){
+ rc = sqlite3Fts3SegReaderCursor(
+ p, FTS3_SEGCURSOR_ALL_PREFIX, zTerm, nTerm, 0, 0, pSegcsr);
+
+ }else{
+ rc = sqlite3Fts3SegReaderCursor(
+ p, FTS3_SEGCURSOR_ALL_TERM, zTerm, nTerm, isPrefix, 0, pSegcsr);
+ }
for(i=0; rc==SQLITE_OK && i<pSegcsr->nSegment; i++){
rc = sqlite3Fts3SegReaderCost(pCsr, pSegcsr->apSegment[i], &nCost);
assert( p->inTransaction!=0 );
TESTONLY( p->inTransaction = 0 );
TESTONLY( p->mxSavepoint = -1; );
+ sqlite3Fts3PendingPrefixesClear((Fts3Table *)pVtab);
return SQLITE_OK;
}
assert( p->mxSavepoint >= iSavepoint );
TESTONLY( p->mxSavepoint = iSavepoint );
sqlite3Fts3PendingTermsClear(p);
+ sqlite3Fts3PendingPrefixesClear((Fts3Table *)pVtab);
return SQLITE_OK;
}
#include "fts3_tokenizer.h"
#include "fts3_hash.h"
+#define FTS3_MAX_PREFIX 8
+
/*
** This constant controls how often segments are merged. Once there are
** FTS3_MERGE_COUNT segments of level N, they are merged into a single
*/
#define FTS3_VARINT_MAX 10
+/*
+** FTS4 virtual tables may maintain two separate indexes. One that indexes
+** all document terms (the same index FTS3 tables maintain) and another used
+** for prefixes. B+-trees that are part of the prefix index have values for
+** the %_segdir.level column that are equal to or greater than the following
+** value.
+**
+** It is considered impossible for the regular index to use levels this large.
+** In theory it could, but that would require at least 2^1024 separate
+** write operations to be made within the lifetime of the database.
+*/
+#define FTS3_SEGDIR_PREFIXLEVEL 1024
+#define FTS3_SEGDIR_PREFIXLEVEL_STR "1024"
+
/*
** The testcase() macro is only used by the amalgamation. If undefined,
** make it a no-op.
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
*/
- sqlite3_stmt *aStmt[24];
+ sqlite3_stmt *aStmt[27];
char *zReadExprlist;
char *zWriteExprlist;
int nNodeSize; /* Soft limit for node size */
u8 bHasStat; /* True if %_stat table exists */
u8 bHasDocsize; /* True if %_docsize table exists */
+ u8 bPrefix; /* True if there is a prefix index */
int nPgsz; /* Page size for host database */
char *zSegmentsTbl; /* Name of %_segments table */
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
int nPendingData;
sqlite_int64 iPrevDocid;
Fts3Hash pendingTerms;
+ Fts3Hash pendingPrefixes;
#if defined(SQLITE_DEBUG)
/* State variables used for validating that the transaction control
int sqlite3Fts3Optimize(Fts3Table *);
int sqlite3Fts3SegReaderNew(int, sqlite3_int64,
sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
-int sqlite3Fts3SegReaderPending(Fts3Table*,const char*,int,int,Fts3SegReader**);
+int sqlite3Fts3SegReaderPending(
+ Fts3Table*,const char*,int,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *);
int sqlite3Fts3AllSegdirs(Fts3Table*, int, sqlite3_stmt **);
char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *);
void sqlite3Fts3SegmentsClose(Fts3Table *);
-#define FTS3_SEGCURSOR_PENDING -1
-#define FTS3_SEGCURSOR_ALL -2
+/* Special values interpreted by sqlite3Fts3SegReaderCursor() */
+#define FTS3_SEGCURSOR_PENDING -1
+#define FTS3_SEGCURSOR_PENDING_PREFIX -2
+#define FTS3_SEGCURSOR_ALL_PREFIX -3
+#define FTS3_SEGCURSOR_ALL_TERM -4
int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3SegReaderCursor*, Fts3SegFilter*);
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3SegReaderCursor *);
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
}
- rc = sqlite3Fts3SegReaderCursor(pFts3, FTS3_SEGCURSOR_ALL,
+ rc = sqlite3Fts3SegReaderCursor(pFts3, FTS3_SEGCURSOR_ALL_TERM,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
);
if( rc==SQLITE_OK ){
struct Fts3termTable {
sqlite3_vtab base; /* Base class used by SQLite core */
+ int bPrefix; /* True for an fts4prefix table */
Fts3Table *pFts3Tab;
};
*/
static int fts3termConnectMethod(
sqlite3 *db, /* Database connection */
- void *pUnused, /* Unused */
+ void *pCtx, /* Non-zero for an fts4prefix table */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
int rc; /* value returned by declare_vtab() */
Fts3termTable *p; /* Virtual table object to return */
- UNUSED_PARAMETER(pUnused);
-
/* The user should specify a single argument - the name of an fts3 table. */
if( argc!=4 ){
*pzErr = sqlite3_mprintf(
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
p->pFts3Tab->db = db;
+ p->bPrefix = (int)pCtx;
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
- Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
+ Fts3termTable *p = (Fts3termTable *)pCursor->pVtab;
+ Fts3Table *pFts3 = p->pFts3Tab;
int rc;
UNUSED_PARAMETER(nVal);
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
- rc = sqlite3Fts3SegReaderCursor(pFts3, FTS3_SEGCURSOR_ALL,
+ rc = sqlite3Fts3SegReaderCursor(pFts3,
+ p->bPrefix ? FTS3_SEGCURSOR_ALL_PREFIX : FTS3_SEGCURSOR_ALL_TERM,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
);
if( rc==SQLITE_OK ){
int rc; /* Return code */
rc = sqlite3_create_module(db, "fts4term", &fts3term_module, 0);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_module(db, "fts4prefix", &fts3term_module, (void*)1);
+ }
return rc;
}
** fts3NodeAddTerm()
** fts3NodeWrite()
** fts3NodeFree()
+**
+** When a b+tree is written to the database (either as a result of a merge
+** or the pending-terms table being flushed), leaves are written into the
+** database file as soon as they are completely populated. The interior of
+** the tree is assembled in memory and written out only once all leaves have
+** been populated and stored. This is Ok, as the b+-tree fanout is usually
+** very large, meaning that the interior of the tree consumes relatively
+** little memory.
*/
struct SegmentNode {
SegmentNode *pParent; /* Parent node (or NULL for root node) */
#define SQL_SELECT_LEVEL 12
#define SQL_SELECT_ALL_LEVEL 13
#define SQL_SELECT_LEVEL_COUNT 14
-#define SQL_SELECT_SEGDIR_COUNT_MAX 15
+#define SQL_SELECT_SEGDIR_MAX_LEVEL 15
#define SQL_DELETE_SEGDIR_BY_LEVEL 16
#define SQL_DELETE_SEGMENTS_RANGE 17
#define SQL_CONTENT_INSERT 18
#define SQL_SELECT_DOCSIZE 21
#define SQL_SELECT_DOCTOTAL 22
#define SQL_REPLACE_DOCTOTAL 23
+#define SQL_SELECT_ALL_PREFIX_LEVEL 24
+
+#define SQL_DELETE_ALL_TERMS_SEGDIR 25
+#define SQL_DELETE_ALL_PREFIX_SEGDIR 26
/*
** This function is used to obtain an SQLite prepared statement handle
/* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
"FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC",
/* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
- "FROM %Q.'%q_segdir' ORDER BY level DESC, idx ASC",
+ "FROM %Q.'%q_segdir' WHERE level < " FTS3_SEGDIR_PREFIXLEVEL_STR
+ " ORDER BY level DESC, idx ASC",
/* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?",
-/* 15 */ "SELECT count(*), max(level) FROM %Q.'%q_segdir'",
+/* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level < (?+1)*"
+ FTS3_SEGDIR_PREFIXLEVEL_STR,
/* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?",
/* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?",
/* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=0",
/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
+/* 24 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
+ "FROM %Q.'%q_segdir' WHERE level >= " FTS3_SEGDIR_PREFIXLEVEL_STR
+ " ORDER BY level DESC, idx ASC",
+/* 25 */ "DELETE FROM %Q.'%q_segdir' WHERE level<" FTS3_SEGDIR_PREFIXLEVEL_STR,
+/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level>=" FTS3_SEGDIR_PREFIXLEVEL_STR,
};
int rc = SQLITE_OK;
sqlite3_stmt *pStmt;
int sqlite3Fts3AllSegdirs(Fts3Table *p, int iLevel, sqlite3_stmt **ppStmt){
int rc;
sqlite3_stmt *pStmt = 0;
- if( iLevel<0 ){
+ if( iLevel==FTS3_SEGCURSOR_ALL_PREFIX ){
+ rc = fts3SqlStmt(p, SQL_SELECT_ALL_PREFIX_LEVEL, &pStmt, 0);
+ }else if( iLevel==FTS3_SEGCURSOR_ALL_TERM ){
rc = fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, &pStmt, 0);
}else{
+ assert( iLevel>=0 );
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
if( rc==SQLITE_OK ) sqlite3_bind_int(pStmt, 1, iLevel);
}
return 0;
}
+/*
+** Record one occurrence of the token zToken/nToken in hash table pHash.
+** The occurrence is at position iPos of column iCol, for the docid
+** currently stored in p->iPrevDocid. The visible callers pass either
+** &p->pendingTerms or &p->pendingPrefixes as pHash.
+**
+** p->nPendingData is kept up to date: the size accounted for any existing
+** entry is subtracted first, and the size of the updated entry is added
+** back if the operation succeeds.
+**
+** Return SQLITE_OK if successful. SQLITE_NOMEM is returned if the hash
+** table insert fails; an error code written to rc by
+** fts3PendingListAppend() is returned unchanged.
+*/
+static int fts3PendingTermsAddOne(
+ Fts3Table *p,
+ int iCol,
+ int iPos,
+ Fts3Hash *pHash,
+ const char *zToken,
+ int nToken
+){
+ PendingList *pList;
+ int rc = SQLITE_OK;
+
+ pList = (PendingList *)fts3HashFind(pHash, zToken, nToken);
+ if( pList ){
+ p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem));
+ }
+ /* NOTE(review): a non-zero return presumably means pList was allocated
+ ** or moved, so the hash table entry must be refreshed - confirm against
+ ** fts3PendingListAppend(). */
+ if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){
+ if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){
+ /* Malloc failed while inserting the new entry. This can only
+ ** happen if there was no previous entry for this token.
+ */
+ assert( 0==fts3HashFind(pHash, zToken, nToken) );
+ sqlite3_free(pList);
+ rc = SQLITE_NOMEM;
+ }
+ }
+ if( rc==SQLITE_OK ){
+ p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem));
+ }
+ return rc;
+}
+
+
+
+
/*
** Tokenize the nul-terminated string zText and add all tokens to the
** pending-terms hash-table. The docid used is that currently stored in
while( SQLITE_OK==rc
&& SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
){
- PendingList *pList;
-
if( iPos>=nWord ) nWord = iPos+1;
/* Positions cannot be negative; we use -1 as a terminator internally.
break;
}
- pList = (PendingList *)fts3HashFind(&p->pendingTerms, zToken, nToken);
- if( pList ){
- p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem));
- }
- if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){
- if( pList==fts3HashInsert(&p->pendingTerms, zToken, nToken, pList) ){
- /* Malloc failed while inserting the new entry. This can only
- ** happen if there was no previous entry for this token.
- */
- assert( 0==fts3HashFind(&p->pendingTerms, zToken, nToken) );
- sqlite3_free(pList);
- rc = SQLITE_NOMEM;
+ rc = fts3PendingTermsAddOne(p,iCol,iPos,&p->pendingTerms,zToken,nToken);
+ if( p->bPrefix ){
+ int n = (nToken > FTS3_MAX_PREFIX ? FTS3_MAX_PREFIX : nToken);
+ for(; n>0 && rc==SQLITE_OK; n--){
+ rc = fts3PendingTermsAddOne(p,iCol,iPos,&p->pendingPrefixes,zToken,n);
}
}
- if( rc==SQLITE_OK ){
- p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem));
- }
}
pModule->xClose(pCsr);
p->nPendingData = 0;
}
+/*
+** Discard the contents of the pending-prefixes hash table. Each value
+** stored in the table is released with sqlite3_free(), then the table
+** itself is emptied with fts3HashClear().
+**
+** NOTE(review): p->nPendingData is not modified here, although
+** fts3PendingTermsAddOne() also counts prefix entries in it. Presumably
+** callers rely on sqlite3Fts3PendingTermsClear() to reset it - confirm.
+*/
+void sqlite3Fts3PendingPrefixesClear(Fts3Table *p){
+ Fts3HashElem *pElem;
+ for(pElem=fts3HashFirst(&p->pendingPrefixes); pElem; pElem=fts3HashNext(pElem)){
+ sqlite3_free(fts3HashData(pElem));
+ }
+ fts3HashClear(&p->pendingPrefixes);
+}
+
/*
** This function is called by the xUpdate() method as part of an INSERT
** operation. It adds entries for each term in the new record to the
/* Discard the contents of the pending-terms hash table. */
sqlite3Fts3PendingTermsClear(p);
+ sqlite3Fts3PendingPrefixesClear(p);
/* Delete everything from the %_content, %_segments and %_segdir tables. */
fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
/*
** This function is used to allocate an Fts3SegReader that iterates through
** a subset of the terms stored in the Fts3Table.pendingTerms array.
+**
+** If the isPrefixIter parameter is zero, then the returned SegReader iterates
+** through each term in the pending-terms table. Or, if isPrefixIter is
+** non-zero, it iterates through each term and its prefixes. For example, if
+** the pending terms hash table contains the terms "sqlite", "mysql" and
+** "firebird", then the iterator visits the following 'terms' (in the order
+** shown):
+**
+** f fi fir fire fireb firebi firebir firebird
+** m my mys mysq mysql
+** s sq sql sqli sqlit sqlite
+**
+** Whereas if isPrefixIter is zero, the terms visited are:
+**
+** firebird mysql sqlite
*/
int sqlite3Fts3SegReaderPending(
Fts3Table *p, /* Virtual table handle */
const char *zTerm, /* Term to search for */
int nTerm, /* Size of buffer zTerm */
- int isPrefix, /* True for a term-prefix query */
+ int isMultiTerm, /* True to visit multiple terms */
+ int isPrefixIter, /* 0->pendingTerms, 1->pendingPrefixes */
Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */
){
Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */
int nElem = 0; /* Size of array at aElem */
int rc = SQLITE_OK; /* Return Code */
- if( isPrefix ){
+ if( isMultiTerm ){
int nAlloc = 0; /* Size of allocated array at aElem */
Fts3HashElem *pE = 0; /* Iterator variable */
+ Fts3Hash *pHash;
- for(pE=fts3HashFirst(&p->pendingTerms); pE; pE=fts3HashNext(pE)){
+ pHash = (isPrefixIter ? &p->pendingPrefixes : &p->pendingTerms);
+
+ for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
char *zKey = (char *)fts3HashKey(pE);
int nKey = fts3HashKeysize(pE);
if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){
}
aElem = aElem2;
}
+
aElem[nElem++] = pE;
}
}
}
}else{
+ /* The query is a simple term lookup that matches at most one term in
+ ** the index. All that is required is a straight hash-lookup. */
Fts3HashElem *pE = fts3HashFindElem(&p->pendingTerms, zTerm, nTerm);
if( pE ){
aElem = &pE;
}
}
- if( isPrefix ){
+ if( isMultiTerm ){
sqlite3_free(aElem);
}
*ppReader = pReader;
}
/*
-** Set *pnSegment to the total number of segments in the database. Set
-** *pnMax to the largest segment level in the database (segment levels
-** are stored in the 'level' column of the %_segdir table).
+** Set *pnMax to the largest segment level in the database for either the
+** terms index (if parameter bPrefixIndex is 0) or the prefixes index (if
+** parameter bPrefixIndex is 1).
+**
+** Segment levels are stored in the 'level' column of the %_segdir table.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
-static int fts3SegmentCountMax(Fts3Table *p, int *pnSegment, int *pnMax){
+static int fts3SegmentMaxLevel(Fts3Table *p, int bPrefixIndex, int *pnMax){
sqlite3_stmt *pStmt;
int rc;
+ assert( bPrefixIndex==0 || bPrefixIndex==1 );
- rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_COUNT_MAX, &pStmt, 0);
+ /* Set pStmt to the compiled version of:
+ **
+ ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level < (?+1) * 1024
+ **
+ ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
+ */
+ rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
if( rc!=SQLITE_OK ) return rc;
+ sqlite3_bind_int(pStmt, 1, bPrefixIndex);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
- *pnSegment = sqlite3_column_int(pStmt, 0);
- *pnMax = sqlite3_column_int(pStmt, 1);
+ *pnMax = sqlite3_column_int(pStmt, 0);
}
return sqlite3_reset(pStmt);
}
return rc;
}
- if( iLevel==FTS3_SEGCURSOR_ALL ){
- fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
+ assert( iLevel>=0
+ || iLevel==FTS3_SEGCURSOR_ALL_TERM
+ || iLevel==FTS3_SEGCURSOR_ALL_PREFIX
+ || iLevel==FTS3_SEGCURSOR_PENDING
+ || iLevel==FTS3_SEGCURSOR_PENDING_PREFIX
+ );
+ if( iLevel==FTS3_SEGCURSOR_ALL_TERM ){
+ fts3SqlExec(&rc, p, SQL_DELETE_ALL_TERMS_SEGDIR, 0);
+ }else if( iLevel==FTS3_SEGCURSOR_ALL_PREFIX ){
+ fts3SqlExec(&rc, p, SQL_DELETE_ALL_PREFIX_SEGDIR, 0);
+ }else if( iLevel==FTS3_SEGCURSOR_PENDING_PREFIX ){
+ sqlite3Fts3PendingPrefixesClear(p);
}else if( iLevel==FTS3_SEGCURSOR_PENDING ){
sqlite3Fts3PendingTermsClear(p);
- }else{
- assert( iLevel>=0 );
+ }else if( iLevel>=0 ){
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_BY_LEVEL, &pDelete, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pDelete, 1, iLevel);
SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
Fts3SegFilter filter; /* Segment term filter condition */
Fts3SegReaderCursor csr; /* Cursor to iterate through level(s) */
+ int bIgnoreEmpty = 0; /* True to ignore empty segments */
rc = sqlite3Fts3SegReaderCursor(p, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
- if( iLevel==FTS3_SEGCURSOR_ALL ){
+ if( iLevel==FTS3_SEGCURSOR_ALL_TERM || iLevel==FTS3_SEGCURSOR_ALL_PREFIX ){
/* This call is to merge all segments in the database to a single
** segment. The level of the new segment is equal to the the numerically
** greatest segment level currently present in the database. The index
** of the new segment is always 0. */
- int nDummy; /* TODO: Remove this */
if( csr.nSegment==1 ){
rc = SQLITE_DONE;
goto finished;
}
- rc = fts3SegmentCountMax(p, &nDummy, &iNewLevel);
+ rc = fts3SegmentMaxLevel(p, iLevel==FTS3_SEGCURSOR_ALL_PREFIX, &iNewLevel);
+ bIgnoreEmpty = 1;
}else{
- /* This call is to merge all segments at level iLevel. Find the next
+ /* This call is to merge all segments at level iLevel. Find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
- iNewLevel = iLevel+1;
+ if( iLevel==FTS3_SEGCURSOR_PENDING ){
+ iNewLevel = 0;
+ }else if( iLevel==FTS3_SEGCURSOR_PENDING_PREFIX ){
+ iNewLevel = FTS3_SEGDIR_PREFIXLEVEL;
+ }else{
+ iNewLevel = iLevel+1;
+ }
rc = fts3AllocateSegdirIdx(p, iNewLevel, &iIdx);
}
if( rc!=SQLITE_OK ) goto finished;
memset(&filter, 0, sizeof(Fts3SegFilter));
filter.flags = FTS3_SEGMENT_REQUIRE_POS;
- filter.flags |= (iLevel==FTS3_SEGCURSOR_ALL ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
+ filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
while( SQLITE_OK==rc ){
** Flush the contents of pendingTerms to a level 0 segment.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
- return fts3SegmentMerge(p, FTS3_SEGCURSOR_PENDING);
+ int rc = SQLITE_OK;
+ if( p->bPrefix ){
+ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_PENDING_PREFIX);
+ }
+ if( rc==SQLITE_OK || rc==SQLITE_DONE ){
+ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_PENDING);
+ }
+ if( rc==SQLITE_DONE ){
+ rc = SQLITE_OK;
+ }
+ return rc;
}
/*
if( !zVal ){
return SQLITE_NOMEM;
}else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
- rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL);
- if( rc==SQLITE_DONE ){
- rc = SQLITE_OK;
- }else{
- sqlite3Fts3PendingTermsClear(p);
+ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL_PREFIX);
+ if( rc==SQLITE_OK ){
+ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL_TERM);
}
#ifdef SQLITE_TEST
}else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
}
sqlite3Fts3SegmentsClose(p);
+ sqlite3Fts3PendingTermsClear(p);
+ sqlite3Fts3PendingPrefixesClear(p);
return rc;
}
*/
int sqlite3Fts3Optimize(Fts3Table *p){
int rc;
+ int bReturnDone = 0;
rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
if( rc==SQLITE_OK ){
- rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL);
+ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL_PREFIX);
+ if( rc==SQLITE_OK ){
+ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL_TERM);
+ }
+ if( rc==SQLITE_DONE ){
+ bReturnDone = 1;
+ rc = SQLITE_OK;
+ }
if( rc==SQLITE_OK ){
rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
if( rc==SQLITE_OK ){
sqlite3Fts3PendingTermsClear(p);
+ sqlite3Fts3PendingPrefixesClear(p);
}
}else{
sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
}
}
sqlite3Fts3SegmentsClose(p);
- return rc;
+ return ((rc==SQLITE_OK && bReturnDone) ? SQLITE_DONE : rc);
}
#endif
-C Do\snot\sinvoke\sthe\sxRollbackTo\sor\sxRelease\smethods\sof\sa\svirtual\stable\swithout\nhaving\sfirst\sinvoked\san\sappropriate\sxSavepoint\smethod.\s\sAdd\sassert()\sstatements\nto\sFTS3/4\sto\sverify\sthat\sthis\sis\shappening\sin\sall\scases.
-D 2011-05-24T15:36:01.532
+C If\sthe\sfts4\soption\sprefix=1\sis\sspecified,\shave\sthe\sfts4\smodule\smaintain\san\sindex\sof\sprefixes\sas\swell\sas\sterms.
+D 2011-05-24T18:49:45.786
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 11dcc00a8d0e5202def00e81732784fb0cc4fe1d
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c e7600e285b58027657ebb46c6132b7d5a100accb
+F ext/fts3/fts3.c 4a48bfef342badba0a71bdeb5354edaa3ad83382
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
-F ext/fts3/fts3Int.h c8c0011c5e5b3a7703376ea6cd7deb91cfb96a06
-F ext/fts3/fts3_aux.c 9e931f55eed8498dafe7bc1160f10cbb1a652fdf
+F ext/fts3/fts3Int.h 02699211c0b6cf5aa713cc68c527c9a6e9159fbe
+F ext/fts3/fts3_aux.c d68d8e4d39e0342302d2c834618755af7c8058ea
F ext/fts3/fts3_expr.c 5f49e0deaf723724b08100bb3ff40aab02ad0c93
F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c d61cfd81fb0fd8fbcb25adcaee0ba671aefaa5c2
F ext/fts3/fts3_snippet.c 92b40397b28422c35c4127492d7ac6da34d1966a
-F ext/fts3/fts3_term.c f115f5a5f4298303d3b22fc6c524b8d565c7b950
+F ext/fts3/fts3_term.c cd226a311940b8ef414d5c1f7c74971a47cacedb
F ext/fts3/fts3_tokenizer.c 055f3dc7369585350b28db1ee0f3b214dca6724d
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d
-F ext/fts3/fts3_write.c b50181e5ecf484c2f56e98d651424e4b69f96c89
+F ext/fts3/fts3_write.c 0fd6a55c774731852f889007fc6edb1b99819ee5
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
F test/fts3malloc.test 9c8cc3f885bb4dfc66d0460c52f68f45e4710d1b
F test/fts3matchinfo.test cc0b009edbbf575283d5fdb53271179e0d8019ba
F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844
+F test/fts3prefix.test 5b4e08c63d5d4a79e54754dc6b2209b03c885200
F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2
F test/fts3rnd.test 2b1a579be557ab8ac54a51b39caa4aa8043cc4ad
F test/fts3shared.test 8bb266521d7c5495c0ae522bb4d376ad5387d4a2
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/split-sqlite3c.tcl d9be87f1c340285a3e081eb19b4a247981ed290c
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P a9d095660ca0e99b226e0fe669c11a0be6c49710
-R 19875cc50e6351fd8eaf432c5942eda7
-U drh
-Z f15ff60c160c7edadf458f2c05939bd4
+P 651ef24249d8c22c4f13e4c0bb98a60099cfd23a
+R c499b030bb5d74c4afca730dec7a05bc
+T *branch * fts3-prefix-search
+T *sym-fts3-prefix-search *
+T -sym-trunk *
+U dan
+Z 0c60a0f2885eb6df2e24e22407faffbe
-651ef24249d8c22c4f13e4c0bb98a60099cfd23a
\ No newline at end of file
+b5bdc639898ee22eebedeb560810e94e74de8aa4
\ No newline at end of file
--- /dev/null
+# 2011 May 04
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library. The
+# focus of this script is testing the FTS3 module.
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+set testprefix fts3prefix
+
+# This proc tests that the prefixes index appears to represent the same content
+# as the terms index.
+#
+# For every (term, docid, col, pos) entry in the terms index of table $tbl,
+# each prefix of the term that is between 1 and 8 characters long must appear
+# exactly once in the prefixes index with the same docid, col and pos. This
+# includes the term itself when it is 8 characters or shorter. The proc also
+# checks that the two indexes are stored in the same arrangement of segments:
+# identical (level, idx) pairs, with prefix levels offset by 1024.
+#
+# All SQL is run through the database handle $db. An error is raised on the
+# first mismatch; otherwise an empty string is returned.
+#
+proc fts3_terms_and_prefixes {db tbl} {
+  $db eval "CREATE VIRTUAL TABLE fts3check1 USING fts4term($tbl);"
+  $db eval "CREATE VIRTUAL TABLE fts3check2 USING fts4prefix($tbl);"
+
+  $db eval {
+    CREATE TEMP TABLE terms AS SELECT * FROM fts3check1;
+    CREATE TEMP TABLE prefixes AS SELECT * FROM fts3check2;
+    CREATE INDEX temp.idx ON prefixes(term);
+    DROP TABLE fts3check1;
+    DROP TABLE fts3check2;
+  }
+
+  $db eval { SELECT term, docid, col, pos FROM temp.terms } a {
+    # Check prefixes of length 1..nMax, where nMax is the term length capped
+    # at 8. The loop variable n is the inclusive end index passed to
+    # [string range], so it runs from 0 to nMax-1.
+    set nMax [string length $a(term)]
+    if {$nMax>8} {set nMax 8}
+    for {set n 0} {$n < $nMax} {incr n} {
+      set t [string range $a(term) 0 $n]
+      set r [$db one {
+        SELECT count(*) FROM temp.prefixes WHERE
+        term = $t AND docid = $a(docid) AND col = $a(col) AND pos = $a(pos)
+      }]
+      if {$r != 1} {
+        error "$t, $a(docid), $a(col), $a(pos)"
+      }
+    }
+  }
+
+  # Use the handle passed by the caller, not the default one.
+  $db eval { DROP TABLE temp.prefixes }
+  $db eval { DROP TABLE temp.terms }
+
+  set terms_layout [$db eval "
+    SELECT level, idx FROM ${tbl}_segdir WHERE level < 1024 ORDER by 1, 2
+  "]
+  set prefixes_layout [$db eval "
+    SELECT level-1024, idx FROM ${tbl}_segdir WHERE level >= 1024 ORDER by 1, 2
+  "]
+
+  if {$terms_layout != $prefixes_layout} {
+    puts "TERMS LAYOUT: $terms_layout"
+    puts "PREFIX LAYOUT: $prefixes_layout"
+    error "Terms and prefixes are comprised of different b-trees"
+  }
+
+  return ""
+}
+# Run fts3_terms_and_prefixes on table $tbl of database handle $db as test
+# case $tn. The test passes if the proc returns an empty string. The
+# do_test command is evaluated in the caller's scope via [uplevel].
+proc fts3_tap_test {tn db tbl} {
+ uplevel [list do_test $tn [list fts3_terms_and_prefixes $db $tbl] ""]
+}
+
+#-------------------------------------------------------------------------
+# Test cases 1.* are a sanity check. They test that the prefixes index is
+# being constructed correctly for the simplest possible case.
+#
+do_execsql_test 1.1 {
+ CREATE VIRTUAL TABLE t1 USING fts4(prefix=1);
+ CREATE VIRTUAL TABLE prefixes USING fts4prefix(t1);
+ CREATE VIRTUAL TABLE terms USING fts4term(t1);
+}
+do_execsql_test 1.2 {
+ INSERT INTO t1 VALUES('sqlite mysql firebird');
+}
+do_execsql_test 1.3 {
+ SELECT term FROM prefixes;
+} {f fi fir fire fireb firebi firebir firebird m my mys mysq mysql s sq sql sqli sqlit sqlite}
+do_execsql_test 1.4 {
+ SELECT term FROM terms;
+} {firebird mysql sqlite}
+
+fts3_tap_test 1.5 db t1
+
+#-------------------------------------------------------------------------
+# A slightly more complicated dataset. This test also verifies that DELETE
+# operations do not corrupt the prefixes index.
+#
+do_execsql_test 2.1 {
+ INSERT INTO t1 VALUES('FTS3 and FTS4 are an SQLite virtual table modules');
+ INSERT INTO t1 VALUES('that allows users to perform full-text searches on');
+ INSERT INTO t1 VALUES('a set of documents. The most common (and');
+ INSERT INTO t1 VALUES('effective) way to describe full-text searches is');
+ INSERT INTO t1 VALUES('"what Google, Yahoo and Altavista do with');
+ INSERT INTO t1 VALUES('documents placed on the World Wide Web". Users');
+ INSERT INTO t1 VALUES('input a term, or series of terms, perhaps');
+ INSERT INTO t1 VALUES('connected by a binary operator or grouped together');
+ INSERT INTO t1 VALUES('into a phrase, and the full-text query system');
+ INSERT INTO t1 VALUES('finds the set of documents that best matches those');
+ INSERT INTO t1 VALUES('terms considering the operators and groupings the');
+ INSERT INTO t1 VALUES('user has specified. This article describes the');
+ INSERT INTO t1 VALUES('deployment and usage of FTS3 and FTS4.');
+ INSERT INTO t1 VALUES('FTS1 and FTS2 are obsolete full-text search');
+ INSERT INTO t1 VALUES('modules for SQLite. There are known issues with');
+ INSERT INTO t1 VALUES('these older modules and their use should be');
+ INSERT INTO t1 VALUES('avoided. Portions of the original FTS3 code were');
+ INSERT INTO t1 VALUES('contributed to the SQLite project by Scott Hess of');
+ INSERT INTO t1 VALUES('Google. It is now developed and maintained as part');
+ INSERT INTO t1 VALUES('of SQLite. ');
+}
+fts3_tap_test 2.2 db t1
+do_execsql_test 2.3 { DELETE FROM t1 WHERE docid%2; }
+fts3_tap_test 2.4 db t1
+
+do_execsql_test 2.5 { INSERT INTO t1(t1) VALUES('optimize') }
+fts3_tap_test 2.6 db t1
+
+do_execsql_test 3.1 {
+ CREATE VIRTUAL TABLE t2 USING fts4(prefix=1);
+ INSERT INTO t2 VALUES('On 12 September the wind direction turned and');
+ INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the');
+ INSERT INTO t2 VALUES('fleet was forced to take shelter at');
+ INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind');
+ INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet');
+ INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey');
+ INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved');
+ INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he');
+ INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of');
+ INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland');
+ INSERT INTO t2 VALUES('and waited for Harold''s return from the north.');
+ INSERT INTO t2 VALUES('On 12 September the wind direction turned and');
+ INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the');
+ INSERT INTO t2 VALUES('fleet was forced to take shelter at');
+ INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind');
+ INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet');
+ INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey');
+ INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved');
+ INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he');
+ INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of');
+ INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland');
+ INSERT INTO t2 VALUES('and waited for Harold''s return from the north.');
+}
+
+fts3_tap_test 3.2 db t2
+do_execsql_test 3.3 { SELECT optimize(t2) FROM t2 LIMIT 1 } {{Index optimized}}
+fts3_tap_test 3.4 db t2
+
+
+#-------------------------------------------------------------------------
+# Simple tests for reading the prefix-index.
+#
+do_execsql_test 4.1 {
+ CREATE VIRTUAL TABLE t3 USING fts4(prefix=1);
+ INSERT INTO t3 VALUES('one two three');
+ INSERT INTO t3 VALUES('four five six');
+ INSERT INTO t3 VALUES('seven eight nine');
+}
+do_execsql_test 4.2 {
+ SELECT * FROM t3 WHERE t3 MATCH 'f*'
+} {{four five six}}
+do_execsql_test 4.3 {
+ SELECT * FROM t3 WHERE t3 MATCH 'four*'
+} {{four five six}}
+do_execsql_test 4.4 {
+ SELECT * FROM t3 WHERE t3 MATCH 's*'
+} {{four five six} {seven eight nine}}
+
+finish_test