From: dan Date: Mon, 23 Jun 2014 11:33:22 +0000 (+0000) Subject: Add some code for an experimental fts5 module. Does not work yet. X-Git-Tag: version-3.8.11~114^2~177 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e0fa4107c2e8921f5d019909d69bf163a7e31738;p=thirdparty%2Fsqlite.git Add some code for an experimental fts5 module. Does not work yet. FossilOrigin-Name: 1e0648dcf283d4f1f6159db4d2433b6cc635992e --- diff --git a/ext/fts3/fts3.h b/ext/fts3/fts3.h index c1aa8caf09..e99457eebd 100644 --- a/ext/fts3/fts3.h +++ b/ext/fts3/fts3.h @@ -20,6 +20,7 @@ extern "C" { #endif /* __cplusplus */ int sqlite3Fts3Init(sqlite3 *db); +int sqlite3Fts5Init(sqlite3 *db); #ifdef __cplusplus } /* extern "C" */ diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c new file mode 100644 index 0000000000..7a6c361068 --- /dev/null +++ b/ext/fts5/fts5.c @@ -0,0 +1,425 @@ +/* +** 2014 Jun 09 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This is an SQLite module implementing full-text search. +*/ + +#include "fts5Int.h" + +typedef struct Fts5Table Fts5Table; + +struct Fts5Table { + sqlite3_vtab base; /* Base class used by SQLite core */ + Fts5Config *pConfig; /* Virtual table configuration */ + Fts5Index *pIndex; /* Full-text index */ + Fts5Storage *pStorage; /* Document store */ +}; + +/* +** Close a virtual table handle opened by fts5InitVtab(). 
If the bDestroy +** argument is non-zero, attempt delete the shadow tables from teh database +*/ +static int fts5FreeVtab(Fts5Table *pTab, int bDestroy){ + int rc = SQLITE_OK; + if( pTab ){ + int rc2; + rc2 = sqlite3Fts5IndexClose(pTab->pIndex, bDestroy); + if( rc==SQLITE_OK ) rc = rc2; + rc2 = sqlite3Fts5StorageClose(pTab->pStorage, bDestroy); + if( rc==SQLITE_OK ) rc = rc2; + sqlite3Fts5ConfigFree(pTab->pConfig); + sqlite3_free(pTab); + } + return rc; +} + +/* +** The xDisconnect() virtual table method. +*/ +static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ + return fts5FreeVtab((Fts5Table*)pVtab, 0); +} + +/* +** The xDestroy() virtual table method. +*/ +static int fts5DestroyMethod(sqlite3_vtab *pVtab){ + return fts5FreeVtab((Fts5Table*)pVtab, 1); +} + +/* +** This function is the implementation of both the xConnect and xCreate +** methods of the FTS3 virtual table. +** +** The argv[] array contains the following: +** +** argv[0] -> module name ("fts5") +** argv[1] -> database name +** argv[2] -> table name +** argv[...] -> "column name" and other module argument fields. 
+*/ +static int fts5InitVtab( + int bCreate, /* True for xCreate, false for xConnect */ + sqlite3 *db, /* The SQLite database connection */ + void *pAux, /* Hash table containing tokenizers */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ + char **pzErr /* Write any error message here */ +){ + int rc; /* Return code */ + Fts5Config *pConfig; /* Results of parsing argc/argv */ + Fts5Table *pTab = 0; /* New virtual table object */ + + /* Parse the arguments */ + rc = sqlite3Fts5ConfigParse(db, argc, (const char**)argv, &pConfig, pzErr); + assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); + + /* Allocate the new vtab object */ + if( rc==SQLITE_OK ){ + pTab = (Fts5Table*)sqlite3_malloc(sizeof(Fts5Table)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pTab, 0, sizeof(Fts5Table)); + pTab->pConfig = pConfig; + } + } + + /* Open the index sub-system */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->pIndex, pzErr); + } + + /* Open the storage sub-system */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5StorageOpen( + pConfig, pTab->pIndex, bCreate, &pTab->pStorage, pzErr + ); + } + + /* Call sqlite3_declare_vtab() */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5ConfigDeclareVtab(pConfig); + } + + if( rc!=SQLITE_OK ){ + fts5FreeVtab(pTab, 0); + pTab = 0; + } + *ppVTab = (sqlite3_vtab*)pTab; + return rc; +} + +/* +** The xConnect() and xCreate() methods for the virtual table. All the +** work is done in function fts5InitVtab(). 
+*/ +static int fts5ConnectMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); +} +static int fts5CreateMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); +} + +/* +** Implementation of the xBestIndex method for FTS3 tables. There +** are three possible strategies, in order of preference: +** +** 1. Direct lookup by rowid or docid. +** 2. Full-text search using a MATCH operator on a non-docid column. +** 3. Linear scan of %_content table. +*/ +static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ + return SQLITE_OK; +} + +/* +** Implementation of xOpen method. +*/ +static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + return SQLITE_OK; +} + +/* +** Close the cursor. For additional information see the documentation +** on the xClose method of the virtual table interface. +*/ +static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ + return SQLITE_OK; +} + + +/* +** Advance the cursor to the next row in the table that matches the +** search criteria. +** +** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned +** even if we reach end-of-file. The fts5EofMethod() will be called +** subsequently to determine whether or not an EOF was hit. 
+*/ +static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ + return SQLITE_OK; +} + +/* +** This is the xFilter interface for the virtual table. See +** the virtual table xFilter method documentation for additional +** information. +*/ +static int fts5FilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + return SQLITE_OK; +} + +/* +** This is the xEof method of the virtual table. SQLite calls this +** routine to find out if it has reached the end of a result set. +*/ +static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ + return 1; +} + +/* +** This is the xRowid method. The SQLite core calls this routine to +** retrieve the rowid for the current row of the result set. fts5 +** exposes %_content.docid as the rowid for the virtual table. The +** rowid should be written to *pRowid. +*/ +static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ + return SQLITE_OK; +} + +/* +** This is the xColumn method, called by SQLite to request a value from +** the row that the supplied cursor currently points to. +*/ +static int fts5ColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + return SQLITE_OK; +} + +/* +** This function is called to handle an FTS INSERT command. In other words, +** an INSERT statement of the form: +** +** INSERT INTO fts(fts) VALUES($pVal) +** +** Argument pVal is the value assigned to column "fts" by the INSERT +** statement. This function returns SQLITE_OK if successful, or an SQLite +** error code if an error occurs. 
+*/ +static int fts5SpecialCommand(Fts5Table *pTab, sqlite3_value *pVal){ + const char *z = sqlite3_value_text(pVal); + int n = sqlite3_value_bytes(pVal); + int rc = SQLITE_ERROR; + + if( 0==sqlite3_stricmp("integrity-check", z) ){ + rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); + }else + + if( n>5 && 0==sqlite3_strnicmp("pgsz=", z, 5) ){ + int pgsz = atoi(&z[5]); + if( pgsz<32 ) pgsz = 32; + sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz); + rc = SQLITE_OK; + } + + return rc; +} + +/* +** This function is the implementation of the xUpdate callback used by +** FTS3 virtual tables. It is invoked by SQLite each time a row is to be +** inserted, updated or deleted. +*/ +static int fts5UpdateMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + int nArg, /* Size of argument array */ + sqlite3_value **apVal, /* Array of arguments */ + sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ +){ + Fts5Table *pTab = (Fts5Table*)pVtab; + Fts5Config *pConfig = pTab->pConfig; + int eType0; /* value_type() of apVal[0] */ + int eConflict; /* ON CONFLICT for this DML */ + int rc = SQLITE_OK; /* Return code */ + + assert( nArg==1 || nArg==(2 + pConfig->nCol + 1) ); + + if( SQLITE_NULL!=sqlite3_value_type(apVal[2 + pConfig->nCol]) ){ + return fts5SpecialCommand(pTab, apVal[2 + pConfig->nCol]); + } + + eType0 = sqlite3_value_type(apVal[0]); + eConflict = sqlite3_vtab_on_conflict(pConfig->db); + + assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); + if( eType0==SQLITE_INTEGER ){ + i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); + } + + if( rc==SQLITE_OK && nArg>1 ){ + rc = sqlite3Fts5StorageInsert(pTab->pStorage, apVal, eConflict, pRowid); + } + + return rc; +} + +/* +** Implementation of xSync() method. 
+*/ +static int fts5SyncMethod(sqlite3_vtab *pVtab){ + int rc; + Fts5Table *pTab = (Fts5Table*)pVtab; + rc = sqlite3Fts5IndexSync(pTab->pIndex); + return rc; +} + +/* +** Implementation of xBegin() method. +*/ +static int fts5BeginMethod(sqlite3_vtab *pVtab){ + return SQLITE_OK; +} + +/* +** Implementation of xCommit() method. This is a no-op. The contents of +** the pending-terms hash-table have already been flushed into the database +** by fts5SyncMethod(). +*/ +static int fts5CommitMethod(sqlite3_vtab *pVtab){ + return SQLITE_OK; +} + +/* +** Implementation of xRollback(). Discard the contents of the pending-terms +** hash-table. Any changes made to the database are reverted by SQLite. +*/ +static int fts5RollbackMethod(sqlite3_vtab *pVtab){ + Fts5Table *pTab = (Fts5Table*)pVtab; + int rc; + rc = sqlite3Fts5IndexRollback(pTab->pIndex); + return rc; +} + +/* +** This routine implements the xFindFunction method for the FTS3 +** virtual table. +*/ +static int fts5FindFunctionMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + int nArg, /* Number of SQL function arguments */ + const char *zName, /* Name of SQL function */ + void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ + void **ppArg /* Unused */ +){ + /* No function of the specified name was found. Return 0. */ + return 0; +} + +/* +** Implementation of FTS3 xRename method. Rename an fts5 table. +*/ +static int fts5RenameMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + const char *zName /* New name of table */ +){ + int rc = SQLITE_OK; + return rc; +} + +/* +** The xSavepoint() method. +** +** Flush the contents of the pending-terms table to disk. +*/ +static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ + int rc = SQLITE_OK; + return rc; +} + +/* +** The xRelease() method. +** +** This is a no-op. +*/ +static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ + return SQLITE_OK; +} + +/* +** The xRollbackTo() method. 
+** +** Discard the contents of the pending terms table. +*/ +static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ + return SQLITE_OK; +} + +static const sqlite3_module fts5Module = { + /* iVersion */ 2, + /* xCreate */ fts5CreateMethod, + /* xConnect */ fts5ConnectMethod, + /* xBestIndex */ fts5BestIndexMethod, + /* xDisconnect */ fts5DisconnectMethod, + /* xDestroy */ fts5DestroyMethod, + /* xOpen */ fts5OpenMethod, + /* xClose */ fts5CloseMethod, + /* xFilter */ fts5FilterMethod, + /* xNext */ fts5NextMethod, + /* xEof */ fts5EofMethod, + /* xColumn */ fts5ColumnMethod, + /* xRowid */ fts5RowidMethod, + /* xUpdate */ fts5UpdateMethod, + /* xBegin */ fts5BeginMethod, + /* xSync */ fts5SyncMethod, + /* xCommit */ fts5CommitMethod, + /* xRollback */ fts5RollbackMethod, + /* xFindFunction */ fts5FindFunctionMethod, + /* xRename */ fts5RenameMethod, + /* xSavepoint */ fts5SavepointMethod, + /* xRelease */ fts5ReleaseMethod, + /* xRollbackTo */ fts5RollbackToMethod, +}; + +int sqlite3Fts5Init(sqlite3 *db){ + int rc; + rc = sqlite3_create_module_v2(db, "fts5", &fts5Module, 0, 0); + if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); + if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(db); + return rc; +} + diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h new file mode 100644 index 0000000000..5329c207c8 --- /dev/null +++ b/ext/fts5/fts5Int.h @@ -0,0 +1,310 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ +#ifndef _FTS5INT_H +#define _FTS5INT_H + +#include "sqliteInt.h" +#include "fts3_tokenizer.h" + + +/* +** Maximum number of prefix indexes on single FTS5 table. This must be +** less than 32. 
If it is set to anything large than that, an #error +** directive in fts5_index.c will cause the build to fail. +*/ +#define FTS5_MAX_PREFIX_INDEXES 31 + +#define FTS5_DEFAULT_NEARDIST 10 + +/************************************************************************** +** Interface to code in fts5_config.c. fts5_config.c contains contains code +** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. +*/ + +typedef struct Fts5Config Fts5Config; + +/* +** An instance of the following structure encodes all information that can +** be gleaned from the CREATE VIRTUAL TABLE statement. +*/ +struct Fts5Config { + sqlite3 *db; /* Database handle */ + char *zDb; /* Database holding FTS index (e.g. "main") */ + char *zName; /* Name of FTS index */ + int nCol; /* Number of columns */ + char **azCol; /* Column names */ + int nPrefix; /* Number of prefix indexes */ + int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ + sqlite3_tokenizer *pTokenizer; /* Tokenizer instance for this table */ +}; + +int sqlite3Fts5ConfigParse(sqlite3*, int, const char**, Fts5Config**, char**); +void sqlite3Fts5ConfigFree(Fts5Config*); + +int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); + +int sqlite3Fts5Tokenize( + Fts5Config *pConfig, /* FTS5 Configuration object */ + const char *pText, int nText, /* Text to tokenize */ + void *pCtx, /* Context passed to xToken() */ + int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ +); + +void sqlite3Fts5Dequote(char *z); + +/* +** End of interface to code in fts5_config.c. +**************************************************************************/ + +/************************************************************************** +** Interface to code in fts5_index.c. fts5_index.c contains contains code +** to access the data stored in the %_data table. +*/ + +typedef struct Fts5Index Fts5Index; +typedef struct Fts5IndexIter Fts5IndexIter; + +/* +** Values used as part of the flags argument passed to IndexQuery(). 
*/
#define FTS5INDEX_QUERY_PREFIX 0x0001       /* Prefix query */
#define FTS5INDEX_QUERY_ASC    0x0002       /* Docs in ascending rowid order */
#define FTS5INDEX_QUERY_MATCH  0x0004       /* Use the iMatch arg to Next() */
#define FTS5INDEX_QUERY_DELETE 0x0008       /* Visit delete markers */

/*
** Create/destroy an Fts5Index object.
*/
int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy);

/*
** for(
**   pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0);
**   0==sqlite3Fts5IterEof(pIter);
**   sqlite3Fts5IterNext(pIter, 0)
** ){
**   i64 iDocid = sqlite3Fts5IterDocid(pIter);
** }
*/

/*
** Open a new iterator to iterate through all docids that match the
** specified token or token prefix.
*/
Fts5IndexIter *sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags                       /* Mask of FTS5INDEX_QUERY_X flags */
);

/*
** Docid list iteration.
*/
int  sqlite3Fts5IterEof(Fts5IndexIter*);
void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch);
int  sqlite3Fts5IterSeek(Fts5IndexIter*, i64 iDocid);
i64  sqlite3Fts5IterDocid(Fts5IndexIter*);

/*
** Position list iteration.
**
**   for(
**     iPos=sqlite3Fts5IterFirstPos(pIter, iCol);
**     iPos>=0;
**     iPos=sqlite3Fts5IterNextPos(pIter)
**   ){
**     // token appears at position iPos of column iCol of the current document
**   }
*/
int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol);
int sqlite3Fts5IterNextPos(Fts5IndexIter*);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
+** If the operation is a delete, it must be called (at least) once for each +** unique token in the document with an iCol value less than zero. The iPos +** argument is ignored for a delete. +*/ +void sqlite3Fts5IndexWrite( + Fts5Index *p, /* Index to write to */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +); + +/* +** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to +** document iDocid. +*/ +void sqlite3Fts5IndexBeginWrite( + Fts5Index *p, /* Index to write to */ + i64 iDocid /* Docid to add or remove data from */ +); + +/* +** Flush any data stored in the in-memory hash tables to the database. +** +** This is called whenever (a) the main transaction is committed or (b) a +** new sub-transaction is opened. +*/ +void sqlite3Fts5IndexFlush(Fts5Index *p); + +int sqlite3Fts5IndexSync(Fts5Index *p); + +/* +** Discard any data stored in the in-memory hash tables. Do not write it +** to the database. Additionally, assume that the contents of the %_data +** table may have changed on disk. So any in-memory caches of %_data +** records must be invalidated. +** +** This is called (a) whenever a main or sub-transaction is rolled back, +** and (b) whenever the read transaction is closed. +*/ +int sqlite3Fts5IndexRollback(Fts5Index *p); + +/* +** Retrieve and clear the current error code, respectively. +*/ +int sqlite3Fts5IndexErrcode(Fts5Index*); +void sqlite3Fts5IndexReset(Fts5Index*); + +/* +** Get (bSet==0) or set (bSet!=0) the "averages" record. +*/ +void sqlite3Fts5IndexAverages(Fts5Index *p, int bSet, int nAvg, int *aAvg); + +/* +** Functions called by the storage module as part of integrity-check. 
*/
u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int);
int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);

/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5IndexInit(sqlite3*);

void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz);

/*
** End of interface to code in fts5_index.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
*/
typedef struct Fts5Storage Fts5Storage;

int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy);

int sqlite3Fts5DropTable(Fts5Config*, const char *zPost);
int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, char **pzErr);

int sqlite3Fts5StorageDelete(Fts5Storage *p, i64);
int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*);

int sqlite3Fts5StorageIntegrity(Fts5Storage *p);

/*
** End of interface to code in fts5_storage.c.
**************************************************************************/


/**************************************************************************
** Interface to code in fts5_expr.c.
+*/ +typedef struct Fts5Expr Fts5Expr; +typedef struct Fts5Parse Fts5Parse; +typedef struct Fts5Token Fts5Token; +typedef struct Fts5ExprPhrase Fts5ExprPhrase; +typedef struct Fts5ExprNearset Fts5ExprNearset; + +struct Fts5Token { + const char *p; /* Token text (not NULL terminated) */ + int n; /* Size of buffer p in bytes */ +}; + +int sqlite3Fts5ExprNew( + Fts5Config *pConfig, + Fts5Index *pIdx, + const char *zExpr, + Fts5Expr **ppNew, + char **pzErr +); + +int sqlite3Fts5ExprFirst(Fts5Expr *p); +int sqlite3Fts5ExprNext(Fts5Expr *p); +int sqlite3Fts5ExprEof(Fts5Expr *p); +i64 sqlite3Fts5ExprRowid(Fts5Expr *p); + +void sqlite3Fts5ExprFree(Fts5Expr *p); + +// int sqlite3Fts5IterFirstPos(Fts5Expr*, int iCol, int *piPos); +// int sqlite3Fts5IterNextPos(Fts5Expr*, int *piPos); + +/* Called during startup to register a UDF with SQLite */ +int sqlite3Fts5ExprInit(sqlite3*); + +/******************************************* +** The fts5_expr.c API above this point is used by the other hand-written +** C code in this module. The interfaces below this point are called by +** the parser code in fts5parse.y. */ + +void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); + +Fts5Expr *sqlite3Fts5ParseExpr( + Fts5Parse *pParse, + int eType, + Fts5Expr *pLeft, + Fts5Expr *pRight, + Fts5ExprNearset *pNear +); + +Fts5ExprPhrase *sqlite3Fts5ParseTerm( + Fts5Parse *pParse, + Fts5ExprPhrase *pPhrase, + Fts5Token *pToken, + int bPrefix +); + +Fts5ExprNearset *sqlite3Fts5ParseNearset( + Fts5Parse*, + Fts5ExprNearset*, + Fts5ExprPhrase* +); + +void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); +void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); + +void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); +void sqlite3Fts5ParseSetColumn(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); +void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5Expr *p); +void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); + + +/* +** End of interface to code in fts5_expr.c. 
+**************************************************************************/ + +#endif diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c new file mode 100644 index 0000000000..bbcbc5e0e5 --- /dev/null +++ b/ext/fts5/fts5_config.c @@ -0,0 +1,318 @@ +/* +** 2014 Jun 09 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This is an SQLite module implementing full-text search. +*/ + +#include "fts5Int.h" + +/* +** Convert an SQL-style quoted string into a normal string by removing +** the quote characters. The conversion is done in-place. If the +** input does not begin with a quote character, then this routine +** is a no-op. +** +** Examples: +** +** "abc" becomes abc +** 'xyz' becomes xyz +** [pqr] becomes pqr +** `mno` becomes mno +*/ +void sqlite3Fts5Dequote(char *z){ + char quote; /* Quote character (if any ) */ + + quote = z[0]; + if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ + int iIn = 1; /* Index of next byte to read from input */ + int iOut = 0; /* Index of next byte to write to output */ + + /* If the first byte was a '[', then the close-quote character is a ']' */ + if( quote=='[' ) quote = ']'; + + while( ALWAYS(z[iIn]) ){ + if( z[iIn]==quote ){ + if( z[iIn+1]!=quote ) break; + z[iOut++] = quote; + iIn += 2; + }else{ + z[iOut++] = z[iIn++]; + } + } + z[iOut] = '\0'; + } +} + +/* +** Parse the "special" CREATE VIRTUAL TABLE directive and update +** configuration object pConfig as appropriate. +** +** If successful, object pConfig is updated and SQLITE_OK returned. If +** an error occurs, an SQLite error code is returned and an error message +** may be left in *pzErr. 
It is the responsibility of the caller to +** eventually free any such error message using sqlite3_free(). +*/ +static int fts5ConfigParseSpecial( + Fts5Config *pConfig, /* Configuration object to update */ + char *zCmd, /* Special command to parse */ + char *zArg, /* Argument to parse */ + char **pzErr /* OUT: Error message */ +){ + if( sqlite3_stricmp(zCmd, "prefix")==0 ){ + char *p; + if( pConfig->aPrefix ){ + *pzErr = sqlite3_mprintf("multiple prefix=... directives"); + return SQLITE_ERROR; + } + pConfig->aPrefix = sqlite3_malloc(sizeof(int) * FTS5_MAX_PREFIX_INDEXES); + p = zArg; + while( p[0] ){ + int nPre = 0; + while( p[0]==' ' ) p++; + while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ + nPre = nPre*10 + (p[0] - '0'); + p++; + } + while( p[0]==' ' ) p++; + if( p[0]==',' ){ + p++; + }else if( p[0] ){ + *pzErr = sqlite3_mprintf("malformed prefix=... directive"); + return SQLITE_ERROR; + } + if( nPre==0 || nPre>=1000 ){ + *pzErr = sqlite3_mprintf("prefix length out of range: %d", nPre); + return SQLITE_ERROR; + } + pConfig->aPrefix[pConfig->nPrefix] = nPre; + pConfig->nPrefix++; + } + return SQLITE_OK; + } + + *pzErr = sqlite3_mprintf("unrecognized directive: \"%s\"", zCmd); + return SQLITE_ERROR; +} + +/* +** Duplicate the string passed as the only argument into a buffer allocated +** by sqlite3_malloc(). +** +** Return 0 if an OOM error is encountered. +*/ +static char *fts5Strdup(const char *z){ + return sqlite3_mprintf("%s", z); +} + +void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module**); + +/* +** Allocate an instance of the default tokenizer ("simple") at +** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error +** code if an error occurs. 
+*/ +static int fts5ConfigDefaultTokenizer(Fts5Config *pConfig){ + sqlite3_tokenizer_module *pMod; /* Tokenizer module "simple" */ + sqlite3_tokenizer *pTokenizer; /* Tokenizer instance */ + int rc; /* Return code */ + + sqlite3Fts3SimpleTokenizerModule(&pMod); + rc = pMod->xCreate(0, 0, &pTokenizer); + if( rc==SQLITE_OK ){ + pTokenizer->pModule = pMod; + pConfig->pTokenizer = pTokenizer; + } + + return rc; +} + +/* +** Arguments nArg/azArg contain the string arguments passed to the xCreate +** or xConnect method of the virtual table. This function attempts to +** allocate an instance of Fts5Config containing the results of parsing +** those arguments. +** +** If successful, SQLITE_OK is returned and *ppOut is set to point to the +** new Fts5Config object. If an error occurs, an SQLite error code is +** returned, *ppOut is set to NULL and an error message may be left in +** *pzErr. It is the responsibility of the caller to eventually free any +** such error message using sqlite3_free(). 
+*/ +int sqlite3Fts5ConfigParse( + sqlite3 *db, + int nArg, /* Number of arguments */ + const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ + Fts5Config **ppOut, /* OUT: Results of parse */ + char **pzErr /* OUT: Error message */ +){ + int rc = SQLITE_OK; /* Return code */ + Fts5Config *pRet; /* New object to return */ + + *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); + if( pRet==0 ) return SQLITE_NOMEM; + memset(pRet, 0, sizeof(Fts5Config)); + pRet->db = db; + + pRet->azCol = (char**)sqlite3_malloc(sizeof(char*) * nArg); + pRet->zDb = fts5Strdup(azArg[1]); + pRet->zName = fts5Strdup(azArg[2]); + if( pRet->azCol==0 || pRet->zDb==0 || pRet->zName==0 ){ + rc = SQLITE_NOMEM; + }else{ + int i; + for(i=3; rc==SQLITE_OK && iazCol[pRet->nCol++] = zDup; + } + } + } + } + + if( rc==SQLITE_OK && pRet->pTokenizer==0 ){ + rc = fts5ConfigDefaultTokenizer(pRet); + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts5ConfigFree(pRet); + *ppOut = 0; + } + return rc; +} + +/* +** Free the configuration object passed as the only argument. +*/ +void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ + if( pConfig ){ + int i; + if( pConfig->pTokenizer ){ + pConfig->pTokenizer->pModule->xDestroy(pConfig->pTokenizer); + } + sqlite3_free(pConfig->zDb); + sqlite3_free(pConfig->zName); + for(i=0; inCol; i++){ + sqlite3_free(pConfig->azCol[i]); + } + sqlite3_free(pConfig->azCol); + sqlite3_free(pConfig->aPrefix); + sqlite3_free(pConfig); + } +} + +/* +** Call sqlite3_declare_vtab() based on the contents of the configuration +** object passed as the only argument. Return SQLITE_OK if successful, or +** an SQLite error code if an error occurs. 
+*/ +int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ + int i; + int rc; + char *zSql; + char *zOld; + + zSql = (char*)sqlite3_mprintf("CREATE TABLE x("); + for(i=0; zSql && inCol; i++){ + zOld = zSql; + zSql = sqlite3_mprintf("%s%s%Q", zOld, (i==0?"":", "), pConfig->azCol[i]); + sqlite3_free(zOld); + } + + if( zSql ){ + zOld = zSql; + zSql = sqlite3_mprintf("%s, %Q HIDDEN)", zOld, pConfig->zName); + sqlite3_free(zOld); + } + + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_declare_vtab(pConfig->db, zSql); + sqlite3_free(zSql); + } + + return rc; +} + +/* +** Tokenize the text passed via the second and third arguments. +** +** The callback is invoked once for each token in the input text. The +** arguments passed to it are, in order: +** +** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() +** const char *pToken // Pointer to buffer containing token +** int nToken // Size of token in bytes +** int iStart // Byte offset of start of token within input text +** int iEnd // Byte offset of end of token within input text +** int iPos // Position of token in input (first token is 0) +** +** If the callback returns a non-zero value the tokenization is abandoned +** and no further callbacks are issued. +** +** This function returns SQLITE_OK if successful or an SQLite error code +** if an error occurs. If the tokenization was abandoned early because +** the callback returned SQLITE_DONE, this is not an error and this function +** still returns SQLITE_OK. Or, if the tokenization was abandoned early +** because the callback returned another non-zero value, it is assumed +** to be an SQLite error code and returned to the caller. 
+*/ +int sqlite3Fts5Tokenize( + Fts5Config *pConfig, /* FTS5 Configuration object */ + const char *pText, int nText, /* Text to tokenize */ + void *pCtx, /* Context passed to xToken() */ + int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ +){ + const sqlite3_tokenizer_module *pMod = pConfig->pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCsr = 0; + int rc; + + rc = pMod->xOpen(pConfig->pTokenizer, pText, nText, &pCsr); + assert( rc==SQLITE_OK || pCsr==0 ); + if( rc==SQLITE_OK ){ + const char *pToken; /* Pointer to token buffer */ + int nToken; /* Size of token in bytes */ + int iStart, iEnd, iPos; /* Start, end and position of token */ + pCsr->pTokenizer = pConfig->pTokenizer; + for(rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos); + rc==SQLITE_OK; + rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos) + ){ + if( (rc = xToken(pCtx, pToken, nToken, iStart, iEnd, iPos)) ) break; + } + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + pMod->xClose(pCsr); + } + return rc; +} + + diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c new file mode 100644 index 0000000000..36dc60a069 --- /dev/null +++ b/ext/fts5/fts5_expr.c @@ -0,0 +1,670 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ + +#include "fts5Int.h" +#include "fts5parse.h" + +/* +** All token types in the generated fts5parse.h file are greater than 0. +*/ +#define FTS5_EOF 0 + +typedef struct Fts5ExprTerm Fts5ExprTerm; + +/* +** Functions generated by lemon from fts5parse.y. 
+*/
+void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(size_t));
+void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
+void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
+
+/*
+** A node in the parsed expression tree.
+**
+** eType:
+**   Expression node type. Always one of:
+**
+**     FTS5_AND                 (pLeft, pRight valid)
+**     FTS5_OR                  (pLeft, pRight valid)
+**     FTS5_NOT                 (pLeft, pRight valid)
+**     FTS5_STRING              (pNear valid)
+*/
+struct Fts5Expr {
+  int eType;                      /* Node type */
+  Fts5Expr *pLeft;                /* Left hand child node */
+  Fts5Expr *pRight;               /* Right hand child node */
+  Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */
+};
+
+/*
+** An instance of the following structure represents a single search term
+** or term prefix.
+*/
+struct Fts5ExprTerm {
+  int bPrefix;                    /* True for a prefix term */
+  char *zTerm;                    /* nul-terminated term */
+};
+
+/*
+** A phrase. One or more terms that must appear in a contiguous sequence
+** within a document for it to match.
+**
+** The aTerm[] array is a trailing array: the object is allocated with
+** space for the terms following the fixed-size header.
+*/
+struct Fts5ExprPhrase {
+  int nTerm;                      /* Number of entries in aTerm[] */
+  Fts5ExprTerm aTerm[0];          /* Terms that make up this phrase */
+};
+
+/*
+** One or more phrases that must appear within a certain token distance of
+** each other within each matching document.
+**
+** The apPhrase[] array is a trailing array: the object is allocated with
+** space for the phrase pointers following the fixed-size header.
+*/
+struct Fts5ExprNearset {
+  int nNear;                      /* NEAR parameter */
+  int iCol;                       /* Column to search (-1 -> all columns) */
+  int nPhrase;                    /* Number of entries in apPhrase[] array */
+  Fts5ExprPhrase *apPhrase[0];    /* Array of phrase pointers */
+};
+
+
+/*
+** Parse context.
+*/
+struct Fts5Parse {
+  Fts5Config *pConfig;            /* Configuration the expression is for */
+  char *zErr;                     /* Error message, set by ParseError() */
+  int rc;                         /* SQLITE_OK until an error occurs */
+  Fts5Expr *pExpr;                /* Result of a successful parse */
+};
+
+/*
+** Record a parse error. Only the first error is kept: if pParse->rc is
+** already set, this call does nothing. Otherwise the formatted message is
+** stored in pParse->zErr and pParse->rc is set to SQLITE_ERROR.
+*/
+void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
+  va_list ap;
+  if( pParse->rc!=SQLITE_OK ) return;
+  va_start(ap, zFmt);
+  pParse->zErr = sqlite3_vmprintf(zFmt, ap);
+  va_end(ap);
+  pParse->rc = SQLITE_ERROR;
+}
+
+/*
+** Return true if t is a space, tab, newline or carriage return.
+*/
+static int fts5ExprIsspace(char t){
+  switch( t ){
+    case ' ': case '\t': case '\n': case '\r':
+      return 1;
+  }
+  return 0;
+}
+
+/*
+** Return true if t may appear inside a bareword token. That is every
+** character except whitespace, the nul terminator and the operator
+** characters ':', '(', ')', ',', '+' and '*'.
+*/
+static int fts5ExprIstoken(char t){
+  switch( t ){
+    case ' ': case '\t': case '\n': case '\r':
+    case '\0':
+    case ':': case '(': case ')':
+    case ',': case '+': case '*':
+      return 0;
+  }
+  return 1;
+}
+
+/*
+** Read the first token from the nul-terminated string at *pz.
+**
+** The token type is returned, pToken is set to cover the token text and
+** *pz is advanced past it. If a quoted string is unterminated, an error
+** is left in pParse and FTS5_EOF is returned without advancing *pz.
+*/
+static int fts5ExprGetToken(
+  Fts5Parse *pParse,
+  const char **pz,                /* IN/OUT: Pointer into buffer */
+  Fts5Token *pToken
+){
+  const char *zIn = *pz;
+  int eTok;
+
+  /* Skip past any whitespace */
+  while( fts5ExprIsspace(*zIn) ) zIn++;
+
+  /* Default to a single character token starting at zIn. */
+  pToken->p = zIn;
+  pToken->n = 1;
+  switch( *zIn ){
+    case '(':  eTok = FTS5_LP;    break;
+    case ')':  eTok = FTS5_RP;    break;
+    case ':':  eTok = FTS5_COLON; break;
+    case ',':  eTok = FTS5_COMMA; break;
+    case '+':  eTok = FTS5_PLUS;  break;
+    case '*':  eTok = FTS5_STAR;  break;
+    case '\0': eTok = FTS5_EOF;   break;
+
+    case '"': {
+      /* A quoted string. A pair of double-quotes ("") inside the string
+      ** does not terminate it. On exit from the loop zEnd points to the
+      ** byte following the closing quote. */
+      const char *zEnd = &zIn[1];
+      eTok = FTS5_STRING;
+      for(;;){
+        char c = *zEnd++;
+        if( c=='"' ){
+          if( *zEnd!='"' ) break;
+          zEnd++;
+        }else if( c=='\0' ){
+          sqlite3Fts5ParseError(pParse, "unterminated string");
+          return FTS5_EOF;
+        }
+      }
+      pToken->n = (zEnd - zIn);
+      break;
+    }
+
+    default: {
+      /* A bareword. It extends up to the next non-token character. The
+      ** barewords OR, NOT and AND are operators, anything else is a
+      ** string. */
+      const char *zEnd = &zIn[1];
+      while( fts5ExprIstoken(*zEnd) ) zEnd++;
+      pToken->n = (zEnd - zIn);
+      eTok = FTS5_STRING;
+      if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ){
+        eTok = FTS5_OR;
+      }else if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ){
+        eTok = FTS5_NOT;
+      }else if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ){
+        eTok = FTS5_AND;
+      }
+      break;
+    }
+  }
+
+  *pz = &pToken->p[pToken->n];
+  return eTok;
+}
+
+/* Allocator callbacks handed to the lemon-generated parser. */
+static void *fts5ParseAlloc(size_t t){ return sqlite3_malloc((int)t); }
+static void fts5ParseFree(void *p){ sqlite3_free(p); }
+
+/*
+** Parse the expression text zExpr. The output parameters are first
+** cleared, then set from the parse context: *ppNew to the parsed
+** expression and *pzErr to the error message, if any. The return value
+** is the parse context's error code, or SQLITE_NOMEM if the parser
+** object cannot be allocated.
+*/
+int sqlite3Fts5ExprNew(
+  Fts5Config *pConfig,
+  Fts5Index *pIdx,
+  const char *zExpr,              /* Expression text */
+  Fts5Expr **ppNew,
+  char **pzErr
+){
+  Fts5Parse sParse;               /* Parse context */
+  Fts5Token token;                /* Most recently read token */
+  const char *zCsr = zExpr;       /* Read position within zExpr */
+  int eTok;                       /* Next token type */
+  void *pEngine;                  /* Lemon parser object */
+
+  *ppNew = 0;
+  *pzErr = 0;
+  memset(&sParse, 0, sizeof(sParse));
+  pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
+  if( pEngine==0 ) return SQLITE_NOMEM;
+  sParse.pConfig = pConfig;
+
+  /* Feed tokens to the parser until an error occurs or the EOF token
+  ** has been delivered. */
+  for(;;){
+    eTok = fts5ExprGetToken(&sParse, &zCsr, &token);
+    sqlite3Fts5Parser(pEngine, eTok, token, &sParse);
+    if( sParse.rc!=SQLITE_OK || eTok==FTS5_EOF ) break;
+  }
+  sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
+
+  assert( sParse.pExpr==0 || (sParse.rc==SQLITE_OK && sParse.zErr==0) );
+  *ppNew = sParse.pExpr;
+  *pzErr = sParse.zErr;
+  return sParse.rc;
+}
+
+/*
+** Free the expression object passed as the only argument, along with
+** all of its descendants. Passing NULL is a no-op.
+*/
+void sqlite3Fts5ExprFree(Fts5Expr *p){
+  if( p==0 ) return;
+  sqlite3Fts5ExprFree(p->pLeft);
+  sqlite3Fts5ExprFree(p->pRight);
+  sqlite3Fts5ParseNearsetFree(p->pNear);
+  sqlite3_free(p);
+}
+
+/*
+** Argument pIn points to a buffer of nIn bytes. This function allocates
+** and returns a new buffer populated with a copy of (pIn/nIn) with a
+** nul-terminator byte appended to it.
+**
+** It is the responsibility of the caller to eventually free the returned
+** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
+*/
+static char *fts5Strdup(const char *pIn, int nIn){
+  char *zCopy = (char*)sqlite3_malloc(nIn+1);
+  if( zCopy==0 ) return 0;
+  memcpy(zCopy, pIn, nIn);
+  zCopy[nIn] = '\0';
+  return zCopy;
+}
+
+/*
+** Set *pz to a nul-terminated copy of the text of token pToken. Return
+** SQLITE_OK if successful, or SQLITE_NOMEM if the allocation fails. The
+** caller must eventually release *pz using sqlite3_free().
+*/
+static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
+  *pz = sqlite3_mprintf("%.*s", pToken->n, pToken->p);
+  return (*pz ? SQLITE_OK : SQLITE_NOMEM);
+}
+
+/*
+** Free the phrase object passed as the only argument.
+*/
+static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
+  if( pPhrase ){
+    int i;
+    for(i=0; i<pPhrase->nTerm; i++){
+      sqlite3_free(pPhrase->aTerm[i].zTerm);
+    }
+    sqlite3_free(pPhrase);
+  }
+}
+
+/*
+** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
+** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
+** appended to it and the results returned.
+**
+** If an OOM error occurs, both the pNear and pPhrase objects are freed and
+** NULL returned. The same happens if an error has already been recorded
+** in pParse when this function is called.
+*/
+Fts5ExprNearset *sqlite3Fts5ParseNearset(
+  Fts5Parse *pParse,              /* Parse context */
+  Fts5ExprNearset *pNear,         /* Existing nearset, or NULL */
+  Fts5ExprPhrase *pPhrase         /* Recently parsed phrase */
+){
+  const int SZALLOC = 8;          /* apPhrase[] grows in steps of this size */
+  Fts5ExprNearset *pRet = 0;
+
+  if( pParse->rc==SQLITE_OK ){
+    if( pNear==0 ){
+      /* Allocate a new, zeroed nearset with room for SZALLOC phrases. */
+      int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
+      pRet = sqlite3_malloc(nByte);
+      if( pRet==0 ){
+        pParse->rc = SQLITE_NOMEM;
+      }else{
+        memset(pRet, 0, nByte);
+        pRet->iCol = -1;
+      }
+    }else if( (pNear->nPhrase % SZALLOC)==0 ){
+      /* The existing array is full. Grow it by SZALLOC slots. The new
+      ** size must be computed from pNear, as pRet is still NULL here. */
+      int nNew = pNear->nPhrase + SZALLOC;
+      int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
+
+      pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte);
+      if( pRet==0 ){
+        pParse->rc = SQLITE_NOMEM;
+      }
+    }else{
+      pRet = pNear;
+    }
+  }
+
+  if( pRet==0 ){
+    /* If sqlite3_realloc() failed, pNear is still a valid allocation. */
+    assert( pParse->rc!=SQLITE_OK );
+    sqlite3Fts5ParseNearsetFree(pNear);
+    sqlite3Fts5ParsePhraseFree(pPhrase);
+  }else{
+    pRet->apPhrase[pRet->nPhrase++] = pPhrase;
+  }
+  return pRet;
+}
+
+/*
+** Context object passed to the fts5ParseTokenize() callback.
+*/
+typedef struct TokenCtx TokenCtx;
+struct TokenCtx {
+  Fts5ExprPhrase *pPhrase;        /* Phrase that tokens are appended to */
+};
+
+/*
+** Callback for tokenizing terms used by ParseTerm().
+*/
+static int fts5ParseTokenize(
+  void *pContext,                 /* Pointer to TokenCtx object */
+  const char *pToken,             /* Buffer containing token */
+  int nToken,                     /* Size of token in bytes */
+  int iStart,                     /* Start offset of token */
+  int iEnd,                       /* End offset of token */
+  int iPos                        /* Position offset of token */
+){
+  const int SZALLOC = 8;          /* aTerm[] grows in steps of this size */
+  TokenCtx *pCtx = (TokenCtx*)pContext;
+  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
+  Fts5ExprTerm *pTerm;
+
+  /* If there is no phrase object yet, or if its aTerm[] array is full,
+  ** (re)allocate it with room for SZALLOC more terms. */
+  if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
+    Fts5ExprPhrase *pNew;
+    int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
+
+    pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
+        sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
+    );
+    if( pNew==0 ) return SQLITE_NOMEM;
+    pCtx->pPhrase = pPhrase = pNew;
+
+    /* For a brand new allocation this sets nTerm to 0. Otherwise it is
+    ** the existing term count. */
+    pNew->nTerm = nNew - SZALLOC;
+  }
+
+  /* Append the new term. The prefix flag is set later by ParseTerm(). */
+  pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
+  pTerm->bPrefix = 0;
+
+  pTerm->zTerm = fts5Strdup(pToken, nToken);
+  return pTerm->zTerm ? SQLITE_OK : SQLITE_NOMEM;
+}
+
+
+/*
+** Free the phrase object passed as the only argument.
+*/
+void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
+  fts5ExprPhraseFree(pPhrase);
+}
+
+/*
+** Free the nearset object passed as the only argument, including all of
+** the phrase objects it contains. Passing NULL is a no-op.
+*/
+void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
+  if( pNear ){
+    int i;
+    for(i=0; i<pNear->nPhrase; i++){
+      fts5ExprPhraseFree(pNear->apPhrase[i]);
+    }
+    sqlite3_free(pNear);
+  }
+}
+
+/*
+** Store expression p in the parse context as the result of the parse.
+*/
+void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5Expr *p){
+  assert( pParse->pExpr==0 );
+  pParse->pExpr = p;
+}
+
+/*
+** This function is called by the parser to process a string token. The
+** string may or may not be quoted. In any case it is tokenized and a
+** phrase object consisting of all tokens returned.
+**
+** If pPhrase is not NULL, the new tokens are appended to it. If the text
+** contains no tokens at all and pPhrase is NULL, an empty phrase object
+** (nTerm==0) is returned. If an error occurs, the phrase object is freed,
+** the error code is left in pParse and NULL is returned.
+*/
+Fts5ExprPhrase *sqlite3Fts5ParseTerm(
+  Fts5Parse *pParse,              /* Parse context */
+  Fts5ExprPhrase *pPhrase,        /* Phrase to append to */
+  Fts5Token *pToken,              /* String to tokenize */
+  int bPrefix                     /* True if there is a trailing "*" */
+){
+  Fts5Config *pConfig = pParse->pConfig;
+  TokenCtx sCtx;                  /* Context object passed to callback */
+  int rc;                         /* Tokenize return code */
+  char *z = 0;
+
+  memset(&sCtx, 0, sizeof(TokenCtx));
+  sCtx.pPhrase = pPhrase;
+
+  rc = fts5ParseStringFromToken(pToken, &z);
+  if( rc==SQLITE_OK ){
+    sqlite3Fts5Dequote(z);
+    rc = sqlite3Fts5Tokenize(
+        pConfig, z, (int)strlen(z), &sCtx, fts5ParseTokenize
+    );
+  }
+
+  /* The tokenizer callback only allocates a phrase when it sees a token.
+  ** If there were no tokens and no phrase was passed in, allocate an empty
+  ** one so that callers never have to deal with a NULL phrase unless an
+  ** error has occurred. */
+  if( rc==SQLITE_OK && sCtx.pPhrase==0 ){
+    sCtx.pPhrase = (Fts5ExprPhrase*)sqlite3_malloc(sizeof(Fts5ExprPhrase));
+    if( sCtx.pPhrase==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      memset(sCtx.pPhrase, 0, sizeof(Fts5ExprPhrase));
+    }
+  }
+
+  /* pParse->rc is only written on failure, so that an error recorded
+  ** earlier in the parse is never replaced by SQLITE_OK. */
+  if( rc ){
+    pParse->rc = rc;
+    fts5ExprPhraseFree(sCtx.pPhrase);
+    sCtx.pPhrase = 0;
+  }else if( sCtx.pPhrase->nTerm>0 ){
+    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
+  }
+
+  sqlite3_free(z);
+  return sCtx.pPhrase;
+}
+
+/*
+** Check that token pTok is the keyword NEAR. If it is not, and no error
+** has been recorded yet, leave a syntax error in pParse.
+*/
+void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
+  if( pParse->rc==SQLITE_OK ){
+    if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
+      sqlite3Fts5ParseError(
+          pParse, "syntax error near \"%.*s\"", pTok->n, pTok->p
+      );
+    }
+  }
+}
+
+/*
+** Set the NEAR distance of pNear from token p, which must consist
+** entirely of decimal digits. An empty token selects the default,
+** FTS5_DEFAULT_NEARDIST. If the token contains anything other than
+** digits, an error is left in pParse and pNear is not modified.
+**
+** pNear may be NULL (sqlite3Fts5ParseNearset() returns NULL after an
+** error), in which case nothing is stored.
+*/
+void sqlite3Fts5ParseSetDistance(
+  Fts5Parse *pParse,
+  Fts5ExprNearset *pNear,
+  Fts5Token *p
+){
+  int nNear = 0;
+  int i;
+  if( p->n ){
+    for(i=0; i<p->n; i++){
+      char c = (char)p->p[i];
+      if( c<'0' || c>'9' ){
+        sqlite3Fts5ParseError(
+            pParse, "expected integer, got \"%.*s\"", p->n, p->p
+        );
+        return;
+      }
+      /* Stop accumulating digits before the value can overflow an int. */
+      if( nNear<214748363 ) nNear = nNear * 10 + (p->p[i] - '0');
+    }
+  }else{
+    nNear = FTS5_DEFAULT_NEARDIST;
+  }
+  if( pNear ) pNear->nNear = nNear;
+}
+
+/*
+** Restrict pNear to the column named by token p. Column names are
+** compared using sqlite3_stricmp(). If there is no such column, an error
+** is left in pParse.
+**
+** pNear may be NULL (sqlite3Fts5ParseNearset() returns NULL after an
+** error), in which case nothing is stored.
+*/
+void sqlite3Fts5ParseSetColumn(
+  Fts5Parse *pParse,
+  Fts5ExprNearset *pNear,
+  Fts5Token *p
+){
+  char *z = 0;
+  int rc = fts5ParseStringFromToken(p, &z);
+  if( rc==SQLITE_OK ){
+    Fts5Config *pConfig = pParse->pConfig;
+    int i;
+    for(i=0; i<pConfig->nCol; i++){
+      if( 0==sqlite3_stricmp(pConfig->azCol[i], z) ){
+        if( pNear ) pNear->iCol = i;
+        break;
+      }
+    }
+    if( i==pConfig->nCol ){
+      sqlite3Fts5ParseError(pParse, "no such column: %s", z);
+    }
+    sqlite3_free(z);
+  }else{
+    pParse->rc = rc;
+  }
+}
+
+/*
+** Allocate and return a new expression object. If anything goes wrong (i.e.
+** OOM error), leave an error code in pParse and return NULL.
+**
+** If NULL is returned, whether because of a new error or because pParse
+** already contained one, pLeft, pRight and pNear are freed.
+*/
+Fts5Expr *sqlite3Fts5ParseExpr(
+  Fts5Parse *pParse,              /* Parse context */
+  int eType,                      /* FTS5_STRING, AND, OR or NOT */
+  Fts5Expr *pLeft,                /* Left hand child expression */
+  Fts5Expr *pRight,               /* Right hand child expression */
+  Fts5ExprNearset *pNear          /* For STRING expressions, the near cluster */
+){
+  Fts5Expr *pRet = 0;
+
+  if( pParse->rc==SQLITE_OK ){
+    assert( (eType!=FTS5_STRING && pLeft  && pRight  && !pNear)
+         || (eType==FTS5_STRING && !pLeft && !pRight && pNear)
+    );
+    pRet = (Fts5Expr*)sqlite3_malloc(sizeof(Fts5Expr));
+    if( pRet==0 ){
+      pParse->rc = SQLITE_NOMEM;
+    }else{
+      memset(pRet, 0, sizeof(*pRet));
+      pRet->eType = eType;
+      pRet->pLeft = pLeft;
+      pRet->pRight = pRight;
+      pRet->pNear = pNear;
+    }
+  }
+
+  if( pRet==0 ){
+    assert( pParse->rc!=SQLITE_OK );
+    sqlite3Fts5ExprFree(pLeft);
+    sqlite3Fts5ExprFree(pRight);
+    sqlite3Fts5ParseNearsetFree(pNear);
+  }
+  return pRet;
+}
+
+/*
+** Return a newly allocated string containing the text of pTerm enclosed
+** in double-quotes, with embedded double-quotes doubled and " *" appended
+** if this is a prefix term. The caller must release the string using
+** sqlite3_free(). NULL is returned if an OOM error occurs.
+*/
+static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
+  /* Worst case: every byte is doubled, plus two quotes, " *" and a nul. */
+  char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);
+  if( zQuoted ){
+    int i = 0;
+    char *zIn = pTerm->zTerm;
+    zQuoted[i++] = '"';
+    while( *zIn ){
+      if( *zIn=='"' ) zQuoted[i++] = '"';
+      zQuoted[i++] = *zIn++;
+    }
+    zQuoted[i++] = '"';
+    if( pTerm->bPrefix ){
+      zQuoted[i++] = ' ';
+      zQuoted[i++] = '*';
+    }
+    zQuoted[i++] = '\0';
+  }
+  return zQuoted;
+}
+
+/*
+** Format the printf() style arguments and append the result to string
+** zApp, which must have been obtained from sqlite3_malloc() or else be
+** NULL. zApp is always freed. The new string is returned, or NULL if an
+** OOM error occurs.
+*/
+static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
+  char *zNew;
+  va_list ap;
+  va_start(ap, zFmt);
+  zNew = sqlite3_vmprintf(zFmt, ap);
+  va_end(ap);
+  if( zNew==0 ){
+    /* OOM while formatting. Do not fall through to the "%s%s" step below
+    ** - that would format the NULL as an empty string and so return a
+    ** truncated result instead of reporting the error. */
+    sqlite3_free(zApp);
+    return 0;
+  }
+  if( zApp ){
+    char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew);
+    sqlite3_free(zNew);
+    zNew = zNew2;
+  }
+  sqlite3_free(zApp);
+  return zNew;
+}
+
+/*
+** Return a newly allocated string containing a text representation of
+** expression pExpr. The caller must release the string using
+** sqlite3_free(). NULL is returned if an OOM error occurs.
+*/
+static char *fts5ExprPrint(Fts5Config *pConfig, Fts5Expr *pExpr){
+  char *zRet = 0;
+  if( pExpr->eType==FTS5_STRING ){
+    Fts5ExprNearset *pNear = pExpr->pNear;
+    int i;
+    int iTerm;
+
+    /* Each early "return 0" below is safe: fts5PrintfAppend() has already
+    ** freed the string passed to it. */
+    if( pNear->iCol>=0 ){
+      zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[pNear->iCol]);
+      if( zRet==0 ) return 0;
+    }
+
+    if( pNear->nPhrase>1 ){
+      zRet = fts5PrintfAppend(zRet, "NEAR(");
+      if( zRet==0 ) return 0;
+    }
+
+    for(i=0; i<pNear->nPhrase; i++){
+      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+      if( i!=0 ){
+        zRet = fts5PrintfAppend(zRet, " ");
+        if( zRet==0 ) return 0;
+      }
+      for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
+        char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
+        if( zTerm ){
+          zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
+          sqlite3_free(zTerm);
+        }
+        if( zTerm==0 || zRet==0 ){
+          sqlite3_free(zRet);
+          return 0;
+        }
+      }
+    }
+
+    if( pNear->nPhrase>1 ){
+      zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
+      if( zRet==0 ) return 0;
+    }
+
+  }else{
+    char *zOp = 0;
+    char *z1 = 0;
+    char *z2 = 0;
+    switch( pExpr->eType ){
+      case FTS5_AND: zOp = "AND"; break;
+      case FTS5_NOT: zOp = "NOT"; break;
+      case FTS5_OR:  zOp = "OR"; break;
+      default: assert( 0 );
+    }
+
+    /* Sub-expressions that are themselves operators are wrapped in
+    ** parentheses. */
+    z1 = fts5ExprPrint(pConfig, pExpr->pLeft);
+    z2 = fts5ExprPrint(pConfig, pExpr->pRight);
+    if( z1 && z2 ){
+      int b1 = pExpr->pLeft->eType!=FTS5_STRING;
+      int b2 = pExpr->pRight->eType!=FTS5_STRING;
+      zRet = sqlite3_mprintf("%s%s%s %s %s%s%s",
+          b1 ? "(" : "", z1, b1 ? ")" : "",
+          zOp,
+          b2 ? "(" : "", z2, b2 ? ")" : ""
+      );
+    }
+    sqlite3_free(z1);
+    sqlite3_free(z2);
+  }
+
+  return zRet;
+}
+
+/*
+** The implementation of user-defined scalar function fts5_expr().
+*/ +static void fts5ExprFunction( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + sqlite3 *db = sqlite3_context_db_handle(pCtx); + const char *zExpr = 0; + char *zErr = 0; + Fts5Expr *pExpr = 0; + int rc; + int i; + + const char **azConfig; /* Array of arguments for Fts5Config */ + int nConfig; /* Size of azConfig[] */ + Fts5Config *pConfig = 0; + + nConfig = nArg + 2; + azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); + if( azConfig==0 ){ + sqlite3_result_error_nomem(pCtx); + return; + } + azConfig[0] = 0; + azConfig[1] = "main"; + azConfig[2] = "tbl"; + for(i=1; i 0) +** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.) +** + first leaf page number (often 1) +** + final leaf page number +** +** 2. The Averages Record: +** +** A single record within the %_data table. The data is a list of varints. +** The first value is the number of rows in the index. Then, for each column +** from left to right, the total number of tokens in the column for all +** rows of the table. +** +** 3. Segment leaves: +** +** TERM DOCLIST FORMAT: +** +** Most of each segment leaf is taken up by term/doclist data. 
The
+** general format of the term/doclist data is:
+**
+**         varint : size of first term
+**         blob:    first term data
+**         doclist: first doclist
+**         zero-or-more {
+**           varint:  number of bytes in common with previous term
+**           varint:  number of bytes of new term data (nNew)
+**           blob:    nNew bytes of new term data
+**           doclist: next doclist
+**         }
+**
+**     doclist format:
+**
+**         varint:  first rowid
+**         poslist: first poslist
+**         zero-or-more {
+**           varint:  rowid delta (always > 0)
+**           poslist: next poslist
+**         }
+**         0x00 byte
+**
+**     poslist format:
+**
+**         collist: collist for column 0
+**         zero-or-more {
+**           0x01 byte
+**           varint: column number (I)
+**           collist: collist for column I
+**         }
+**         0x00 byte
+**
+**     collist format:
+**
+**         varint: first offset + 2
+**         zero-or-more {
+**           varint: offset delta + 2
+**         }
+**
+**   PAGINATION
+**
+**     The format described above is only accurate if the entire term/doclist
+**     data fits on a single leaf page. If this is not the case, the format
+**     is changed in two ways:
+**
+**       + if the first rowid on a page occurs before the first term, it
+**         is stored as a literal value:
+**
+**             varint:  first rowid
+**
+**       + the first term on each page is stored in the same way as the
+**         very first term of the segment:
+**
+**             varint : size of first term
+**             blob:    first term data
+**
+**     Each leaf page begins with:
+**
+**       + 2-byte unsigned containing offset to first rowid (or 0).
+**       + 2-byte unsigned containing offset to first term (or 0).
+**
+**   Followed by term/doclist data.
+**
+** 4. Segment interior nodes:
+**
+**   The interior nodes turn the list of leaves into a b+tree.
+**
+**   Each interior node begins with a varint - the page number of the left
+**   most child node. Following this, for each leaf page except the first,
+**   the interior nodes contain:
+**
+**     a) If the leaf page contains at least one term, then a term-prefix that
+**        is greater than all previous terms, and less than or equal to the
+**        first term on the leaf page.
+**
+**     b) If the leaf page contains no terms, a record indicating how many
+**        consecutive leaves contain no terms, and whether or not there is an
+**        associated by-rowid index record.
+**
+**   By definition, there is never more than one type (b) record in a row.
+**   Type (b) records only ever appear on height=1 pages - immediate parents
+**   of leaves. Only type (a) records are pushed to higher levels.
+**
+**   Term format:
+**
+**     * Number of bytes in common with previous term plus 2, as a varint.
+**     * Number of bytes of new term data, as a varint.
+**     * new term data.
+**
+**   No-term format:
+**
+**     * either a 0x00 or 0x01 byte. If the value 0x01 is used, then there
+**       is an associated index-by-rowid record.
+**     * the number of zero-term leaves as a varint.
+**
+** 5. Segment doclist indexes:
+**
+**   A list of varints - the first docid on each page (starting with the
+**   second) of the doclist. First element in the list is a literal docid.
+**   Each docid thereafter is a (negative) delta.
+*/
+
+/*
+** Rowids for the averages and structure records in the %_data table.
+*/
+#define FTS5_AVERAGES_ROWID 1     /* Rowid used for the averages record */
+#define FTS5_STRUCTURE_ROWID(iIdx) (10 + (iIdx))     /* For structure records */
+
+/*
+** Macros determining the rowids used by segment nodes. All nodes in all
+** segments for all indexes (the regular FTS index and any prefix indexes)
+** are stored in the %_data table with large positive rowids.
+**
+** The %_data table may contain up to (1< ((1<=? AND id<=?" */
+};
+
+/*
+** Buffer object for the incremental building of string data.
+*/
+struct Fts5Buffer {
+  u8 *p;                          /* Buffer contents */
+  int n;                          /* Bytes of p[] currently in use */
+  int nSpace;                     /* Bytes allocated at p */
+};
+
+/*
+** A single record read from the %_data table.
+*/
+struct Fts5Data {
+  u8 *p;                          /* Pointer to buffer containing record */
+  int n;                          /* Size of record in bytes */
+  int nRef;                       /* Ref count */
+};
+
+/*
+** Before it is flushed to a level-0 segment, term data is collected in
+** the hash tables in the Fts5Index.aHash[] array.
Hash table keys are
+** terms (or, for prefix indexes, term prefixes) and values are instances
+** of type Fts5PendingDoclist.
+*/
+struct Fts5PendingDoclist {
+  u8 *pTerm;                      /* Term for this entry */
+  int nTerm;                      /* Bytes of data at pTerm */
+  Fts5PendingPoslist *pPoslist;   /* Linked list of position lists */
+  int iCol;                       /* Column for last entry in pPoslist */
+  int iPos;                       /* Pos value for last entry in pPoslist */
+  Fts5PendingDoclist *pNext;      /* Used during merge sort */
+};
+
+/* One position list - the positions of a term within a single row. */
+struct Fts5PendingPoslist {
+  i64 iRowid;                     /* Rowid for this doclist entry */
+  Fts5Buffer buf;                 /* Current doclist contents */
+  Fts5PendingPoslist *pNext;      /* Previous poslist for same term */
+};
+
+/*
+** The contents of the "structure" record for each index are represented
+** in memory using an Fts5Structure object, which uses instances of the
+** other Fts5StructureXXX types as components.
+*/
+struct Fts5StructureSegment {
+  int iSegid;                     /* Segment id */
+  int nHeight;                    /* Height of segment b-tree */
+  int pgnoFirst;                  /* First leaf page number in segment */
+  int pgnoLast;                   /* Last leaf page number in segment */
+};
+struct Fts5StructureLevel {
+  int nMerge;                     /* Number of segments in incr-merge */
+  int nSeg;                       /* Total number of segments on level */
+  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
+};
+struct Fts5Structure {
+  u64 nWriteCounter;              /* Total leaves written to level 0 */
+  int nLevel;                     /* Number of levels in this index */
+  Fts5StructureLevel aLevel[0];   /* Array of nLevel level objects */
+};
+
+/*
+** An object of type Fts5SegWriter is used to write to segments.
+*/
+struct Fts5PageWriter {
+  int pgno;                       /* Page number for this page */
+  Fts5Buffer buf;                 /* Buffer containing page data */
+  Fts5Buffer term;                /* Buffer containing previous term on page */
+};
+
+/* State of a segment write in progress. */
+struct Fts5SegWriter {
+  int iIdx;                       /* Index to write to */
+  int iSegid;                     /* Segid to write to */
+  int nWriter;                    /* Number of entries in aWriter */
+  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
+  i64 iPrevRowid;                 /* Previous docid written to current leaf */
+  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
+  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
+  int nLeafWritten;               /* Number of leaf pages written */
+  int nEmpty;                     /* Number of contiguous term-less nodes */
+};
+
+/*
+** Object for iterating through the merged results of one or more segments,
+** visiting each term/docid pair in the merged data.
+**
+** nSeg is always a power of two greater than or equal to the number of
+** segments that this object is merging data from. Both the aSeg[] and
+** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
+** with zeroed objects - these are handled as if they were iterators opened
+** on empty segments.
+**
+** The result of comparing segments aSeg[N] and aSeg[N+1], where N is an
+** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
+** comparison in this context is the index of the iterator that currently
+** points to the smaller term/rowid combination. Iterators at EOF are
+** considered to be greater than all other iterators.
+**
+** aFirst[1] contains the index in aSeg[] of the iterator that points to
+** the smallest key overall. aFirst[0] is unused.
+*/
+struct Fts5MultiSegIter {
+  int nSeg;                       /* Size of aSeg[] array */
+  Fts5SegIter *aSeg;              /* Array of segment iterators */
+  u16 *aFirst;                    /* Current merge state (see above) */
+};
+
+/*
+** Object for iterating through a single segment, visiting each term/docid
+** pair in the segment.
+**
+** pSeg:
+**   The segment to iterate through.
+**
+** iLeafPgno:
+**   Current leaf page number within segment.
+**
+** iLeafOffset:
+**   Byte offset within the current leaf that is one byte past the end of the
+**   rowid field of the current entry. Usually this is the first byte of
+**   the position list data. The exception is if the rowid for the current
+**   entry is the last thing on the leaf page.
+**
+** pLeaf:
+**   Buffer containing current leaf page data. Set to NULL at EOF.
+**
+** iTermLeafPgno, iTermLeafOffset:
+**   Leaf page number containing the last term read from the segment. And
+**   the offset immediately following the term data.
+*/
+struct Fts5SegIter {
+  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
+  int iIdx;                       /* NOTE(review): presumably the index pSeg
+                                  ** belongs to. The original comment here
+                                  ** duplicated iLeafOffset's - confirm. */
+  int iLeafPgno;                  /* Current leaf page number */
+  Fts5Data *pLeaf;                /* Current leaf data */
+  int iLeafOffset;                /* Byte offset within current leaf */
+
+  int iTermLeafPgno;              /* Leaf page holding the last term read */
+  int iTermLeafOffset;            /* Offset just past that term's data */
+
+  /* Variables populated based on current entry. */
+  Fts5Buffer term;                /* Current term */
+  i64 iRowid;                     /* Current rowid */
+};
+
+/*
+** Object for iterating through a single position list.
+*/
+struct Fts5PosIter {
+  Fts5Data *pLeaf;                /* Current leaf data. NULL -> EOF. */
+  i64 iLeafRowid;                 /* Absolute rowid of current leaf */
+  int iLeafOffset;                /* Current offset within leaf */
+
+  int iCol;
+  int iPos;
+};
+
+/*
+** Object for iterating through the contents of a single internal node in
+** memory.
+*/
+struct Fts5NodeIter {
+  /* Internal. Set and managed by fts5NodeIterXXX() functions. Except,
+  ** the EOF test for the iterator is (Fts5NodeIter.aData==0).  */
+  const u8 *aData;
+  int nData;
+  int iOff;
+
+  /* Output variables */
+  Fts5Buffer term;
+  int nEmpty;
+  int iChild;
+};
+
+/*
+** An Fts5BtreeIter object is used to iterate through all entries in the
+** b-tree hierarchy belonging to a single fts5 segment. In this case the
+** "b-tree hierarchy" is all b-tree nodes except leaves.
Each entry in the
+** b-tree hierarchy consists of the following:
+**
+**   iLeaf:  The page number of the leaf page the entry points to.
+**
+**   term:   A split-key that all terms on leaf page $leaf must be greater
+**           than or equal to. The "term" associated with the first b-tree
+**           hierarchy entry (the one that points to leaf page 1) is always
+**           an empty string.
+**
+**   nEmpty: The number of empty (termless) leaf pages that immediately
+**           follow iLeaf.
+**
+** The Fts5BtreeIter object is only used as part of the integrity-check code.
+*/
+struct Fts5BtreeIterLevel {
+  Fts5NodeIter s;                 /* Iterator for the current node */
+  Fts5Data *pData;                /* Data for the current node */
+};
+struct Fts5BtreeIter {
+  Fts5Index *p;                   /* FTS5 backend object */
+  Fts5StructureSegment *pSeg;     /* Iterate through this segment's b-tree */
+  int iIdx;                       /* Index pSeg belongs to */
+  int nLvl;                       /* Size of aLvl[] array */
+  Fts5BtreeIterLevel *aLvl;       /* Level for each tier of b-tree */
+
+  /* Output variables */
+  Fts5Buffer term;                /* Current term */
+  int iLeaf;                      /* Leaf containing terms >= current term */
+  int nEmpty;                     /* Number of "empty" leaves following iLeaf */
+  int bEof;                       /* Set to true at EOF */
+};
+
+/*
+** Write iVal to the two bytes at aOut, most significant byte first.
+*/
+static void fts5PutU16(u8 *aOut, u16 iVal){
+  u8 hi = (u8)(iVal >> 8);
+  u8 lo = (u8)(iVal & 0xFF);
+  aOut[0] = hi;
+  aOut[1] = lo;
+}
+
+/*
+** Read and return the 16-bit value stored most significant byte first
+** in the two bytes at aIn.
+*/
+static u16 fts5GetU16(const u8 *aIn){
+  u16 hi = (u16)aIn[0];
+  u16 lo = (u16)aIn[1];
+  return (u16)((hi << 8) | lo);
+}
+
+/*
+** Allocate and return a buffer at least nByte bytes in size.
+**
+** If an OOM error is encountered, return NULL and set the error code in
+** the Fts5Index handle passed as the first argument.
+**
+** The returned buffer is zeroed.
+*/
+static void *fts5IdxMalloc(Fts5Index *p, int nByte){
+  void *pRet;
+  assert( p->rc==SQLITE_OK );
+  pRet = sqlite3_malloc(nByte);
+  if( pRet==0 ){
+    p->rc = SQLITE_NOMEM;
+  }else{
+    memset(pRet, 0, nByte);
+  }
+  return pRet;
+}
+
+
+/*
+** Ensure that buffer pBuf has space for at least nByte more bytes beyond
+** the pBuf->n bytes currently in use. The allocation at least doubles
+** each time it grows.
+**
+** Return 0 if successful. If an OOM error occurs, or if the required size
+** does not fit in a signed 32-bit integer, set *pRc to SQLITE_NOMEM and
+** return 1. In that case the buffer is left unchanged. If *pRc is already
+** set to something other than SQLITE_OK when this function is called, it
+** is a no-op that returns 1.
+*/
+static int fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){
+  i64 nReq;                       /* Total bytes required */
+
+  /* A no-op if an error has already occurred */
+  if( *pRc ) return 1;
+
+  /* All size arithmetic is done using 64-bit integers, so that doubling
+  ** a large allocation size cannot overflow. */
+  nReq = (i64)pBuf->n + (i64)nByte;
+  if( nReq > (i64)pBuf->nSpace ){
+    u8 *pNew;
+    i64 nNew = pBuf->nSpace ? ((i64)pBuf->nSpace)*2 : 64;
+    while( nNew<nReq ){
+      nNew = nNew * 2;
+    }
+    if( nNew>0x7fffffff ) nNew = nReq;
+    if( nNew>0x7fffffff ){
+      *pRc = SQLITE_NOMEM;
+      return 1;
+    }
+    pNew = sqlite3_realloc(pBuf->p, (int)nNew);
+    if( pNew==0 ){
+      *pRc = SQLITE_NOMEM;
+      return 1;
+    }else{
+      pBuf->nSpace = (int)nNew;
+      pBuf->p = pNew;
+    }
+  }
+  return 0;
+}
+
+/*
+** Encode value iVal as an SQLite varint and append it to the buffer object
+** pBuf. If an OOM error occurs, set *pRc to SQLITE_NOMEM. If an error has
+** already occurred when this function is called, it is a no-op.
+*/
+static void fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
+  /* Reserve 9 bytes for the encoded varint. */
+  if( fts5BufferGrow(pRc, pBuf, 9) ) return;
+  pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iVal);
+}
+
+/*
+** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
+** *pRc to SQLITE_NOMEM. If an error has already occurred when this
+** function is called, it is a no-op.
+*/
+static void fts5BufferAppendBlob(
+  int *pRc,
+  Fts5Buffer *pBuf,
+  int nData,
+  const u8 *pData
+){
+  if( fts5BufferGrow(pRc, pBuf, nData) ) return;
+  memcpy(&pBuf->p[pBuf->n], pData, nData);
+  pBuf->n += nData;
+}
+
+/*
+** Append the nul-terminated string zStr to the buffer pBuf. This function
+** ensures that the byte following the buffer data is set to 0x00, even
+** though this byte is not included in the pBuf->n count.
+*/
+static void fts5BufferAppendString(
+  int *pRc,
+  Fts5Buffer *pBuf,
+  const char *zStr
+){
+  int nStr = (int)strlen(zStr);
+  /* Reserve room for the terminator as well as the string itself. */
+  if( fts5BufferGrow(pRc, pBuf, nStr+1) ) return;
+  fts5BufferAppendBlob(pRc, pBuf, nStr, (const u8*)zStr);
+  if( *pRc==SQLITE_OK ) pBuf->p[pBuf->n] = 0x00;
+}
+
+/*
+** Argument zFmt is a printf() style format string.
This function performs
+** the printf() style processing, then appends the results to buffer pBuf.
+**
+** Like fts5BufferAppendString(), this function ensures that the byte
+** following the buffer data is set to 0x00, even though this byte is not
+** included in the pBuf->n count.
+*/
+static void fts5BufferAppendPrintf(
+  int *pRc,
+  Fts5Buffer *pBuf,
+  char *zFmt, ...
+){
+  char *zText;                    /* Formatted text */
+  va_list ap;
+
+  /* A no-op if an error has already occurred */
+  if( *pRc!=SQLITE_OK ) return;
+
+  va_start(ap, zFmt);
+  zText = sqlite3_vmprintf(zFmt, ap);
+  va_end(ap);
+
+  if( zText==0 ){
+    *pRc = SQLITE_NOMEM;
+    return;
+  }
+  fts5BufferAppendString(pRc, pBuf, zText);
+  sqlite3_free(zText);
+}
+
+/*
+** Release the memory held by pBuf, then zero the structure so that it
+** is an empty buffer once more.
+*/
+static void fts5BufferFree(Fts5Buffer *pBuf){
+  sqlite3_free(pBuf->p);
+  memset(pBuf, 0, sizeof(Fts5Buffer));
+}
+
+/*
+** Discard the contents of the buffer object by resetting its size to
+** zero. The associated memory allocation is retained.
+*/
+static void fts5BufferZero(Fts5Buffer *pBuf){
+  pBuf->n = 0;
+}
+
+/*
+** Set the buffer to contain nData/pData. If an OOM error occurs, leave
+** the error code in *pRc. If an error has already occurred when this
+** function is called, the buffer is emptied but no data is copied.
+*/
+static void fts5BufferSet(
+  int *pRc,
+  Fts5Buffer *pBuf,
+  int nData,
+  const u8 *pData
+){
+  fts5BufferZero(pBuf);
+  fts5BufferAppendBlob(pRc, pBuf, nData, pData);
+}
+
+/*
+** Compare the contents of the two buffers using memcmp(). If one buffer
+** is a prefix of the other, it is considered the lesser.
+**
+** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
+** +ve if pRight is smaller than pLeft. In other words:
+**
+**     res = *pLeft - *pRight
+*/
+static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
+  int nCommon = MIN(pLeft->n, pRight->n);
+  int cmp = memcmp(pLeft->p, pRight->p, nCommon);
+  if( cmp!=0 ) return cmp;
+  /* Equal over the common length: the shorter buffer sorts first. */
+  return pLeft->n - pRight->n;
+}
+
+
+/*
+** Close the read-only blob handle, if it is open.
+*/
+static void fts5CloseReader(Fts5Index *p){
+  if( p->pReader ){
+    sqlite3_blob_close(p->pReader);
+    p->pReader = 0;
+  }
+}
+
+/*
+** Read the record with rowid iRowid from the %_data table.
+**
+** If pBuf is not NULL, the record is read into it, replacing any
+** existing contents, and NULL is returned. Otherwise a new Fts5Data
+** object holding the record, with a reference count of 1, is returned.
+**
+** If an error occurs, it is left in p->rc and NULL is returned. If
+** p->rc is already set when this function is called, it is a no-op.
+*/
+static Fts5Data *fts5DataReadOrBuffer(
+  Fts5Index *p,
+  Fts5Buffer *pBuf,
+  i64 iRowid
+){
+  Fts5Data *pRet = 0;
+  if( p->rc==SQLITE_OK ){
+    int rc;
+
+    /* If the blob handle is not yet open, open and seek it. Otherwise, use
+    ** the blob_reopen() API to reseek the existing blob handle.  */
+    if( p->pReader==0 ){
+      Fts5Config *pConfig = p->pConfig;
+      rc = sqlite3_blob_open(pConfig->db,
+          pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
+      );
+    }else{
+      rc = sqlite3_blob_reopen(p->pReader, iRowid);
+    }
+
+    if( rc==SQLITE_OK ){
+      int nByte = sqlite3_blob_bytes(p->pReader);
+      if( pBuf ){
+        fts5BufferZero(pBuf);
+        /* Only read the blob if the buffer really was enlarged. If
+        ** fts5BufferGrow() fails it sets rc to SQLITE_NOMEM and leaves
+        ** the buffer at its old size, so reading nByte bytes into it
+        ** could write past the end of the allocation. */
+        if( 0==fts5BufferGrow(&rc, pBuf, nByte) ){
+          rc = sqlite3_blob_read(p->pReader, pBuf->p, nByte, 0);
+          if( rc==SQLITE_OK ) pBuf->n = nByte;
+        }
+      }else{
+        /* The record data is stored in the same allocation, immediately
+        ** following the Fts5Data header. */
+        pRet = (Fts5Data*)fts5IdxMalloc(p, sizeof(Fts5Data) + nByte);
+        if( pRet==0 ){
+          rc = SQLITE_NOMEM;
+        }else{
+          pRet->n = nByte;
+          pRet->p = (u8*)&pRet[1];
+          pRet->nRef = 1;
+          rc = sqlite3_blob_read(p->pReader, pRet->p, nByte, 0);
+          if( rc!=SQLITE_OK ){
+            sqlite3_free(pRet);
+            pRet = 0;
+          }
+        }
+      }
+    }
+    p->rc = rc;
+  }
+
+  return pRet;
+}
+
+/*
+** Retrieve a record from the %_data table.
+**
+** If an error occurs, NULL is returned and an error left in the
+** Fts5Index object.
+*/
+static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
+  Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid);
+  /* NULL is returned if and only if an error has been recorded. */
+  assert( (pRet==0)==(p->rc!=SQLITE_OK) );
+  return pRet;
+}
+
+/*
+** Read a record from the %_data table into the buffer supplied as the
+** second argument.
+**
+** If an error occurs, an error is left in the Fts5Index object. If an
+** error has already occurred when this function is called, it is a
+** no-op.
+*/ +static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){ + (void)fts5DataReadOrBuffer(p, pBuf, iRowid); +} + +/* +** Release a reference to data record returned by an earlier call to +** fts5DataRead(). +*/ +static void fts5DataRelease(Fts5Data *pData){ + if( pData ){ + pData->nRef--; + if( pData->nRef==0 ) sqlite3_free(pData); + } +} + +static void fts5DataReference(Fts5Data *pData){ + pData->nRef++; +} + +/* +** INSERT OR REPLACE a record into the %_data table. +*/ +static void fts5DataWrite(Fts5Index *p, i64 iRowid, u8 *pData, int nData){ + if( p->rc!=SQLITE_OK ) return; + + if( p->pWriter==0 ){ + int rc; + Fts5Config *pConfig = p->pConfig; + char *zSql = sqlite3_mprintf( + "REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0); + sqlite3_free(zSql); + } + if( rc!=SQLITE_OK ){ + p->rc = rc; + return; + } + } + + sqlite3_bind_int64(p->pWriter, 1, iRowid); + sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); + sqlite3_step(p->pWriter); + p->rc = sqlite3_reset(p->pWriter); +} + +/* +** Execute the following SQL: +** +** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast +*/ +static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ + if( p->rc!=SQLITE_OK ) return; + + if( p->pDeleter==0 ){ + int rc; + Fts5Config *pConfig = p->pConfig; + char *zSql = sqlite3_mprintf( + "DELETE FROM '%q'.%Q WHERE id>=? AND id<=?", pConfig->zDb, p->zDataTbl + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0); + sqlite3_free(zSql); + } + if( rc!=SQLITE_OK ){ + p->rc = rc; + return; + } + } + + sqlite3_bind_int64(p->pDeleter, 1, iFirst); + sqlite3_bind_int64(p->pDeleter, 2, iLast); + sqlite3_step(p->pDeleter); + p->rc = sqlite3_reset(p->pDeleter); +} + +/* +** Close the sqlite3_blob handle used to read records from the %_data table. 
+** And discard any cached reads. This function is called at the end of +** a read transaction or when any sub-transaction is rolled back. +*/ +static void fts5DataReset(Fts5Index *p){ + if( p->pReader ){ + sqlite3_blob_close(p->pReader); + p->pReader = 0; + } +} + +/* +** Remove all records associated with segment iSegid in index iIdx. +*/ +static void fts5DataRemoveSegment(Fts5Index *p, int iIdx, int iSegid){ + i64 iFirst = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, 0); + i64 iLast = FTS5_SEGMENT_ROWID(iIdx, iSegid+1, 0, 0)-1; + fts5DataDelete(p, iFirst, iLast); +} + +/* +** Deserialize and return the structure record currently stored in serialized +** form within buffer pData/nData. +** +** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array +** are over-allocated by one slot. This allows the structure contents +** to be more easily edited. +** +** If an error occurs, *ppOut is set to NULL and an SQLite error code +** returned. Otherwise, *ppOut is set to point to the new object and +** SQLITE_OK returned. +*/ +static int fts5StructureDecode( + const u8 *pData, /* Buffer containing serialized structure */ + int nData, /* Size of buffer pData in bytes */ + Fts5Structure **ppOut /* OUT: Deserialized object */ +){ + int rc = SQLITE_OK; + int i = 0; + int iLvl; + int nLevel = 0; + int nSegment = 0; + int nByte; /* Bytes of space to allocate */ + Fts5Structure *pRet = 0; + + /* Read the total number of levels and segments from the start of the + ** structure record. Use these values to allocate space for the deserialized + ** version of the record. 
*/ + i = getVarint32(&pData[i], nLevel); + i += getVarint32(&pData[i], nSegment); + nByte = ( + sizeof(Fts5Structure) + + sizeof(Fts5StructureLevel) * (nLevel+1) + + sizeof(Fts5StructureSegment) * (nSegment+nLevel+1) + ); + pRet = (Fts5Structure*)sqlite3_malloc(nByte); + + if( pRet ){ + u8 *pSpace = (u8*)&pRet->aLevel[nLevel+1]; + memset(pRet, 0, nByte); + pRet->nLevel = nLevel; + i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter); + for(iLvl=0; iLvlaLevel[iLvl]; + int nTotal; + int iSeg; + + i += getVarint32(&pData[i], pLvl->nMerge); + i += getVarint32(&pData[i], nTotal); + assert( nTotal>=pLvl->nMerge ); + pLvl->nSeg = nTotal; + pLvl->aSeg = (Fts5StructureSegment*)pSpace; + pSpace += ((nTotal+1) * sizeof(Fts5StructureSegment)); + + for(iSeg=0; iSegaSeg[iSeg].iSegid); + i += getVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); + i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); + i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); + } + } + pRet->aLevel[nLevel].aSeg = (Fts5StructureSegment*)pSpace; + }else{ + rc = SQLITE_NOMEM; + } + + *ppOut = pRet; + return rc; +} + +/* +** Read, deserialize and return the structure record for index iIdx. +** +** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array +** are over-allocated as described for function fts5StructureDecode() +** above. +** +** If an error occurs, NULL is returned and an error code left in the +** Fts5Index handle. If an error has already occurred when this function +** is called, it is a no-op. 
+*/ +static Fts5Structure *fts5StructureRead(Fts5Index *p, int iIdx){ + Fts5Config *pConfig = p->pConfig; + Fts5Structure *pRet = 0; /* Object to return */ + Fts5Data *pData; /* %_data entry containing structure record */ + + assert( iIdx<=pConfig->nPrefix ); + pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID(iIdx)); + if( !pData ) return 0; + p->rc = fts5StructureDecode(pData->p, pData->n, &pRet); + + fts5DataRelease(pData); + return pRet; +} + +/* +** Release a reference to an Fts5Structure object returned by an earlier +** call to fts5StructureRead() or fts5StructureDecode(). +*/ +static void fts5StructureRelease(Fts5Structure *pStruct){ + sqlite3_free(pStruct); +} + +/* +** Return the total number of segments in index structure pStruct. +*/ +static int fts5StructureCountSegments(Fts5Structure *pStruct){ + int nSegment = 0; /* Total number of segments */ + int iLvl; /* Used to iterate through levels */ + + for(iLvl=0; iLvlnLevel; iLvl++){ + nSegment += pStruct->aLevel[iLvl].nSeg; + } + + return nSegment; +} + +/* +** Serialize and store the "structure" record for index iIdx. +** +** If an error occurs, leave an error code in the Fts5Index object. If an +** error has already occurred, this function is a no-op. 
+*/ +static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ + int nSegment; /* Total number of segments */ + Fts5Buffer buf; /* Buffer to serialize record into */ + int iLvl; /* Used to iterate through levels */ + + nSegment = fts5StructureCountSegments(pStruct); + memset(&buf, 0, sizeof(Fts5Buffer)); + fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel); + fts5BufferAppendVarint(&p->rc, &buf, nSegment); + fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); + + for(iLvl=0; iLvlnLevel; iLvl++){ + int iSeg; /* Used to iterate through segments */ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); + + for(iSeg=0; iSegnSeg; iSeg++){ + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); + } + } + + fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n); + fts5BufferFree(&buf); +} + + +/* +** Load the next leaf page into the segment iterator. 
+*/ +static void fts5SegIterNextPage( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter /* Iterator to advance to next page */ +){ + Fts5StructureSegment *pSeg = pIter->pSeg; + if( pIter->pLeaf ) fts5DataRelease(pIter->pLeaf); + if( pIter->iLeafPgnopgnoLast ){ + pIter->iLeafPgno++; + pIter->pLeaf = fts5DataRead(p, + FTS5_SEGMENT_ROWID(pIter->iIdx, pSeg->iSegid, 0, pIter->iLeafPgno) + ); + }else{ + pIter->pLeaf = 0; + } +} + +static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ + u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ + int iOff = pIter->iLeafOffset; /* Offset to read at */ + int nNew; /* Bytes of new data */ + + iOff += getVarint32(&a[iOff], nNew); + pIter->term.n = nKeep; + fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); + iOff += nNew; + pIter->iTermLeafOffset = iOff; + pIter->iTermLeafPgno = pIter->iLeafPgno; + if( iOff>=pIter->pLeaf->n ){ + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf==0 ){ + if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; + return; + } + iOff = 4; + a = pIter->pLeaf->p; + } + iOff += sqlite3GetVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; +} + +/* +** Initialize the iterator object pIter to iterate through the entries in +** segment pSeg within index iIdx. The iterator is left pointing to the +** first entry when this function returns. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. If +** an error has already occurred when this function is called, it is a no-op. 
+*/ +static void fts5SegIterInit( + Fts5Index *p, + int iIdx, /* Config.aHash[] index of FTS index */ + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + + if( p->rc==SQLITE_OK ){ + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->iIdx = iIdx; + pIter->iLeafPgno = pSeg->pgnoFirst-1; + fts5SegIterNextPage(p, pIter); + } + + if( p->rc==SQLITE_OK ){ + u8 *a = pIter->pLeaf->p; + pIter->iLeafOffset = fts5GetU16(&a[2]); + fts5SegIterLoadTerm(p, pIter, 0); + } +} + +/* +** Advance iterator pIter to the next entry. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. It +** is not considered an error if the iterator reaches EOF. If an error has +** already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterNext( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter /* Iterator to advance */ +){ + if( p->rc==SQLITE_OK ){ + Fts5Data *pLeaf = pIter->pLeaf; + int iOff; + int bNewTerm = 0; + int nKeep = 0; + + /* Search for the end of the position list within the current page. */ + u8 *a = pLeaf->p; + int n = pLeaf->n; + for(iOff=pIter->iLeafOffset; iOffiLeafOffset = iOff; + if( iDelta==0 ){ + bNewTerm = 1; + if( iOff>=n ){ + fts5SegIterNextPage(p, pIter); + pIter->iLeafOffset = 4; + }else if( iOff!=fts5GetU16(&a[2]) ){ + pIter->iLeafOffset += getVarint32(&a[iOff], nKeep); + } + }else{ + pIter->iRowid -= iDelta; + } + }else{ + iOff = 0; + /* Next entry is not on the current page */ + while( iOff==0 ){ + fts5SegIterNextPage(p, pIter); + pLeaf = pIter->pLeaf; + if( pLeaf==0 ) break; + if( (iOff = fts5GetU16(&pLeaf->p[0])) ){ + iOff += sqlite3GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + } + else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ + pIter->iLeafOffset = iOff; + bNewTerm = 1; + } + } + } + + /* Check if the iterator is now at EOF. If so, return early. 
*/ + if( pIter->pLeaf==0 ) return; + if( bNewTerm ){ + fts5SegIterLoadTerm(p, pIter, nKeep); + } + } +} + +/* +** Zero the iterator passed as the only argument. +*/ +static void fts5SegIterClear(Fts5SegIter *pIter){ + fts5BufferFree(&pIter->term); + fts5DataRelease(pIter->pLeaf); + memset(pIter, 0, sizeof(Fts5SegIter)); +} + +/* +** Do the comparison necessary to populate pIter->aFirst[iOut]. +** +** If the returned value is non-zero, then it is the index of an entry +** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing +** to a key that is a duplicate of another, higher priority, +** segment-iterator in the pSeg->aSeg[] array. +*/ +static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ + int i1; /* Index of left-hand Fts5SegIter */ + int i2; /* Index of right-hand Fts5SegIter */ + int iRes; + Fts5SegIter *p1; /* Left-hand Fts5SegIter */ + Fts5SegIter *p2; /* Right-hand Fts5SegIter */ + + assert( iOutnSeg && iOut>0 ); + + if( iOut>=(pIter->nSeg/2) ){ + i1 = (iOut - pIter->nSeg/2) * 2; + i2 = i1 + 1; + }else{ + i1 = pIter->aFirst[iOut*2]; + i2 = pIter->aFirst[iOut*2+1]; + } + p1 = &pIter->aSeg[i1]; + p2 = &pIter->aSeg[i2]; + + if( p1->pLeaf==0 ){ /* If p1 is at EOF */ + iRes = i2; + }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ + iRes = i1; + }else{ + int res = fts5BufferCompare(&p1->term, &p2->term); + if( res==0 ){ + assert( i2>i1 ); + assert( i2!=0 ); + if( p1->iRowid==p2->iRowid ) return i2; + res = (p1->iRowid > p2->iRowid) ? -1 : +1; + } + assert( res!=0 ); + if( res<0 ){ + iRes = i1; + }else{ + iRes = i2; + } + } + + pIter->aFirst[iOut] = iRes; + return 0; +} + +/* +** Free the iterator object passed as the second argument. 
+*/ +static void fts5MultiIterFree(Fts5Index *p, Fts5MultiSegIter *pIter){ + if( pIter ){ + int i; + for(i=0; inSeg; i++){ + fts5SegIterClear(&pIter->aSeg[i]); + } + sqlite3_free(pIter); + } +} + +static void fts5MultiIterAdvanced( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ + int iChanged, /* Index of sub-iterator just advanced */ + int iMinset /* Minimum entry in aFirst[] to set */ +){ + int i; + for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ + fts5SegIterNext(p, &pIter->aSeg[iEq]); + i = pIter->nSeg + iEq; + } + } +} + +/* +** Move the iterator to the next entry. +** +** If an error occurs, an error code is left in Fts5Index.rc. It is not +** considered an error if the iterator reaches EOF, or if it is already at +** EOF when this function is called. +*/ +static void fts5MultiIterNext(Fts5Index *p, Fts5MultiSegIter *pIter){ + if( p->rc==SQLITE_OK ){ + int iFirst = pIter->aFirst[1]; + fts5SegIterNext(p, &pIter->aSeg[iFirst]); + fts5MultiIterAdvanced(p, pIter, iFirst, 1); + } +} + +/* +** Allocate a new Fts5MultiSegIter object. +** +** The new object will be used to iterate through data in structure pStruct. +** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel +** is zero or greater, data from the first nSegment segments on level iLevel +** is merged. +** +** The iterator initially points to the first term/rowid entry in the +** iterated data. 
+*/ +static void fts5MultiIterNew( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5Structure *pStruct, /* Structure of specific index */ + int iIdx, /* Config.aHash[] index of FTS index */ + int iLevel, /* Level to iterate (-1 for all) */ + int nSegment, /* Number of segments to merge (iLevel>=0) */ + Fts5MultiSegIter **ppOut /* New object */ +){ + int nSeg; /* Number of segments merged */ + int nSlot; /* Power of two >= nSeg */ + int iIter = 0; /* */ + int iSeg; /* Used to iterate through segments */ + Fts5StructureLevel *pLvl; + Fts5MultiSegIter *pNew; + + /* Allocate space for the new multi-seg-iterator. */ + if( iLevel<0 ){ + nSeg = fts5StructureCountSegments(pStruct); + }else{ + nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); + } + for(nSlot=2; nSlotaSeg[] */ + sizeof(u16) * nSlot /* pNew->aFirst[] */ + ); + if( pNew==0 ) return; + pNew->nSeg = nSlot; + pNew->aSeg = (Fts5SegIter*)&pNew[1]; + pNew->aFirst = (u16*)&pNew->aSeg[nSlot]; + + /* Initialize each of the component segment iterators. */ + if( iLevel<0 ){ + Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; + for(pLvl=&pStruct->aLevel[0]; pLvlnSeg-1; iSeg>=0; iSeg--){ + fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); + } + } + }else{ + pLvl = &pStruct->aLevel[iLevel]; + for(iSeg=nSeg-1; iSeg>=0; iSeg--){ + fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); + } + } + assert( iIter==nSeg ); + + /* If the above was successful, each component iterators now points + ** to the first entry in its segment. In this case initialize the + ** aFirst[] array. Or, if an error has occurred, free the iterator + ** object and set the output variable to NULL. 
*/ + if( p->rc==SQLITE_OK ){ + for(iIter=nSlot-1; iIter>0; iIter--){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ + fts5SegIterNext(p, &pNew->aSeg[iEq]); + fts5MultiIterAdvanced(p, pNew, iEq, iIter); + } + } + }else{ + fts5MultiIterFree(p, pNew); + *ppOut = 0; + } +} + +/* +** Return true if the iterator is at EOF or if an error has occurred. +** False otherwise. +*/ +static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ + return (p->rc || pIter->aSeg[ pIter->aFirst[1] ].pLeaf==0); +} + +/* +** Return the rowid of the entry that the iterator currently points +** to. If the iterator points to EOF when this function is called the +** results are undefined. +*/ +static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){ + return pIter->aSeg[ pIter->aFirst[1] ].iRowid; +} + +/* +** Return a pointer to a buffer containing the term associated with the +** entry that the iterator currently points to. +*/ +static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ + Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1] ]; + *pn = p->term.n; + return p->term.p; +} + +/* +** Read and return the next 32-bit varint from the position-list iterator +** passed as the second argument. +** +** If an error occurs, zero is returned an an error code left in +** Fts5Index.rc. If an error has already occurred when this function is +** called, it is a no-op. +*/ +static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ + int iVal = 0; + if( p->rc==SQLITE_OK ){ + int iOff = pIter->iLeafOffset; + if( iOff < pIter->pLeaf->n ){ + pIter->iLeafOffset += getVarint32(&pIter->pLeaf->p[iOff], iVal); + }else{ + fts5DataRelease(pIter->pLeaf); + pIter->iLeafRowid++; + pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); + if( pIter->pLeaf ){ + pIter->iLeafOffset = 4 + getVarint32(&pIter->pLeaf->p[4], iVal); + } + } + } + return iVal; +} + +/* +** Advance the position list iterator to the next entry. 
+*/ +static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){ + int iVal; + iVal = fts5PosIterReadVarint(p, pIter); + if( iVal==0 ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + } + else if( iVal==1 ){ + pIter->iCol = fts5PosIterReadVarint(p, pIter); + pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2; + }else{ + pIter->iPos += (iVal - 2); + } +} + +/* +** Initialize the Fts5PosIter object passed as the final argument to iterate +** through the position-list associated with the index entry that iterator +** pMulti currently points to. +*/ +static void fts5PosIterInit( + Fts5Index *p, /* FTS5 backend object */ + Fts5MultiSegIter *pMulti, /* Multi-seg iterator to read pos-list from */ + Fts5PosIter *pIter /* Initialize this object */ +){ + if( p->rc==SQLITE_OK ){ + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; + int iId = pSeg->pSeg->iSegid; + + memset(pIter, 0, sizeof(*pIter)); + pIter->pLeaf = pSeg->pLeaf; + pIter->iLeafOffset = pSeg->iLeafOffset; + pIter->iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); + fts5DataReference(pIter->pLeaf); + fts5PosIterNext(p, pIter); + } +} + +/* +** Return true if the position iterator passed as the second argument is +** at EOF. Or if an error has already occurred. Otherwise, return false. +*/ +static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ + return (p->rc || pIter->pLeaf==0); +} + + +/* +** Allocate memory. The difference between this function and fts5IdxMalloc() +** is that this increments the Fts5Index.nPendingData variable by the +** number of bytes allocated. It should be used for all allocations used +** to store pending-data within the in-memory hash tables. +*/ +static void *fts5PendingMalloc(Fts5Index *p, int nByte){ + p->nPendingData += nByte; + return fts5IdxMalloc(p, nByte); +} + +/* +** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken) +** in hash table for index iIdx. If iIdx is zero, this is the main terms +** index. 
Values of 1 and greater for iIdx are prefix indexes. +** +** If an OOM error is encountered, set the Fts5Index.rc error code +** accordingly. +*/ +static void fts5AddTermToHash( + Fts5Index *p, /* Index object to write to */ + int iIdx, /* Entry in p->aHash[] to update */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +){ + Fts5Config *pConfig = p->pConfig; + Fts3Hash *pHash; + Fts5PendingDoclist *pDoclist; + Fts5PendingPoslist *pPoslist; + i64 iRowid = p->iWriteRowid; /* Rowid associated with these tokens */ + + /* If an error has already occured this call is a no-op. */ + if( p->rc!=SQLITE_OK ) return; + + /* Find the hash table to use. It has already been allocated. */ + assert( iIdx<=pConfig->nPrefix ); + assert( iIdx==0 || nToken==pConfig->aPrefix[iIdx-1] ); + pHash = &p->aHash[iIdx]; + + /* Find the doclist to append to. Allocate a new doclist object if + ** required. */ + pDoclist = (Fts5PendingDoclist*)fts3HashFind(pHash, pToken, nToken); + if( pDoclist==0 ){ + Fts5PendingDoclist *pDel; + pDoclist = fts5PendingMalloc(p, sizeof(Fts5PendingDoclist) + nToken); + if( pDoclist==0 ) return; + pDoclist->pTerm = (u8*)&pDoclist[1]; + pDoclist->nTerm = nToken; + memcpy(pDoclist->pTerm, pToken, nToken); + pDel = fts3HashInsert(pHash, pDoclist->pTerm, nToken, pDoclist); + if( pDel ){ + assert( pDoclist==pDel ); + sqlite3_free(pDel); + p->rc = SQLITE_NOMEM; + return; + } + } + + /* Find the poslist to append to. Allocate a new object if required. */ + pPoslist = pDoclist->pPoslist; + if( pPoslist==0 || pPoslist->iRowid!=iRowid ){ + pPoslist = fts5PendingMalloc(p, sizeof(Fts5PendingPoslist)); + if( pPoslist==0 ) return; + pPoslist->pNext = pDoclist->pPoslist; + pPoslist->iRowid = iRowid; + pDoclist->pPoslist = pPoslist; + pDoclist->iCol = 0; + pDoclist->iPos = 0; + } + + /* Append the values to the position list. 
*/ + if( iCol>=0 ){ + p->nPendingData -= pPoslist->buf.nSpace; + if( iCol!=pDoclist->iCol ){ + fts5BufferAppendVarint(&p->rc, &pPoslist->buf, 1); + fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iCol); + pDoclist->iCol = iCol; + pDoclist->iPos = 0; + } + fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iPos + 2 - pDoclist->iPos); + p->nPendingData += pPoslist->buf.nSpace; + pDoclist->iPos = iPos; + } +} + +/* +** Free the pending-doclist object passed as the only argument. +*/ +static void fts5FreePendingDoclist(Fts5PendingDoclist *p){ + Fts5PendingPoslist *pPoslist; + Fts5PendingPoslist *pNext; + for(pPoslist=p->pPoslist; pPoslist; pPoslist=pNext){ + pNext = pPoslist->pNext; + fts5BufferFree(&pPoslist->buf); + sqlite3_free(pPoslist); + } + sqlite3_free(p); +} + +/* +** Insert or remove data to or from the index. Each time a document is +** added to or removed from the index, this function is called one or more +** times. +** +** For an insert, it must be called once for each token in the new document. +** If the operation is a delete, it must be called (at least) once for each +** unique token in the document with an iCol value less than zero. The iPos +** argument is ignored for a delete. +*/ +void sqlite3Fts5IndexWrite( + Fts5Index *p, /* Index to write to */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +){ + int i; /* Used to iterate through indexes */ + Fts5Config *pConfig = p->pConfig; + + /* If an error has already occured this call is a no-op. 
*/ + if( p->rc!=SQLITE_OK ) return; + + /* Allocate hash tables if they have not already been allocated */ + if( p->aHash==0 ){ + int nHash = pConfig->nPrefix + 1; + p->aHash = (Fts3Hash*)sqlite3_malloc(sizeof(Fts3Hash) * nHash); + if( p->aHash==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + for(i=0; iaHash[i], FTS3_HASH_STRING, 0); + } + } + } + + /* Add the new token to the main terms hash table. And to each of the + ** prefix hash tables that it is large enough for. */ + fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken); + for(i=0; inPrefix; i++){ + if( nToken>=pConfig->aPrefix[i] ){ + fts5AddTermToHash(p, i+1, iCol, iPos, pToken, pConfig->aPrefix[i]); + } + } +} + +/* +** Allocate a new segment-id for the structure pStruct. +** +** If an error has already occurred, this function is a no-op. 0 is +** returned in this case. +*/ +static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ + int i; + if( p->rc!=SQLITE_OK ) return 0; + + for(i=0; i<100; i++){ + int iSegid; + sqlite3_randomness(sizeof(int), (void*)&iSegid); + iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1); + if( iSegid ){ + int iLvl, iSeg; + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ + if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){ + iSegid = 0; + } + } + } + } + if( iSegid ) return iSegid; + } + + p->rc = SQLITE_ERROR; + return 0; +} + +static Fts5PendingDoclist *fts5PendingMerge( + Fts5Index *p, + Fts5PendingDoclist *pLeft, + Fts5PendingDoclist *pRight +){ + Fts5PendingDoclist *p1 = pLeft; + Fts5PendingDoclist *p2 = pRight; + Fts5PendingDoclist *pRet = 0; + Fts5PendingDoclist **ppOut = &pRet; + + while( p1 || p2 ){ + if( p1==0 ){ + *ppOut = p2; + p2 = 0; + }else if( p2==0 ){ + *ppOut = p1; + p1 = 0; + }else{ + int nCmp = MIN(p1->nTerm, p2->nTerm); + int res = memcmp(p1->pTerm, p2->pTerm, nCmp); + if( res==0 ) res = p1->nTerm - p2->nTerm; + + if( res>0 ){ + /* p2 is smaller */ + *ppOut = p2; + ppOut = &p2->pNext; + p2 = p2->pNext; + }else{ + /* p1 is 
smaller */ + *ppOut = p1; + ppOut = &p1->pNext; + p1 = p1->pNext; + } + *ppOut = 0; + } + } + + return pRet; +} + +/* +** Extract all tokens from hash table iHash and link them into a list +** in sorted order. The hash table is cleared before returning. It is +** the responsibility of the caller to free the elements of the returned +** list. +** +** If an error occurs, set the Fts5Index.rc error code. If an error has +** already occurred, this function is a no-op. +*/ +static Fts5PendingDoclist *fts5PendingList(Fts5Index *p, int iHash){ + const int nMergeSlot = 32; + Fts3Hash *pHash; + Fts3HashElem *pE; /* Iterator variable */ + Fts5PendingDoclist **ap; + Fts5PendingDoclist *pList; + int i; + + ap = fts5IdxMalloc(p, sizeof(Fts5PendingDoclist*) * nMergeSlot); + if( !ap ) return 0; + + pHash = &p->aHash[iHash]; + for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ + int i; + Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE); + assert( pDoclist->pNext==0 ); + for(i=0; ap[i]; i++){ + pDoclist = fts5PendingMerge(p, pDoclist, ap[i]); + ap[i] = 0; + } + ap[i] = pDoclist; + } + + pList = 0; + for(i=0; iiOff offset currently points to an entry indicating one +** or more term-less nodes, advance past it and set pIter->nEmpty to +** the number of empty child nodes. +*/ +static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ + if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ + pIter->iOff++; + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); + }else{ + pIter->nEmpty = 0; + } +} + +/* +** Advance to the next entry within the node. 
+*/ +static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ + if( pIter->iOff>=pIter->nData ){ + pIter->aData = 0; + pIter->iChild += pIter->nEmpty; + }else{ + int nPre, nNew; + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nPre); + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nNew); + pIter->term.n = nPre-2; + fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); + pIter->iOff += nNew; + pIter->iChild += (1 + pIter->nEmpty); + fts5NodeIterGobbleNEmpty(pIter); + if( *pRc ) pIter->aData = 0; + } +} + + +/* +** Initialize the iterator object pIter to iterate through the internal +** segment node in pData. +*/ +static void fts5NodeIterInit(int nData, const u8 *aData, Fts5NodeIter *pIter){ + memset(pIter, 0, sizeof(*pIter)); + pIter->aData = aData; + pIter->nData = nData; + pIter->iOff = getVarint32(aData, pIter->iChild); + fts5NodeIterGobbleNEmpty(pIter); +} + +/* +** Free any memory allocated by the iterator object. +*/ +static void fts5NodeIterFree(Fts5NodeIter *pIter){ + fts5BufferFree(&pIter->term); +} + + +/* +** This is called once for each leaf page except the first that contains +** at least one term. Argument (nTerm/pTerm) is the split-key - a term that +** is larger than all terms written to earlier leaves, and equal to or +** smaller than the first term on the new leaf. +** +** If an error occurs, an error code is left in Fts5Index.rc. If an error +** has already occurred when this function is called, it is a no-op. 
+*/ +static void fts5WriteBtreeTerm( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegWriter *pWriter, /* Writer object */ + int nTerm, const u8 *pTerm /* First term on new page */ +){ + int iHeight; + for(iHeight=1; 1; iHeight++){ + Fts5PageWriter *pPage; + + if( iHeight>=pWriter->nWriter ){ + Fts5PageWriter *aNew; + Fts5PageWriter *pNew; + int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); + aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); + if( aNew==0 ) return; + + pNew = &aNew[pWriter->nWriter]; + memset(pNew, 0, sizeof(Fts5PageWriter)); + pNew->pgno = 1; + fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); + + pWriter->nWriter++; + pWriter->aWriter = aNew; + } + pPage = &pWriter->aWriter[iHeight]; + + if( pWriter->nEmpty ){ + assert( iHeight==1 ); + fts5BufferAppendVarint(&p->rc, &pPage->buf, 0); + fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->nEmpty); + pWriter->nEmpty = 0; + } + + if( pPage->buf.n>=p->pgsz ){ + /* pPage will be written to disk. The term will be written into the + ** parent of pPage. 
*/ + i64 iRowid = FTS5_SEGMENT_ROWID( + pWriter->iIdx, pWriter->iSegid, iHeight, pPage->pgno + ); + fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); + fts5BufferZero(&pPage->buf); + fts5BufferZero(&pPage->term); + fts5BufferAppendVarint(&p->rc, &pPage->buf, pPage[-1].pgno); + pPage->pgno++; + }else{ + int nPre = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); + fts5BufferAppendVarint(&p->rc, &pPage->buf, nPre+2); + fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm-nPre); + fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre); + fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); + break; + } + } +} + +static void fts5WriteBtreeNoTerm( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegWriter *pWriter /* Writer object */ +){ + pWriter->nEmpty++; +} + +static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ + static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; + Fts5PageWriter *pPage = &pWriter->aWriter[0]; + i64 iRowid; + + if( pPage->term.n==0 ){ + /* No term was written to this page. */ + fts5WriteBtreeNoTerm(p, pWriter); + } + + /* Write the current page to the db. */ + iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, 0, pPage->pgno); + fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); + + /* Initialize the next page. */ + fts5BufferZero(&pPage->buf); + fts5BufferZero(&pPage->term); + fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); + pPage->pgno++; + + /* Increase the leaves written counter */ + pWriter->nLeafWritten++; +} + +/* +** Append term pTerm/nTerm to the segment being written by the writer passed +** as the second argument. +** +** If an error occurs, set the Fts5Index.rc error code. If an error has +** already occurred, this function is a no-op. 
+*/
+static void fts5WriteAppendTerm(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer for the segment being built */
+  int nTerm, const u8 *pTerm      /* Term to append (size in bytes, data) */
+){
+  int nPrefix;                    /* Bytes of prefix compression for term */
+  Fts5PageWriter *pPage = &pWriter->aWriter[0];
+
+  assert( pPage->buf.n==0 || pPage->buf.n>4 );
+  if( pPage->buf.n==0 ){
+    /* Zero the first term and first docid fields */
+    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
+    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
+    assert( pPage->term.n==0 );
+  }
+  if( p->rc ) return;
+
+  if( pPage->term.n==0 ){
+    /* Update the "first term" field of the page header. The first term on
+    ** a page is stored in full, so no prefix-length varint is written. */
+    assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
+    fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
+    nPrefix = 0;
+    if( pWriter->aWriter[0].pgno!=1 ){
+      /* Any leaf other than page 1 also gets an entry in the b-tree.
+      ** pPage is re-read afterwards in case aWriter[] was changed. */
+      fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm);
+      pPage = &pWriter->aWriter[0];
+    }
+  }else{
+    nPrefix = fts5PrefixCompress(
+        pPage->term.n, pPage->term.p, nTerm, pTerm
+    );
+    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
+  }
+
+  /* Append the number of bytes of new data, then the term data itself
+  ** to the page. */
+  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
+  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
+
+  /* Update the Fts5PageWriter.term field. */
+  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
+
+  pWriter->bFirstRowidInPage = 0;
+  pWriter->bFirstRowidInDoclist = 1;
+
+  /* If the current leaf page is full, flush it to disk. */
+  if( pPage->buf.n>=p->pgsz ){
+    fts5WriteFlushLeaf(p, pWriter);
+    pWriter->bFirstRowidInPage = 1;
+  }
+}
+
+/*
+** Append docid iRowid to the leaf page currently being built by pWriter.
+**
+** The first docid of a doclist, and the first docid written to each leaf
+** page, is stored in full. Every other docid is stored as the difference
+** between the previous docid and this one, so within a doclist the docids
+** must be supplied in strictly descending order (see the assert() below).
+*/
+static void fts5WriteAppendRowid(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer for the segment being built */
+  i64 iRowid                      /* Docid to append */
+){
+  Fts5PageWriter *pPage = &pWriter->aWriter[0];
+
+  /* If this is to be the first docid written to the page, set the
+  ** docid-pointer in the page-header. */
+  if( pWriter->bFirstRowidInPage ) fts5PutU16(pPage->buf.p, pPage->buf.n);
+
+  /* Write the docid. */
+  if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
+    fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
+  }else{
+    assert( iRowid<pWriter->iPrevRowid );
+    fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->iPrevRowid - iRowid);
+  }
+  pWriter->iPrevRowid = iRowid;
+  pWriter->bFirstRowidInDoclist = 0;
+  pWriter->bFirstRowidInPage = 0;
+
+  /* If the current leaf page is full, flush it to disk. */
+  if( pPage->buf.n>=p->pgsz ){
+    fts5WriteFlushLeaf(p, pWriter);
+    pWriter->bFirstRowidInPage = 1;
+  }
+}
+
+/*
+** Append integer iVal, encoded as a varint, to the current leaf page. If
+** this fills the page (buffer size reaches Fts5Index.pgsz), flush it.
+*/
+static void fts5WriteAppendPoslistInt(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer for the segment being built */
+  int iVal                        /* Value to append */
+){
+  Fts5PageWriter *pPage = &pWriter->aWriter[0];
+  fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal);
+  if( pPage->buf.n>=p->pgsz ){
+    fts5WriteFlushLeaf(p, pWriter);
+    pWriter->bFirstRowidInPage = 1;
+  }
+}
+
+/*
+** Append a single zero varint to the current leaf page. Callers use this
+** as the position-list and doclist terminator. Unlike the other append
+** functions, this one never flushes the page.
+*/
+static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
+  fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
+}
+
+/*
+** Write the contents of pending-doclist object pDoclist to writer pWriter.
+**
+** If an error occurs, set the Fts5Index.rc error code. If an error has
+** already occurred, this function is a no-op.
+*/
+static void fts5WritePendingDoclist(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Write to this writer object */
+  Fts5PendingDoclist *pDoclist    /* Doclist to write to pWriter */
+){
+  Fts5PendingPoslist *pPoslist;   /* Used to iterate through the doclist */
+
+  /* Append the term */
+  fts5WriteAppendTerm(p, pWriter, pDoclist->nTerm, pDoclist->pTerm);
+
+  /* Append the position list for each rowid */
+  for(pPoslist=pDoclist->pPoslist; pPoslist; pPoslist=pPoslist->pNext){
+    int i = 0;
+
+    /* Append the rowid itself */
+    fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid);
+
+    /* Copy the position list to the output segment. Each varint is decoded
+    ** and re-appended individually so that page-full checks are applied. */
+    while( i<pPoslist->buf.n ){
+      int iVal;
+      i += getVarint32(&pPoslist->buf.p[i], iVal);
+      fts5WriteAppendPoslistInt(p, pWriter, iVal);
+    }
+
+    /* Write the position list terminator */
+    fts5WriteAppendZerobyte(p, pWriter);
+  }
+
+  /* Write the doclist terminator */
+  fts5WriteAppendZerobyte(p, pWriter);
+}
+
+/*
+** Finish writing the segment: flush the final leaf, write out the current
+** page of every interior b-tree level, then free all writer buffers.
+**
+** Before returning, *pnLeaf is set to the page number of the last leaf
+** and *pnHeight to the number of page-writers (b-tree levels) in use.
+*/
+static void fts5WriteFinish(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer to finish and free */
+  int *pnHeight,                  /* OUT: Height of the b-tree */
+  int *pnLeaf                     /* OUT: Number of last leaf page */
+){
+  int i;
+
+  /* Read the leaf page number before flushing; the flush increments it. */
+  *pnLeaf = pWriter->aWriter[0].pgno;
+  *pnHeight = pWriter->nWriter;
+  fts5WriteFlushLeaf(p, pWriter);
+
+  /* Record any trailing term-less leaves on the level-1 page. */
+  if( pWriter->nWriter>1 && pWriter->nEmpty ){
+    Fts5PageWriter *pPg = &pWriter->aWriter[1];
+    fts5BufferAppendVarint(&p->rc, &pPg->buf, 0);
+    fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
+  }
+
+  /* Write the in-progress page of each interior level to the db. */
+  for(i=1; i<pWriter->nWriter; i++){
+    Fts5PageWriter *pPg = &pWriter->aWriter[i];
+    i64 iRow = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno);
+    fts5DataWrite(p, iRow, pPg->buf.p, pPg->buf.n);
+  }
+
+  /* Release all memory held by the writer. */
+  for(i=0; i<pWriter->nWriter; i++){
+    Fts5PageWriter *pPg = &pWriter->aWriter[i];
+    fts5BufferFree(&pPg->term);
+    fts5BufferFree(&pPg->buf);
+  }
+  sqlite3_free(pWriter->aWriter);
+}
+
+/*
+** Initialize pWriter to write a new segment with id iSegid to index iIdx.
+** A single page-writer (the leaf level) is allocated, starting at page 1.
+** If the allocation fails, pWriter->nWriter is left set to zero.
+*/
+static void fts5WriteInit(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer to initialize */
+  int iIdx, int iSegid            /* Index and segment id to write to */
+){
+  memset(pWriter, 0, sizeof(Fts5SegWriter));
+  pWriter->iIdx = iIdx;
+  pWriter->iSegid = iSegid;
+
+  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter));
+  if( pWriter->aWriter==0 ) return;
+  pWriter->nWriter = 1;
+  pWriter->aWriter[0].pgno = 1;
+}
+
+/*
+** Initialize pWriter so that it appends to existing segment pSeg. Leaf
+** output resumes at page pSeg->pgnoLast+1. For each interior level, from
+** the top down, the current page is loaded into the page-writer buffer,
+** its node iterator is stepped to the end to recover the last term, and
+** the iterator's iChild value selects the page to load at the next level.
+*/
+static void fts5WriteInitForAppend(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer to initialize */
+  int iIdx,                       /* Index segment is a part of */
+  Fts5StructureSegment *pSeg      /* Segment object to append to */
+){
+  int nByte = pSeg->nHeight * sizeof(Fts5PageWriter);
+  memset(pWriter, 0, sizeof(Fts5SegWriter));
+  pWriter->iIdx = iIdx;
+  pWriter->iSegid = pSeg->iSegid;
+  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte);
+  pWriter->nWriter = pSeg->nHeight;
+
+  if( p->rc==SQLITE_OK ){
+    int pgno = 1;
+    int i;
+    pWriter->aWriter[0].pgno = pSeg->pgnoLast+1;
+    for(i=pSeg->nHeight-1; i>0; i--){
+      i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pgno);
+      Fts5PageWriter *pPg = &pWriter->aWriter[i];
+      pPg->pgno = pgno;
+      fts5DataBuffer(p, &pPg->buf, iRowid);
+      if( p->rc==SQLITE_OK ){
+        Fts5NodeIter ss;
+        fts5NodeIterInit(pPg->buf.n, pPg->buf.p, &ss);
+        while( ss.aData ) fts5NodeIterNext(&p->rc, &ss);
+        fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p);
+        pgno = ss.iChild;
+        fts5NodeIterFree(&ss);
+      }
+    }
+    if( pSeg->nHeight==1 ){
+      pWriter->nEmpty = pSeg->pgnoLast-1;
+    }
+    assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
+  }
+}
+
+/*
+** Iterator pIter was used to iterate through the input segments of an
+** incremental merge operation. This function is called if the incremental
+** merge step has finished but the input has not been completely exhausted.
+*/ +static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ + int i; + Fts5Buffer buf; + memset(&buf, 0, sizeof(Fts5Buffer)); + for(i=0; inSeg; i++){ + Fts5SegIter *pSeg = &pIter->aSeg[i]; + if( pSeg->pSeg==0 ){ + /* no-op */ + }else if( pSeg->pLeaf==0 ){ + pSeg->pSeg->pgnoLast = 0; + pSeg->pSeg->pgnoFirst = 0; + }else{ + int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ + i64 iLeafRowid; + Fts5Data *pData; + int iId = pSeg->pSeg->iSegid; + u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04}; + + iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iTermLeafPgno); + pData = fts5DataRead(p, iLeafRowid); + if( pData ){ + fts5BufferZero(&buf); + fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); + fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); + fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); + fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]); + fts5DataRelease(pData); + pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; + fts5DataDelete(p, FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, 1),iLeafRowid); + fts5DataWrite(p, iLeafRowid, buf.p, buf.n); + } + } + } + fts5BufferFree(&buf); +} + +/* +** +*/ +static void fts5IndexMergeLevel( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index to work on */ + Fts5Structure *pStruct, /* Stucture of index iIdx */ + int iLvl, /* Level to read input from */ + int *pnRem /* Write up to this many output leaves */ +){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + Fts5StructureLevel *pLvlOut = &pStruct->aLevel[iLvl+1]; + Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ + int nRem = *pnRem; /* Output leaf pages left to write */ + int nInput; /* Number of input segments */ + Fts5SegWriter writer; /* Writer object */ + Fts5StructureSegment *pSeg; /* Output segment */ + Fts5Buffer term; + int bRequireDoclistTerm = 0; + + assert( iLvlnLevel ); + assert( pLvl->nMerge<=pLvl->nSeg ); + + memset(&writer, 0, sizeof(Fts5SegWriter)); + memset(&term, 0, 
sizeof(Fts5Buffer)); + writer.iIdx = iIdx; + if( pLvl->nMerge ){ + assert( pLvlOut->nSeg>0 ); + nInput = pLvl->nMerge; + fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]); + pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; + }else{ + int iSegid = fts5AllocateSegid(p, pStruct); + fts5WriteInit(p, &writer, iIdx, iSegid); + + /* Add the new segment to the output level */ + if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++; + pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; + pLvlOut->nSeg++; + pSeg->pgnoFirst = 1; + pSeg->iSegid = iSegid; + + /* Read input from all segments in the input level */ + nInput = pLvl->nSeg; + } +#if 0 +fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); +fflush(stdout); +#endif + + for(fts5MultiIterNew(p, pStruct, iIdx, iLvl, nInput, &pIter); + fts5MultiIterEof(p, pIter)==0; + fts5MultiIterNext(p, pIter) + ){ + Fts5PosIter sPos; /* Used to iterate through position list */ + int iCol = 0; /* Current output column */ + int iPos = 0; /* Current output position */ + int nTerm; + const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); + + if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ + if( writer.nLeafWritten>nRem ) break; + + /* This is a new term. Append a term to the output segment. 
*/ + if( bRequireDoclistTerm ){ + fts5WriteAppendZerobyte(p, &writer); + } + fts5WriteAppendTerm(p, &writer, nTerm, pTerm); + fts5BufferSet(&p->rc, &term, nTerm, pTerm); + bRequireDoclistTerm = 1; + } + + /* Append the rowid to the output */ + fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); + + /* Copy the position list from input to output */ + for(fts5PosIterInit(p, pIter, &sPos); + fts5PosIterEof(p, &sPos)==0; + fts5PosIterNext(p, &sPos) + ){ + if( sPos.iCol!=iCol ){ + fts5WriteAppendPoslistInt(p, &writer, 1); + fts5WriteAppendPoslistInt(p, &writer, sPos.iCol); + iCol = sPos.iCol; + iPos = 0; + } + fts5WriteAppendPoslistInt(p, &writer, (sPos.iPos-iPos) + 2); + iPos = sPos.iPos; + } + fts5WriteAppendZerobyte(p, &writer); + } + + /* Flush the last leaf page to disk. Set the output segment b-tree height + ** and last leaf page number at the same time. */ + fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast); + + if( fts5MultiIterEof(p, pIter) ){ + int i; + + /* Remove the redundant segments from the %_data table */ + for(i=0; iaSeg[i].iSegid); + } + + /* Remove the redundant segments from the input level */ + if( pLvl->nSeg!=nInput ){ + int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); + memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); + } + pLvl->nSeg -= nInput; + pLvl->nMerge = 0; + }else{ + fts5TrimSegments(p, pIter); + pLvl->nMerge = nInput; + } + + fts5MultiIterFree(p, pIter); + fts5BufferFree(&term); + *pnRem -= writer.nLeafWritten; +} + +/* +** A total of nLeaf leaf pages of data has just been flushed to a level-0 +** segments in index iIdx with structure pStruct. This function updates the +** write-counter accordingly and, if necessary, performs incremental merge +** work. +** +** If an error occurs, set the Fts5Index.rc error code. If an error has +** already occurred, this function is a no-op. 
+*/
+static void fts5IndexWork(
+  Fts5Index *p,                   /* FTS5 backend object */
+  int iIdx,                       /* Index to work on */
+  Fts5Structure *pStruct,         /* Current structure of index */
+  int nLeaf                       /* Number of output leaves just written */
+){
+  i64 nWrite;                     /* Initial value of write-counter */
+  int nWork;                      /* Number of work-quanta to perform */
+  int nRem;                       /* Number of leaf pages left to write */
+
+  /* This function is documented as a no-op once an error has occurred.
+  ** Also guard against a NULL structure: other callers of
+  ** fts5StructureRead() test its result for NULL before using it. */
+  if( p->rc!=SQLITE_OK || pStruct==0 ) return;
+
+  /* Update the write-counter. While doing so, set nWork. */
+  nWrite = pStruct->nWriteCounter;
+  nWork = ((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit);
+  pStruct->nWriteCounter += nLeaf;
+  nRem = p->nWorkUnit * nWork * pStruct->nLevel;
+
+  /* Stop merging as soon as an error is recorded in p->rc. */
+  while( nRem>0 && p->rc==SQLITE_OK ){
+    int iLvl;                   /* To iterate through levels */
+    int iBestLvl = -1;          /* Level offering the most input segments */
+    int nBest = 0;              /* Number of input segments on best level */
+
+    /* Set iBestLvl to the level to read input segments from. The scan
+    ** stops at the first level that already has a merge in progress. */
+    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
+      if( pLvl->nMerge ){
+        if( pLvl->nMerge>nBest ){
+          iBestLvl = iLvl;
+          nBest = pLvl->nMerge;
+        }
+        break;
+      }
+      if( pLvl->nSeg>nBest ){
+        nBest = pLvl->nSeg;
+        iBestLvl = iLvl;
+      }
+    }
+    assert( iBestLvl>=0 && nBest>0 );
+
+    /* Do not start a new merge with fewer than nMinMerge input segments,
+    ** but always continue one that is already underway. */
+    if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break;
+    fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem);
+    assert( nRem==0 || p->rc==SQLITE_OK );
+  }
+}
+
+/*
+** Flush the contents of in-memory hash table iHash to a new level-0
+** segment on disk. Also update the corresponding structure record.
+**
+** If an error occurs, set the Fts5Index.rc error code. If an error has
+** already occurred, this function is a no-op.
+*/
+static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
+  Fts5Structure *pStruct;
+  int iSegid;
+  int pgnoLast = 0;               /* Last leaf page number in segment */
+
+  /* Obtain a reference to the index structure and allocate a new segment-id
+  ** for the new level-0 segment. */
+  pStruct = fts5StructureRead(p, iHash);
+  iSegid = fts5AllocateSegid(p, pStruct);
+
+  if( iSegid ){
+    Fts5SegWriter writer;
+    Fts5PendingDoclist *pList;
+    Fts5PendingDoclist *pIter;
+    Fts5PendingDoclist *pNext;
+
+    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
+    int nHeight;                  /* Height of new segment b-tree */
+
+    pList = fts5PendingList(p, iHash);
+    assert( pList!=0 || p->rc!=SQLITE_OK );
+    fts5WriteInit(p, &writer, iHash, iSegid);
+
+    /* Write each pending doclist to the segment, freeing it afterwards.
+    ** pNext is saved first because pIter is freed inside the loop. */
+    for(pIter=pList; pIter; pIter=pNext){
+      pNext = pIter->pNext;
+      fts5WritePendingDoclist(p, &writer, pIter);
+      fts5FreePendingDoclist(pIter);
+    }
+    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
+
+    /* Edit the Fts5Structure and write it back to the database. */
+    if( pStruct->nLevel==0 ) pStruct->nLevel = 1;
+    pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
+    pSeg->iSegid = iSegid;
+    pSeg->nHeight = nHeight;
+    pSeg->pgnoFirst = 1;
+    pSeg->pgnoLast = pgnoLast;
+  }
+
+  fts5IndexWork(p, iHash, pStruct, pgnoLast);
+  fts5StructureWrite(p, iHash, pStruct);
+  fts5StructureRelease(pStruct);
+}
+
+/*
+** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
+** to the document with rowid iRowid.
+**
+** If iRowid is not larger than the rowid passed to the previous call,
+** the pending data is flushed to disk first.
+*/
+void sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){
+  if( iRowid<=p->iWriteRowid ){
+    sqlite3Fts5IndexFlush(p);
+  }
+  p->iWriteRowid = iRowid;
+}
+
+/*
+** Flush any data stored in the in-memory hash tables to the database.
+*/
+void sqlite3Fts5IndexFlush(Fts5Index *p){
+  Fts5Config *pConfig = p->pConfig;
+  int i;                          /* Used to iterate through indexes */
+  int nLeaf = 0;                  /* Number of leaves written */
+
+  /* If an error has already occurred, or if there is no pending data,
+  ** this call is a no-op. */
+  if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return;
+  assert( p->aHash );
+
+  /* Flush the terms and each prefix index to disk */
+  for(i=0; i<=pConfig->nPrefix; i++){
+    fts5FlushOneHash(p, i, &nLeaf);
+  }
+  p->nPendingData = 0;
+}
+
+/*
+** Commit data to disk.
+*/
+int sqlite3Fts5IndexSync(Fts5Index *p){
+  sqlite3Fts5IndexFlush(p);
+  fts5CloseReader(p);
+  return p->rc;
+}
+
+/*
+** Discard any data stored in the in-memory hash tables. Do not write it
+** to the database. Additionally, assume that the contents of the %_data
+** table may have changed on disk. So any in-memory caches of %_data
+** records must be invalidated.
+**
+** NOTE(review): as written, this only closes the reader. Nothing here
+** discards the pending hash-table data described above.
+*/
+int sqlite3Fts5IndexRollback(Fts5Index *p){
+  fts5CloseReader(p);
+  return SQLITE_OK;
+}
+
+/*
+** Open a new Fts5Index handle. If the bCreate argument is true, create
+** and initialize the underlying %_data table.
+**
+** If successful, set *pp to point to the new object and return SQLITE_OK.
+** Otherwise, set *pp to NULL and return an SQLite error code.
+*/
+int sqlite3Fts5IndexOpen(
+  Fts5Config *pConfig,            /* Configuration of the FTS5 table */
+  int bCreate,                    /* True to create the %_data table */
+  Fts5Index **pp,                 /* OUT: New index handle */
+  char **pzErr                    /* OUT: Error message, if any */
+){
+  int rc = SQLITE_OK;
+  Fts5Index *p;                   /* New object */
+
+  *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index));
+  if( !p ) return SQLITE_NOMEM;
+
+  memset(p, 0, sizeof(Fts5Index));
+  p->pConfig = pConfig;
+  p->pgsz = 1000;
+  p->nMinMerge = FTS5_MIN_MERGE;
+  p->nWorkUnit = FTS5_WORK_UNIT;
+  p->nMaxPendingData = 1024*1024;
+  p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName);
+  if( p->zDataTbl==0 ){
+    rc = SQLITE_NOMEM;
+  }else if( bCreate ){
+    int i;
+    Fts5Structure s;
+    rc = sqlite3Fts5CreateTable(
+        pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", pzErr
+    );
+    if( rc==SQLITE_OK ){
+      /* Write an empty structure record for the main index and for each
+      ** of the nPrefix prefix indexes. */
+      memset(&s, 0, sizeof(Fts5Structure));
+      for(i=0; i<pConfig->nPrefix+1; i++){
+        fts5StructureWrite(p, i, &s);
+      }
+      rc = p->rc;
+    }
+  }
+
+  if( rc ){
+    sqlite3Fts5IndexClose(p, 0);
+    *pp = 0;
+  }
+  return rc;
+}
+
+/*
+** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
+*/ +int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ + int rc = SQLITE_OK; + if( bDestroy ){ + rc = sqlite3Fts5DropTable(p->pConfig, "data"); + } + assert( p->pReader==0 ); + sqlite3_finalize(p->pWriter); + sqlite3_finalize(p->pDeleter); + sqlite3_free(p->aHash); + sqlite3_free(p->zDataTbl); + sqlite3_free(p); + return rc; +} + +/* +** Return a simple checksum value based on the arguments. +*/ +static u64 fts5IndexEntryCksum( + i64 iRowid, + int iCol, + int iPos, + const char *pTerm, + int nTerm +){ + int i; + u64 ret = iRowid; + ret += (ret<<3) + iCol; + ret += (ret<<3) + iPos; + for(i=0; inPrefix; iIdx++){ + int n = ((iIdx==pConfig->nPrefix) ? nTerm : pConfig->aPrefix[iIdx]); + if( n<=nTerm ){ + ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, n); + } + } + + return ret; +} + +static void fts5BtreeIterInit( + Fts5Index *p, + int iIdx, + Fts5StructureSegment *pSeg, + Fts5BtreeIter *pIter +){ + int nByte; + int i; + nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1); + memset(pIter, 0, sizeof(*pIter)); + pIter->nLvl = pSeg->nHeight-1; + pIter->iIdx = iIdx; + pIter->p = p; + pIter->pSeg = pSeg; + if( nByte && p->rc==SQLITE_OK ){ + pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte); + } + for(i=0; p->rc==SQLITE_OK && inLvl; i++){ + i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1); + Fts5Data *pData; + pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid); + if( pData ){ + fts5NodeIterInit(pData->n, pData->p, &pIter->aLvl[i].s); + } + } + + if( pIter->nLvl==0 || p->rc ){ + pIter->bEof = 1; + pIter->iLeaf = pSeg->pgnoLast; + }else{ + pIter->nEmpty = pIter->aLvl[0].s.nEmpty; + pIter->iLeaf = pIter->aLvl[0].s.iChild; + } +} + +static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ + Fts5Index *p = pIter->p; + int i; + + assert( pIter->bEof==0 && pIter->aLvl[0].s.aData ); + for(i=0; inLvl && p->rc==SQLITE_OK; i++){ + Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; + fts5NodeIterNext(&p->rc, &pLvl->s); + if( pLvl->s.aData ){ + 
fts5BufferSet(&p->rc, &pIter->term, pLvl->s.term.n, pLvl->s.term.p); + break; + }else{ + fts5NodeIterFree(&pLvl->s); + fts5DataRelease(pLvl->pData); + pLvl->pData = 0; + } + } + if( i==pIter->nLvl || p->rc ){ + pIter->bEof = 1; + }else{ + int iSegid = pIter->pSeg->iSegid; + for(i--; i>=0; i--){ + Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; + i64 iRowid = FTS5_SEGMENT_ROWID(pIter->iIdx,iSegid,i+1,pLvl[1].s.iChild); + pLvl->pData = fts5DataRead(p, iRowid); + if( pLvl->pData ){ + fts5NodeIterInit(pLvl->pData->n, pLvl->pData->p, &pLvl->s); + } + } + } + + pIter->nEmpty = pIter->aLvl[0].s.nEmpty; + pIter->iLeaf = pIter->aLvl[0].s.iChild; + assert( p->rc==SQLITE_OK || pIter->bEof ); +} + +static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ + int i; + for(i=0; inLvl; i++){ + Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; + fts5NodeIterFree(&pLvl->s); + if( pLvl->pData ){ + fts5DataRelease(pLvl->pData); + pLvl->pData = 0; + } + } + sqlite3_free(pIter->aLvl); + fts5BufferFree(&pIter->term); +} + +static void fts5IndexIntegrityCheckSegment( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index that pSeg is a part of */ + Fts5StructureSegment *pSeg /* Segment to check internal consistency */ +){ + Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ + + /* Iterate through the b-tree hierarchy. */ + for(fts5BtreeIterInit(p, iIdx, pSeg, &iter); + iter.bEof==0; + fts5BtreeIterNext(&iter) + ){ + i64 iRow; /* Rowid for this leaf */ + Fts5Data *pLeaf; /* Data for this leaf */ + int iOff; /* Offset of first term on leaf */ + int i; /* Used to iterate through empty leaves */ + + /* If the leaf in question has already been trimmed from the segment, + ** ignore this b-tree entry. Otherwise, load it into memory. 
*/ + if( iter.iLeafpgnoFirst ) continue; + iRow = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, iter.iLeaf); + pLeaf = fts5DataRead(p, iRow); + if( pLeaf==0 ) break; + + /* Check that the leaf contains at least one term, and that it is equal + ** to or larger than the split-key in iter.term. */ + iOff = fts5GetU16(&pLeaf->p[2]); + if( iOff==0 ){ + p->rc = FTS5_CORRUPT; + }else{ + int nTerm; /* Size of term on leaf in bytes */ + int res; /* Comparison of term and split-key */ + iOff += getVarint32(&pLeaf->p[iOff], nTerm); + res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n)); + if( res==0 ) res = nTerm - iter.term.n; + if( res<0 ){ + p->rc = FTS5_CORRUPT; + } + } + fts5DataRelease(pLeaf); + if( p->rc ) break; + + /* Now check that the iter.nEmpty leaves following the current leaf + ** (a) exist and (b) contain no terms. */ + for(i=1; i<=iter.nEmpty; i++){ + pLeaf = fts5DataRead(p, iRow+i); + if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ + p->rc = FTS5_CORRUPT; + } + fts5DataRelease(pLeaf); + } + } + + if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ + p->rc = FTS5_CORRUPT; + } + + fts5BtreeIterFree(&iter); +} + +/* +** Run internal checks to ensure that the FTS index (a) is internally +** consistent and (b) contains entries for which the XOR of the checksums +** as calculated by fts5IndexEntryCksum() is cksum. +** +** Return SQLITE_CORRUPT if any of the internal checks fail, or if the +** checksum does not match. Return SQLITE_OK if all checks pass without +** error, or some other SQLite error code if another error (e.g. OOM) +** occurs. 
+*/
+int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
+  Fts5Config *pConfig = p->pConfig;
+  int iIdx;                       /* Used to iterate through indexes */
+  int rc;                         /* Return code */
+  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
+
+  /* Check that the checksum of the index matches the argument checksum.
+  ** cksum2 accumulates the XOR of the entry checksum of every
+  ** (rowid, column, position, term) entry found in any index. */
+  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
+    Fts5MultiSegIter *pIter;
+    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
+    for(fts5MultiIterNew(p, pStruct, iIdx, -1, 0, &pIter);
+        fts5MultiIterEof(p, pIter)==0;
+        fts5MultiIterNext(p, pIter)
+    ){
+      Fts5PosIter sPos;           /* Used to iterate through position list */
+      int n;                      /* Size of term in bytes */
+      i64 iRowid = fts5MultiIterRowid(pIter);
+      char *z = (char*)fts5MultiIterTerm(pIter, &n);
+
+      for(fts5PosIterInit(p, pIter, &sPos);
+          fts5PosIterEof(p, &sPos)==0;
+          fts5PosIterNext(p, &sPos)
+      ){
+        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
+#if 0
+        fprintf(stdout, "rowid=%d ", (int)iRowid);
+        fprintf(stdout, "term=%.*s ", n, z);
+        fprintf(stdout, "col=%d ", sPos.iCol);
+        fprintf(stdout, "off=%d\n", sPos.iPos);
+        fflush(stdout);
+#endif
+      }
+    }
+    fts5MultiIterFree(p, pIter);
+    fts5StructureRelease(pStruct);
+  }
+  rc = p->rc;
+  if( rc==SQLITE_OK && cksum!=cksum2 ) rc = FTS5_CORRUPT;
+
+  /* Check that the internal nodes of each segment match the leaves */
+  for(iIdx=0; rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
+    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
+    if( pStruct ){
+      int iLvl, iSeg;
+      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
+          Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
+          fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
+        }
+      }
+    }
+    fts5StructureRelease(pStruct);
+    rc = p->rc;
+  }
+
+  return rc;
+}
+
+/*
+** Decode the structure record in buffer pBlob/nBlob and append a text
+** description of it to pBuf: one "{lvl=N nMerge=N ...}" group per level,
+** each containing one "{id=N h=N leaves=N..N}" group per segment.
+**
+** If the record cannot be decoded, the error code is stored in *pRc and
+** nothing is appended.
+*/
+static void fts5DecodeStructure(
+  int *pRc,                       /* IN/OUT: error code */
+  Fts5Buffer *pBuf,               /* Append the description to this buffer */
+  const u8 *pBlob, int nBlob      /* Serialized structure record */
+){
+  int rc;                         /* Return code */
+  int iLvl, iSeg;                 /* Iterate through levels, segments */
+  Fts5Structure *p = 0;           /* Decoded structure object */
+
+  rc = fts5StructureDecode(pBlob, nBlob, &p);
+  if( rc!=SQLITE_OK ){
+    *pRc = rc;
+    return;
+  }
+
+  for(iLvl=0; iLvl<p->nLevel; iLvl++){
+    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
+    fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge);
+    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
+      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
+      fts5BufferAppendPrintf(pRc, pBuf,
+          " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight,
+          pSeg->pgnoFirst, pSeg->pgnoLast
+      );
+    }
+    fts5BufferAppendPrintf(pRc, pBuf, "}");
+  }
+
+  fts5StructureRelease(p);
+}
+
+/*
+** Decode a segment-data rowid from the %_data table. This function is
+** the opposite of macro FTS5_SEGMENT_ROWID().
+**
+** The fields are unpacked starting from the least significant bits, in
+** the order: page number, height, segment id, index.
+*/
+static void fts5DecodeRowid(
+  i64 iRowid,                     /* Rowid from %_data table */
+  int *piIdx,                     /* OUT: Index */
+  int *piSegid,                   /* OUT: Segment id */
+  int *piHeight,                  /* OUT: Height */
+  int *piPgno                     /* OUT: Page number */
+){
+  *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
+  iRowid >>= FTS5_DATA_PAGE_B;
+
+  *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
+  iRowid >>= FTS5_DATA_HEIGHT_B;
+
+  *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
+  iRowid >>= FTS5_DATA_ID_B;
+
+  *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1));
+}
+
+/*
+** Buffer (a/n) is assumed to contain a list of serialized varints. Read
+** each varint and append its string representation to buffer pBuf. Return
+** after either the input buffer is exhausted or a 0 value is read.
+**
+** The return value is the number of bytes read from the input buffer.
+*/ +static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ + int iOff = 0; + while( iOffpgsz = pgsz; +} + diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c new file mode 100644 index 0000000000..76cd2e1da7 --- /dev/null +++ b/ext/fts5/fts5_storage.c @@ -0,0 +1,411 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ + +#include "fts5Int.h" + +struct Fts5Storage { + Fts5Config *pConfig; + Fts5Index *pIndex; + + sqlite3_stmt *aStmt[7]; +}; + +#define FTS5_STMT_INSERT_CONTENT 0 +#define FTS5_STMT_REPLACE_CONTENT 1 + +#define FTS5_STMT_DELETE_CONTENT 2 +#define FTS5_STMT_INSERT_DOCSIZE 3 +#define FTS5_STMT_DELETE_DOCSIZE 4 + +#define FTS5_STMT_SCAN_CONTENT 5 +#define FTS5_STMT_SEEK_CONTENT 6 + +/* +** Prepare the two insert statements - Fts5Storage.pInsertContent and +** Fts5Storage.pInsertDocsize - if they have not already been prepared. +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. 
+*/
+static int fts5StorageGetStmt(
+  Fts5Storage *p,                 /* Storage handle */
+  int eStmt,                      /* FTS5_STMT_XXX constant */
+  sqlite3_stmt **ppStmt           /* OUT: Prepared statement handle */
+){
+  int rc = SQLITE_OK;
+
+  assert( eStmt>=0 && eStmt<(int)(sizeof(p->aStmt)/sizeof(p->aStmt[0])) );
+  if( p->aStmt[eStmt]==0 ){
+    /* SQL templates, indexed by FTS5_STMT_XXX value. The first two %
+    ** substitutions are always the database and table names. */
+    const char *azStmt[] = {
+      "INSERT INTO %Q.'%q_content' VALUES(%s)",       /* INSERT_CONTENT */
+      "REPLACE INTO %Q.'%q_content' VALUES(%s)",      /* REPLACE_CONTENT */
+      "DELETE FROM %Q.'%q_content' WHERE id=?",       /* DELETE_CONTENT */
+      "INSERT INTO %Q.'%q_docsize' VALUES(?,?)",      /* INSERT_DOCSIZE */
+      "DELETE FROM %Q.'%q_docsize' WHERE id=?",       /* DELETE_DOCSIZE */
+      "SELECT * FROM %Q.'%q_content'",                /* SCAN_CONTENT */
+      "SELECT * FROM %Q.'%q_content' WHERE rowid=?",  /* SEEK_CONTENT */
+    };
+    Fts5Config *pConfig = p->pConfig;
+    char *zSql = 0;
+
+    if( eStmt==FTS5_STMT_INSERT_CONTENT || eStmt==FTS5_STMT_REPLACE_CONTENT ){
+      int nCol = pConfig->nCol + 1;   /* One value per column, plus the id */
+      char *zBind;
+      int i;
+
+      zBind = sqlite3_malloc(1 + nCol*2);
+      if( zBind ){
+        /* NOTE(review): the statements between "for(i=0; i" and the
+        ** sqlite3_mprintf() arguments were lost when this patch was
+        ** extracted. The loop below is a reconstruction. It builds the
+        ** "?,?,...,?" list implied by the (1 + nCol*2) byte allocation
+        ** and the VALUES(%s) templates. Confirm against upstream. */
+        for(i=0; i<nCol; i++){
+          zBind[i*2] = '?';
+          zBind[i*2 + 1] = ',';
+        }
+        zBind[i*2-1] = '\0';          /* Overwrite the trailing comma */
+        zSql = sqlite3_mprintf(azStmt[eStmt], pConfig->zDb,pConfig->zName,zBind);
+        sqlite3_free(zBind);
+      }
+    }else{
+      zSql = sqlite3_mprintf(azStmt[eStmt], pConfig->zDb, pConfig->zName);
+    }
+
+    if( zSql==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->aStmt[eStmt], 0);
+      sqlite3_free(zSql);
+    }
+  }
+
+  /* The prepared statement is cached in p->aStmt[] for later calls. */
+  *ppStmt = p->aStmt[eStmt];
+  return rc;
+}
+
+/*
+** Drop the shadow table with the postfix zPost (e.g. "content"). Return
+** SQLITE_OK if successful or an SQLite error code otherwise.
+*/
+int sqlite3Fts5DropTable(Fts5Config *pConfig, const char *zPost){
+  int rc;
+  char *zSql = sqlite3_mprintf("DROP TABLE IF EXISTS %Q.'%q_%q'",
+      pConfig->zDb, pConfig->zName, zPost
+  );
+  if( zSql==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    rc = sqlite3_exec(pConfig->db, zSql, 0, 0, 0);
+    sqlite3_free(zSql);
+  }
+  return rc;
+}
+
+/*
+** Create the shadow table named zPost, with definition zDefn. Return
+** SQLITE_OK if successful, or an SQLite error code otherwise.
+*/
+int sqlite3Fts5CreateTable(
+  Fts5Config *pConfig,            /* FTS5 configuration */
+  const char *zPost,              /* Shadow table to create (e.g. "content") */
+  const char *zDefn,              /* Columns etc. for shadow table */
+  char **pzErr                    /* OUT: Error message */
+){
+  int rc;
+  char *zSql = sqlite3_mprintf("CREATE TABLE %Q.'%q_%q'(%s)",
+      pConfig->zDb, pConfig->zName, zPost, zDefn
+  );
+  if( zSql==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    char *zErr = 0;
+    assert( *pzErr==0 );
+    rc = sqlite3_exec(pConfig->db, zSql, 0, 0, &zErr);
+    if( zErr ){
+      /* Wrap the SQLite error message so that it names the shadow table */
+      *pzErr = sqlite3_mprintf(
+          "fts5: error creating shadow table %q_%s: %s",
+          pConfig->zName, zPost, zErr
+      );
+      sqlite3_free(zErr);
+    }
+    sqlite3_free(zSql);
+  }
+  return rc;
+}
+
+/*
+** Open a new Fts5Storage handle. If the bCreate argument is true, create
+** and initialize the underlying %_content and %_docsize tables.
+**
+** If successful, set *pp to point to the new object and return SQLITE_OK.
+** Otherwise, set *pp to NULL and return an SQLite error code.
+*/
+int sqlite3Fts5StorageOpen(
+  Fts5Config *pConfig,            /* FTS5 configuration */
+  Fts5Index *pIndex,              /* Index handle used by this storage */
+  int bCreate,                    /* True to create the shadow tables */
+  Fts5Storage **pp,               /* OUT: New storage handle */
+  char **pzErr                    /* OUT: Error message */
+){
+  /* rc must start as SQLITE_OK: when bCreate is zero nothing below
+  ** assigns it before the final "if( rc )" test. */
+  int rc = SQLITE_OK;
+  Fts5Storage *p;                 /* New object */
+
+  *pp = p = (Fts5Storage*)sqlite3_malloc(sizeof(Fts5Storage));
+  if( !p ) return SQLITE_NOMEM;
+
+  memset(p, 0, sizeof(Fts5Storage));
+  p->pConfig = pConfig;
+  p->pIndex = pIndex;
+
+  if( bCreate ){
+    int i;
+    /* 32 bytes cover "id INTEGER PRIMARY KEY" and its terminator; each
+    ** column then adds ", c<N>", budgeted at 10 bytes. */
+    char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10);
+    if( zDefn==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      int iOff = sprintf(zDefn, "id INTEGER PRIMARY KEY");
+      for(i=0; i<pConfig->nCol; i++){
+        iOff += sprintf(&zDefn[iOff], ", c%d", i);
+      }
+      rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, pzErr);
+    }
+    sqlite3_free(zDefn);
+    if( rc==SQLITE_OK ){
+      rc = sqlite3Fts5CreateTable(
+          pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", pzErr
+      );
+    }
+  }
+
+  if( rc ){
+    sqlite3Fts5StorageClose(p, 0);
+    *pp = 0;
+  }
+  return rc;
+}
+
+/*
+** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen().
+*/
+int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy){
+  int rc = SQLITE_OK;
+  int i;
+
+  /* Closing a NULL handle is a harmless no-op. sqlite3Fts5StorageOpen()
+  ** leaves the caller's pointer NULL when it fails. */
+  if( p==0 ) return SQLITE_OK;
+
+  /* Finalize all SQL statements */
+  for(i=0; i<(int)(sizeof(p->aStmt)/sizeof(p->aStmt[0])); i++){
+    sqlite3_finalize(p->aStmt[i]);
+  }
+
+  /* If required, remove the shadow tables from the database. Report the
+  ** first error encountered, including one from dropping %_docsize. */
+  if( bDestroy ){
+    rc = sqlite3Fts5DropTable(p->pConfig, "content");
+    if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "docsize");
+  }
+
+  sqlite3_free(p);
+  return rc;
+}
+
+/*
+** Remove a row from the FTS table.
+**
+** Not yet implemented: the assert() below always fails in debug builds,
+** and in other builds the function returns SQLITE_OK without doing
+** anything.
+*/
+int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){
+  assert( !"do this" );
+  return SQLITE_OK;
+}
+
+/*
+** Context object passed to fts5StorageInsertCallback().
+*/
+typedef struct Fts5InsertCtx Fts5InsertCtx;
+struct Fts5InsertCtx {
+  Fts5Storage *pStorage;          /* Storage handle being written */
+  int iCol;                       /* Column value passed to IndexWrite() */
+};
+
+/*
+** Tokenization callback used when inserting tokens into the FTS index.
+** Each token is forwarded to sqlite3Fts5IndexWrite() together with the
+** column number stored in the context object. Always returns SQLITE_OK.
+*/
+static int fts5StorageInsertCallback(
+  void *pContext,                 /* Pointer to Fts5InsertCtx object */
+  const char *pToken,             /* Buffer containing token */
+  int nToken,                     /* Size of token in bytes */
+  int iStart,                     /* Start offset of token */
+  int iEnd,                       /* End offset of token */
+  int iPos                        /* Position offset of token */
+){
+  Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
+  Fts5Index *pIdx = pCtx->pStorage->pIndex;
+  sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
+  return SQLITE_OK;
+}
+
+/*
+** If a row with rowid iDel is present in the %_content table, add the
+** delete-markers to the FTS index necessary to delete it. Do not actually
+** remove the %_content row at this time though.
+*/
+static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){
+  Fts5Config *pConfig = p->pConfig;
+  sqlite3_stmt *pSeek;            /* SELECT to read row iDel from %_content */
+  int rc;                         /* Return code */
+
+  rc = fts5StorageGetStmt(p, FTS5_STMT_SEEK_CONTENT, &pSeek);
+  if( rc==SQLITE_OK ){
+    int rc2;
+    sqlite3_bind_int64(pSeek, 1, iDel);
+    if( sqlite3_step(pSeek)==SQLITE_ROW ){
+      int iCol;
+      Fts5InsertCtx ctx;
+      ctx.pStorage = p;
+      ctx.iCol = -1;              /* Every token is written with column -1 */
+      sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
+
+      /* Column 0 of the SELECT is the id; user columns start at 1. Stop
+      ** at the first tokenizer error so that it is not overwritten. */
+      for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
+        /* Fetch the text pointer before the byte count. The order in
+        ** which function-call arguments are evaluated is unspecified. */
+        const char *zText = (const char*)sqlite3_column_text(pSeek, iCol);
+        int nText = sqlite3_column_bytes(pSeek, iCol);
+        rc = sqlite3Fts5Tokenize(pConfig, zText, nText,
+            (void*)&ctx, fts5StorageInsertCallback
+        );
+      }
+    }
+    rc2 = sqlite3_reset(pSeek);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  return rc;
+}
+
+/*
+** Insert a new row into the FTS table.
+**
+** The row is first written to the %_content table, then each column value
+** is tokenized and the tokens are added to the FTS index. If eConflict is
+** SQLITE_REPLACE and an integer rowid was supplied, index entries for any
+** existing row with that rowid are marked for deletion first.
+*/
+int sqlite3Fts5StorageInsert(
+  Fts5Storage *p,                 /* Storage module to write to */
+  sqlite3_value **apVal,          /* Array of values passed to xUpdate() */
+  int eConflict,                  /* on conflict clause */
+  i64 *piRowid                    /* OUT: rowid of new record */
+){
+  Fts5Config *pConfig = p->pConfig;
+  int rc = SQLITE_OK;             /* Return code */
+  sqlite3_stmt *pInsert;          /* Statement used to write %_content table */
+  int eStmt;                      /* Type of statement used on %_content */
+  int i;                          /* Counter variable */
+  Fts5InsertCtx ctx;              /* Tokenization callback context object */
+
+  /* Insert the new row into the %_content table. */
+  if( eConflict==SQLITE_REPLACE ){
+    eStmt = FTS5_STMT_REPLACE_CONTENT;
+    if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){
+      rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1]));
+    }
+  }else{
+    eStmt = FTS5_STMT_INSERT_CONTENT;
+  }
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageGetStmt(p, eStmt, &pInsert);
+  }
+
+  /* apVal[1] is bound to parameter 1 (the id), and apVal[2] onwards to
+  ** the user columns. */
+  for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
+    rc = sqlite3_bind_value(pInsert, i, apVal[i]);
+  }
+  if( rc==SQLITE_OK ){
+    sqlite3_step(pInsert);
+    rc = sqlite3_reset(pInsert);
+  }
+  *piRowid = sqlite3_last_insert_rowid(pConfig->db);
+
+  /* Add new entries to the FTS index */
+  sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
+  ctx.pStorage = p;
+  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
+    /* Fetch the text pointer before the byte count. The order in which
+    ** function-call arguments are evaluated is unspecified. */
+    const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]);
+    int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]);
+    rc = sqlite3Fts5Tokenize(pConfig, zText, nText,
+        (void*)&ctx, fts5StorageInsertCallback
+    );
+  }
+
+  return rc;
+}
+
+/*
+** Context object used by sqlite3Fts5StorageIntegrity().
+*/
+typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
+struct Fts5IntegrityCtx {
+  i64 iRowid;                     /* Rowid of the row being tokenized */
+  int iCol;                       /* Column being tokenized */
+  u64 cksum;                      /* XOR of all token checksums so far */
+  Fts5Config *pConfig;            /* FTS5 configuration */
+};
+
+/*
+** Tokenization callback used by integrity check. XORs the checksum of
+** each token into Fts5IntegrityCtx.cksum. Always returns SQLITE_OK.
+*/
+static int fts5StorageIntegrityCallback(
+  void *pContext,                 /* Pointer to Fts5IntegrityCtx object */
+  const char *pToken,             /* Buffer containing token */
+  int nToken,                     /* Size of token in bytes */
+  int iStart,                     /* Start offset of token */
+  int iEnd,                       /* End offset of token */
+  int iPos                        /* Position offset of token */
+){
+  Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
+  pCtx->cksum ^= sqlite3Fts5IndexCksum(
+      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
+  );
+  return SQLITE_OK;
+}
+
+/*
+** Check that the contents of the FTS index match that of the %_content
+** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
+** some other SQLite error code if an error occurs while attempting to
+** determine this.
+*/ +int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ + Fts5Config *pConfig = p->pConfig; + int rc; /* Return code */ + Fts5IntegrityCtx ctx; + sqlite3_stmt *pScan; + + memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); + ctx.pConfig = p->pConfig; + + /* Generate the expected index checksum based on the contents of the + ** %_content table. This block stores the checksum in ctx.cksum. */ + rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_CONTENT, &pScan); + if( rc==SQLITE_OK ){ + int rc2; + while( SQLITE_ROW==sqlite3_step(pScan) ){ + int i; + ctx.iRowid = sqlite3_column_int64(pScan, 0); + for(i=0; rc==SQLITE_OK && inCol; i++){ + ctx.iCol = i; + rc = sqlite3Fts5Tokenize( + pConfig, + (const char*)sqlite3_column_text(pScan, i+1), + sqlite3_column_bytes(pScan, i+1), + (void*)&ctx, + fts5StorageIntegrityCallback + ); + } + } + rc2 = sqlite3_reset(pScan); + if( rc==SQLITE_OK ) rc = rc2; + } + + /* Pass the expected checksum down to the FTS index module. It will + ** verify, amongst other things, that it matches the checksum generated by + ** inspecting the index itself. */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum); + } + + return rc; +} + + diff --git a/main.mk b/main.mk index 30149eccab..cde67dbb2e 100644 --- a/main.mk +++ b/main.mk @@ -47,6 +47,7 @@ TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3 TCCX += -I$(TOP)/ext/async +TCCX += -I$(TOP)/ext/fts5 # Object files for the SQLite library. # @@ -71,6 +72,13 @@ LIBOBJ+= vdbe.o parse.o \ vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \ vdbetrace.o wal.o walker.o where.o utf.o vtab.o +LIBOBJ += fts5.o +LIBOBJ += fts5_config.o +LIBOBJ += fts5_expr.o +LIBOBJ += fts5_index.o +LIBOBJ += fts5_storage.o +LIBOBJ += fts5parse.o + # All of the source code files. @@ -375,6 +383,8 @@ EXTHDR += \ $(TOP)/ext/rtree/rtree.h EXTHDR += \ $(TOP)/ext/icu/sqliteicu.h +EXTHDR += \ + $(TOP)/ext/fts5/fts5Int.h # This is the default Makefile target. 
The objects listed here # are what get build when you type just "make" with no arguments. @@ -553,10 +563,33 @@ fts3_unicode2.o: $(TOP)/ext/fts3/fts3_unicode2.c $(HDR) $(EXTHDR) fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c +fts5.o: $(TOP)/ext/fts5/fts5.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5.c + rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c +# FTS5 things +# +fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c + +fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c + +fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c + +fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_storage.c + +fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon + cp $(TOP)/ext/fts5/fts5parse.y . + rm -f fts5parse.h + ./lemon $(OPTS) fts5parse.y + + # Rules for building test programs and for running tests # tclsqlite3: $(TOP)/src/tclsqlite.c libsqlite3.a diff --git a/manifest b/manifest index fe8a9edf8e..fe4d032557 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sSQLITE_OMIT_WSD\sbuilds. -D 2014-06-23T10:18:50.447 +C Add\ssome\scode\sfor\san\sexperimental\sfts5\smodule.\sDoes\snot\swork\syet. 
+D 2014-06-23T11:33:22.754 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -79,7 +79,7 @@ F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d F ext/fts3/fts3.c 20bc65862cfcea0a39bb64a819f8fe92a8e144c1 -F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe +F ext/fts3/fts3.h 62a77d880cf06a2865052726f8325c8fabcecad7 F ext/fts3/fts3Int.h 16cddf2d7b0e5f3681615ae1d8ca0e45fca44918 F ext/fts3/fts3_aux.c 5c211e17a64885faeb16b9ba7772f9d5445c2365 F ext/fts3/fts3_expr.c 351395fad6fcb16ecfc61db0861008a70101330c @@ -103,6 +103,12 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 +F ext/fts5/fts5.c 2cb2cc3c1acefa36d9e8ce8e68bceaac8515059a +F ext/fts5/fts5Int.h cc41cf776a3e612aa3a461e96463647fd3957bed +F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef +F ext/fts5/fts5_expr.c bdfb98dab7729cf967022d7a4a815828bbad8c23 +F ext/fts5/fts5_index.c 0548e8925a0664cfa00b2477ebe9afa18bc7848f +F ext/fts5/fts5_storage.c aa1ff4b6b283303ffd8c5dc57a45ebe55e62a7b2 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -146,7 +152,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 7b1d0be0840f213405c977c87917241158126a33 +F 
main.mk 2bb1ec703ac4f27743961764b59cfb5f91d72bfe F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -189,7 +195,7 @@ F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d F src/legacy.c 0df0b1550b9cc1f58229644735e317ac89131f12 F src/lempar.c cdf0a000315332fc9b50b62f3b5e22e080a0952b F src/loadext.c 867c7b330b740c6c917af9956b13b81d0a048303 -F src/main.c 7c2c3cafdd6313c8f9319ebec1565782e624372e +F src/main.c e777879ad7c431f5b3b5d49c8419727b61d7c1be F src/malloc.c 0203ebce9152c6a0e5de520140b8ba65187350be F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c c0c990fcaddff810ea277b4fb5d9138603dd5d4b @@ -585,6 +591,8 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 +F test/fts5aa.test bbea71fed733b1d433bf83dbc8d86077936d1efc +F test/fts5ea.test 814287a2cb25ac3e59abbe4ccbcabf6bda821868 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -1179,7 +1187,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 612b6d1b1f74eaf618520b90811eca10f978fc71 -R 283ec0802d51fd4e82222d529f6a8475 +P 07dda49c1bf8997a18c3368acb81b6d863ea38d6 +R 66e5d0ccaa728e4d98b92edeb331ffb3 +T *branch * fts5 +T *sym-fts5 * +T -sym-trunk * U dan -Z de359222916ca6f6bd684ca986937509 +Z e3b7f827041011d2f1d78b39cdee11d7 diff --git a/manifest.uuid b/manifest.uuid index 1a8777f6b6..6f7226ff29 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 
@@ -07dda49c1bf8997a18c3368acb81b6d863ea38d6 \ No newline at end of file +1e0648dcf283d4f1f6159db4d2433b6cc635992e \ No newline at end of file diff --git a/src/main.c b/src/main.c index 564c012472..5d894b2ea9 100644 --- a/src/main.c +++ b/src/main.c @@ -2609,6 +2609,7 @@ static int openDatabase( #ifdef SQLITE_ENABLE_FTS3 if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3Fts3Init(db); + if( rc==SQLITE_OK ) rc = sqlite3Fts5Init(db); } #endif diff --git a/test/fts5aa.test b/test/fts5aa.test new file mode 100644 index 0000000000..699d01d033 --- /dev/null +++ b/test/fts5aa.test @@ -0,0 +1,248 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5aa + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. 
+ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); + SELECT name, sql FROM sqlite_master; +} { + t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)} + t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)} + t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)} + t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)} +} + +do_execsql_test 1.1 { + DROP TABLE t1; + SELECT name, sql FROM sqlite_master; +} { +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); +} +do_execsql_test 2.1 { + INSERT INTO t1 VALUES('a b c', 'd e f'); +} +do_execsql_test 2.2 { + SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 +} { + {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} +} +do_execsql_test 2.3 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); +} +foreach {i x y} { + 1 {g f d b f} {h h e i a} + 2 {f i g j e} {i j c f f} + 3 {e e i f a} {e h f d f} + 4 {h j f j i} {h a c f j} + 5 {d b j c g} {f e i b e} + 6 {a j a e e} {j d f d e} + 7 {g i j c h} {j d h c a} + 8 {j j i d d} {e e d f b} + 9 {c j j d c} {h j i f g} + 10 {b f h i a} {c f b b j} +} { + do_execsql_test 3.$i.1 { INSERT INTO t1 VALUES($x, $y) } + do_execsql_test 3.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + if {[set_test_counter errors]} break +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} +foreach {i x y} { + 1 {g f d b f} {h h e i a} + 2 {f i g j e} {i j c f f} + 3 {e e i f a} {e h f d f} + 4 {h j f j i} {h a c f j} + 5 {d b j c g} {f e i b e} + 6 {a j a e e} 
{j d f d e} + 7 {g i j c h} {j d h c a} + 8 {j j i d d} {e e d f b} + 9 {c j j d c} {h j i f g} + 10 {b f h i a} {c f b b j} +} { + do_execsql_test 4.$i.1 { INSERT INTO t1 VALUES($x, $y) } + do_execsql_test 4.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + if {[set_test_counter errors]} break +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} +foreach {i x y} { + 1 {dd abc abc abc abcde} {aaa dd ddd ddd aab} + 2 {dd aab d aaa b} {abcde c aaa aaa aaa} + 3 {abcde dd b b dd} {abc abc d abc ddddd} + 4 {aaa abcde dddd dddd abcde} {abc b b abcde abc} + 5 {aab dddd d dddd c} {ddd abcde dddd abcde c} + 6 {ddd dd b aab abcde} {d ddddd dddd c abc} + 7 {d ddddd ddd c abcde} {c aab d abcde ddd} + 8 {abcde aaa aab c c} {ddd c dddd b aaa} + 9 {abcde aab ddddd c aab} {dddd dddd b c dd} + 10 {ddd abcde dddd dd c} {dddd c c d abcde} +} { + do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) } + do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + if {[set_test_counter errors]} break +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +do_execsql_test 6.1 { + INSERT INTO t1(rowid, x, y) VALUES(22, 'a b c', 'c b a'); + REPLACE INTO t1(rowid, x, y) VALUES(22, 'd e f', 'f e d'); +} + +do_execsql_test 6.2 { + INSERT INTO t1(t1) VALUES('integrity-check') +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +proc doc {} { + set v [list aaa aab abc abcde b c d dd ddd dddd ddddd] + set ret [list] + for {set j 0} {$j < 20} {incr j} { + lappend ret [lindex $v [expr int(rand()*[llength $v])]] + } + return $ret 
+} + +proc dump_structure {} { + db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} { + foreach lvl [lrange $t 1 end] { + set seg [string repeat . [expr [llength $lvl]-2]] + puts "[lrange $lvl 0 1] $seg" + } + } +} + +for {set i 1} {$i <= 10} {incr i} { + do_test 7.$i { + for {set j 0} {$j < 100} {incr j} { + set x [doc] + set y [doc] + set z [doc] + set rowid [expr int(rand() * 100)] + execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } + } + execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } + } {} + if {[set_test_counter errors]} exit +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 8.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +do_execsql_test 8.1 { + INSERT INTO t1 VALUES('the quick brown fox'); + INSERT INTO t1(t1) VALUES('integrity-check'); +} + + +#finish_test + + +#------------------------------------------------------------------------- +# +reset_db + +expr srand(0) + +do_execsql_test 9.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y,z, prefix="1,2,3"); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +proc doc {} { + set v [list aaa aab abc abcde b c d dd ddd dddd ddddd] + set ret [list] + for {set j 0} {$j < 20} {incr j} { + lappend ret [lindex $v [expr int(rand()*[llength $v])]] + } + return $ret +} + +proc dump_structure {} { + db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} { + foreach lvl [lrange $t 1 end] { + set seg [string repeat . 
[expr [llength $lvl]-2]] + puts "[lrange $lvl 0 1] $seg" + } + } +} + +for {set i 1} {$i <= 10} {incr i} { + do_test 9.$i { + for {set j 0} {$j < 100} {incr j} { + set x [doc] + set y [doc] + set z [doc] + set rowid [expr int(rand() * 100)] + execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } + } + execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } + } {} + if {[set_test_counter errors]} break +} + +finish_test + diff --git a/test/fts5ea.test b/test/fts5ea.test new file mode 100644 index 0000000000..fdb28769cf --- /dev/null +++ b/test/fts5ea.test @@ -0,0 +1,84 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5ea + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. 
+ifcapable !fts3 { + finish_test + return +} + +proc do_syntax_error_test {tn expr err} { + set ::se_expr $expr + do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err] +} + +proc do_syntax_test {tn expr res} { + set ::se_expr $expr + do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] +} + +foreach {tn expr res} { + 1 {abc} {"abc"} + 2 {abc def} {"abc" AND "def"} + 3 {abc*} {"abc" *} + 4 {"abc def ghi" *} {"abc" + "def" + "ghi" *} + 5 {one AND two} {"one" AND "two"} + 6 {one+two} {"one" + "two"} + 7 {one AND two OR three} {("one" AND "two") OR "three"} + 8 {one OR two AND three} {"one" OR ("two" AND "three")} + 9 {NEAR(one two)} {NEAR("one" "two", 10)} + 10 {NEAR("one three"* two, 5)} {NEAR("one" + "three" * "two", 5)} +} { + do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] +} + +foreach {tn expr res} { + 1 {c1:abc} + {c1 : "abc"} + 2 {c2 : NEAR(one two) c1:"hello world"} + {c2 : NEAR("one" "two", 10) AND c1 : "hello" + "world"} +} { + do_execsql_test 2.$tn {SELECT fts5_expr($expr, 'c1', 'c2')} [list $res] +} + +breakpoint +foreach {tn expr err} { + 1 {AND} {syntax error near "AND"} + 2 {abc def AND} {syntax error near ""} + 3 {abc OR AND} {syntax error near "AND"} + 4 {(a OR b) abc} {syntax error near "abc"} + 5 {NEaR (a b)} {syntax error near "NEaR"} + 6 {(a OR b) NOT c)} {syntax error near ")"} + 7 {nosuch: a nosuch2: b} {no such column: nosuch} + 8 {addr: a nosuch2: b} {no such column: nosuch2} +} { + do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] +} + + + +# do_syntax_error_test 1.0 {NOT} {syntax error near "NOT"} + + + +# do_catchsql_test 1.1 { + # SELECT fts5_expr('a OR b NOT c') +#} {0 {"a" OR "b" NOT "c"}} + + +#do_execsql_test 1.0 { SELECT fts5_expr('a') } {{"a"}} + +finish_test