sqlite3_free(p->zReadExprlist);
sqlite3_free(p->zWriteExprlist);
sqlite3_free(p->zContentTbl);
+ sqlite3_free(p->zLanguageid);
/* Invoke the tokenizer destructor to free the tokenizer. */
p->pTokenizer->pModule->xDestroy(p->pTokenizer);
int rc; /* Return code */
char *zSql; /* SQL statement passed to declare_vtab() */
char *zCols; /* List of user defined columns */
+ const char *zLanguageid;
+ zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
/* Create a list of user columns for the virtual table */
/* Create the whole "CREATE TABLE" statement to pass to SQLite */
zSql = sqlite3_mprintf(
- "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
+ "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)",
+ zCols, p->zName, zLanguageid
);
if( !zCols || !zSql ){
rc = SQLITE_NOMEM;
sqlite3 *db = p->db; /* The database connection */
if( p->zContentTbl==0 ){
+ const char *zLanguageid = p->zLanguageid;
char *zContentCols; /* Columns of %_content table */
/* Create a list of user columns for the content table */
char *z = p->azColumn[i];
zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
}
+ if( zLanguageid && zContentCols ){
+ zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
+ }
if( zContentCols==0 ) rc = SQLITE_NOMEM;
/* Create the content table */
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
}
+ if( p->zLanguageid ) fts3Appendf(pRc, &zRet, ",langid");
sqlite3_free(zFree);
}else{
fts3Appendf(pRc, &zRet, "rowid");
fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
}
}
- fts3Appendf(pRc, &zRet, "FROM '%q'.'%q%s' AS x",
+ fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x",
p->zDb,
(p->zContentTbl ? p->zContentTbl : p->zName),
(p->zContentTbl ? "" : "_content")
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
}
+ if( p->zLanguageid ){
+ fts3Appendf(pRc, &zRet, ", ?");
+ }
sqlite3_free(zFree);
return zRet;
}
char *zCompress = 0; /* compress=? parameter (or NULL) */
char *zUncompress = 0; /* uncompress=? parameter (or NULL) */
char *zContent = 0; /* content=? parameter (or NULL) */
+ char *zLanguageid = 0; /* languageid=? parameter (or NULL) */
assert( strlen(argv[0])==4 );
assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
{ "compress", 8 }, /* 2 -> COMPRESS */
{ "uncompress", 10 }, /* 3 -> UNCOMPRESS */
{ "order", 5 }, /* 4 -> ORDER */
- { "content", 7 } /* 5 -> CONTENT */
+ { "content", 7 }, /* 5 -> CONTENT */
+ { "languageid", 10 } /* 6 -> LANGUAGEID */
};
int iOpt;
bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
break;
- default: /* CONTENT */
- assert( iOpt==5 );
- sqlite3_free(zUncompress);
+ case 5: /* CONTENT */
+ sqlite3_free(zContent);
zContent = zVal;
zVal = 0;
break;
+
+ case 6: /* LANGUAGEID */
+ assert( iOpt==6 );
+ sqlite3_free(zLanguageid);
+ zLanguageid = zVal;
+ zVal = 0;
+ break;
}
}
sqlite3_free(zVal);
p->bHasStat = isFts4;
p->bDescIdx = bDescIdx;
p->zContentTbl = zContent;
+ p->zLanguageid = zLanguageid;
zContent = 0;
+ zLanguageid = 0;
TESTONLY( p->inTransaction = -1 );
TESTONLY( p->mxSavepoint = -1 );
sqlite3_free(zCompress);
sqlite3_free(zUncompress);
sqlite3_free(zContent);
+ sqlite3_free(zLanguageid);
sqlite3_free((void *)aCol);
if( rc!=SQLITE_OK ){
if( p ){
Fts3Table *p = (Fts3Table *)pVTab;
int i; /* Iterator variable */
int iCons = -1; /* Index of constraint to use */
+ int iLangidCons = -1; /* Index of langid=x constraint, if present */
/* By default use a full table scan. This is an expensive option,
** so search through the constraints to see if a more efficient
if( pCons->usable==0 ) continue;
/* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
- if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
+ if( iCons<0
+ && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
&& (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
){
pInfo->idxNum = FTS3_DOCID_SEARCH;
pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
pInfo->estimatedCost = 2.0;
iCons = i;
- break;
+ }
+
+ /* Equality constraint on the langid column */
+ if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
+ && pCons->iColumn==p->nColumn + 2
+ ){
+ iLangidCons = i;
}
}
pInfo->aConstraintUsage[iCons].argvIndex = 1;
pInfo->aConstraintUsage[iCons].omit = 1;
}
+ if( iLangidCons>=0 ){
+ pInfo->aConstraintUsage[iLangidCons].argvIndex = 2;
+ }
/* Regardless of the strategy selected, FTS can deliver rows in rowid (or
** docid) order. Both ascending and descending are possible.
*/
static int fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
+ int iLangid, /* Language id */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
if( rc==SQLITE_OK ){
- rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt);
+ rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
}
while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
*/
int sqlite3Fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
+ int iLangid,
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
memset(pCsr, 0, sizeof(Fts3MultiSegReader));
return fts3SegReaderCursor(
- p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
+ p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
);
}
*/
static int fts3SegReaderCursorAddZero(
Fts3Table *p, /* FTS virtual table handle */
+ int iLangid, /* Language id */
const char *zTerm, /* Term to scan doclist of */
int nTerm, /* Number of bytes in zTerm */
Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */
){
- return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
+ return fts3SegReaderCursor(p,
+ iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
+ );
}
/*
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm ){
bFound = 1;
- rc = sqlite3Fts3SegReaderCursor(
- p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr);
+ rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
+ i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
+ );
pSegcsr->bLookup = 1;
}
}
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm+1 ){
bFound = 1;
- rc = sqlite3Fts3SegReaderCursor(
- p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
+ rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
+ i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
);
if( rc==SQLITE_OK ){
- rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr);
+ rc = fts3SegReaderCursorAddZero(
+ p, pCsr->iLangid, zTerm, nTerm, pSegcsr
+ );
}
}
}
}
if( bFound==0 ){
- rc = sqlite3Fts3SegReaderCursor(
- p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
+ rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
+ 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
);
pSegcsr->bLookup = !isPrefix;
}
UNUSED_PARAMETER(nVal);
assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
- assert( nVal==0 || nVal==1 );
+ assert( nVal==0 || nVal==1 || nVal==2 );
assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
assert( p->pSegments==0 );
return rc;
}
+ pCsr->iLangid = 0;
+ if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
+
rc = sqlite3Fts3ReadLock(p);
if( rc!=SQLITE_OK ) return rc;
/*
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
+**
+** If:
+**
+** (iCol < p->nColumn) -> The value of the iCol'th user column.
+** (iCol == p->nColumn) -> Magic column with the same name as the table.
+** (iCol == p->nColumn+1) -> Docid column
+** (iCol == p->nColumn+2) -> Langid column
*/
static int fts3ColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
- sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */
+ sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
int rc = SQLITE_OK; /* Return Code */
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
/* The column value supplied by SQLite must be in range. */
- assert( iCol>=0 && iCol<=p->nColumn+1 );
+ assert( iCol>=0 && iCol<=p->nColumn+2 );
if( iCol==p->nColumn+1 ){
/* This call is a request for the "docid" column. Since "docid" is an
** alias for "rowid", use the xRowid() method to obtain the value.
*/
- sqlite3_result_int64(pContext, pCsr->iPrevId);
+ sqlite3_result_int64(pCtx, pCsr->iPrevId);
}else if( iCol==p->nColumn ){
/* The extra column whose name is the same as the table.
- ** Return a blob which is a pointer to the cursor.
- */
- sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
+ ** Return a blob which is a pointer to the cursor. */
+ sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
}else{
+ /* The requested column is either a user column (one that contains
+ ** indexed data), or the language-id column. */
rc = fts3CursorSeek(0, pCsr);
- if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
- sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
+
+ if( rc==SQLITE_OK ){
+ if( iCol==p->nColumn+2 ){
+ int iLangid = 0;
+ if( p->zLanguageid ){
+ iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1);
+ }
+ sqlite3_result_int(pCtx, iLangid);
+ }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
+ sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
+ }
}
}
char **azColumn; /* column names. malloced */
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
char *zContentTbl; /* content=xxx option, or NULL */
+ char *zLanguageid; /* languageid=xxx option, or NULL */
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
/* TODO: Fix the first paragraph of this comment.
**
- ** The following hash table is used to buffer pending index updates during
- ** transactions. Variable nPendingData estimates the memory size of the
- ** pending data, including hash table overhead, but not malloc overhead.
- ** When nPendingData exceeds nMaxPendingData, the buffer is flushed
- ** automatically. Variable iPrevDocid is the docid of the most recently
- ** inserted record.
+ ** The following array of hash tables is used to buffer pending index
+ ** updates during transactions. Variable nPendingData estimates the memory
+ ** size of the pending data, including hash table overhead, not including
+ ** malloc overhead. When nPendingData exceeds nMaxPendingData, the buffer
+ ** is flushed automatically. Variable iPrevDocid is the docid of the most
+ ** recently inserted record.
**
** A single FTS4 table may have multiple full-text indexes. For each index
** there is an entry in the aIndex[] array. Index 0 is an index of all the
int nMaxPendingData; /* Max pending data before flush to disk */
int nPendingData; /* Current bytes of pending data */
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
+ int iPrevLangid; /* Langid of recently inserted document */
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
/* State variables used for validating that the transaction control
** methods of the virtual table are called at appropriate times. These
- ** values do not contribution to the FTS computation; they are used for
- ** verifying the SQLite core.
+ ** values do not contribute to FTS functionality; they are used for
+ ** verifying the operation of the SQLite core.
*/
int inTransaction; /* True after xBegin but before xCommit/xRollback */
int mxSavepoint; /* Largest valid xSavepoint integer */
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
Fts3Expr *pExpr; /* Parsed MATCH query string */
+ int iLangid; /* Language being queried for */
int nPhrase; /* Number of matchable phrases in query */
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
int sqlite3Fts3SegReaderPending(
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
-int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **);
+int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
-int sqlite3Fts3SegReaderCursor(
- Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *);
+int sqlite3Fts3SegReaderCursor(Fts3Table *,
+ int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
}
- rc = sqlite3Fts3SegReaderCursor(pFts3, 0, FTS3_SEGCURSOR_ALL,
+ rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
);
if( rc==SQLITE_OK ){
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
- rc = sqlite3Fts3SegReaderCursor(pFts3, p->iIndex, FTS3_SEGCURSOR_ALL,
+ rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
);
if( rc==SQLITE_OK ){
return rc;
}
+/*
+** Return the value stored in the "level" column of the %_segdir table for
+** segments that belong to language id iLangid, index iIndex (an index into
+** p->aIndex[]) and relative level iLevel.
+**
+** Each (iLangid, iIndex) pair owns a contiguous range of
+** FTS3_SEGDIR_MAXLEVEL absolute levels. The arithmetic is carried out
+** using 64-bit integers so that large language ids cannot overflow a
+** 32-bit int before the result is widened to the return type.
+*/
+static sqlite3_int64 getAbsoluteLevel(
+  Fts3Table *p,                   /* FTS3 table handle */
+  int iLangid,                    /* Language id */
+  int iIndex,                     /* Index in p->aIndex[] */
+  int iLevel                      /* Level of segments */
+){
+  sqlite3_int64 iBase;            /* First absolute level for iLangid/iIndex */
+
+  assert( iLangid>=0 );
+  assert( p->nIndex>0 );
+  assert( iIndex>=0 && iIndex<p->nIndex );
+
+  iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL;
+  return iBase + iLevel;
+}
+
+
/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
*/
int sqlite3Fts3AllSegdirs(
Fts3Table *p, /* FTS3 table */
+ int iLangid, /* Language being queried */
int iIndex, /* Index for p->aIndex[] */
int iLevel, /* Level to select */
sqlite3_stmt **ppStmt /* OUT: Compiled statement */
/* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
- sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL-1);
+ sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+ sqlite3_bind_int(pStmt, 2,
+ getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+ );
}
}else{
/* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
** fts3PendingTermsAdd() are to add term/position-list pairs for the
** contents of the document with docid iDocid.
*/
-static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){
+static int fts3PendingTermsDocid(
+ Fts3Table *p, /* Full-text table handle */
+ int iLangid, /* Language id of row being written */
+ sqlite_int64 iDocid /* Docid of row being written */
+){
+ assert( iLangid>=0 );
+
/* TODO(shess) Explore whether partially flushing the buffer on
** forced-flush would provide better performance. I suspect that if
** we ordered the doclists by size and flushed the largest until the
** buffer was half empty, that would let the less frequent terms
** generate longer doclists.
*/
- if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){
+ /* Flush if the docid is not greater than the previous one, if the
+ ** language id differs from that of the previous row, or if the pending
+ ** data has grown past nMaxPendingData.
+ ** NOTE(review): the language id check presumably exists because the
+ ** pending-terms buffer is not keyed by language id - confirm.
+ */
+ if( iDocid<=p->iPrevDocid
+ || p->iPrevLangid!=iLangid
+ || p->nPendingData>p->nMaxPendingData
+ ){
int rc = sqlite3Fts3PendingTermsFlush(p);
if( rc!=SQLITE_OK ) return rc;
}
p->iPrevDocid = iDocid;
+ p->iPrevLangid = iLangid;
return SQLITE_OK;
}
** apVal[p->nColumn+1] Right-most user-defined column
** apVal[p->nColumn+2] Hidden column with same name as table
** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid)
+** apVal[p->nColumn+4] Hidden languageid column
*/
static int fts3InsertData(
Fts3Table *p, /* Full-text table */
** defined columns in the FTS3 table, plus one for the docid field.
*/
rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
- if( rc!=SQLITE_OK ){
- return rc;
+ if( rc==SQLITE_OK && p->zLanguageid ){
+ rc = sqlite3_bind_int(
+ pContentInsert, p->nColumn+2,
+ sqlite3_value_int(apVal[p->nColumn+4])
+ );
}
+ if( rc!=SQLITE_OK ) return rc;
/* There is a quirk here. The users INSERT statement may have specified
** a value for the "rowid" field, for the "docid" field, or for both.
return rc;
}
+/*
+** Return the language id for the row that statement pSelect currently
+** points to. If p->zLanguageid is NULL (no languageid=xxx option), the
+** statement is not consulted and 0 is returned. Otherwise the value is
+** read, as an integer, from column (p->nColumn+1) of pSelect.
+*/
+static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
+ int iLangid = 0;
+ if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1);
+ return iLangid;
+}
+
/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
if( rc==SQLITE_OK ){
if( SQLITE_ROW==sqlite3_step(pSelect) ){
int i;
- for(i=1; i<=p->nColumn; i++){
+ rc = fts3PendingTermsDocid(p,
+ langidFromSelect(p, pSelect),
+ sqlite3_column_int64(pSelect, 0)
+ );
+ for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
const char *zText = (const char *)sqlite3_column_text(pSelect, i);
rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
- if( rc!=SQLITE_OK ){
- sqlite3_reset(pSelect);
- *pRC = rc;
- return;
- }
aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
}
+ if( rc!=SQLITE_OK ){
+ sqlite3_reset(pSelect);
+ *pRC = rc;
+ return;
+ }
}
rc = sqlite3_reset(pSelect);
}else{
** Forward declaration to account for the circular dependency between
** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
*/
-static int fts3SegmentMerge(Fts3Table *, int, int);
+static int fts3SegmentMerge(Fts3Table *, int, int, int);
/*
** This function allocates a new level iLevel index in the segdir table.
*/
static int fts3AllocateSegdirIdx(
Fts3Table *p,
+ int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel,
int *piIdx
sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */
int iNext = 0; /* Result of query pNextIdx */
+ assert( iLangid>=0 );
+ assert( p->nIndex>=1 );
+
/* Set variable iNext to the next available segdir index at level iLevel. */
rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pNextIdx, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
+ sqlite3_bind_int64(
+ pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
+ );
if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
iNext = sqlite3_column_int(pNextIdx, 0);
}
** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
*/
if( iNext>=FTS3_MERGE_COUNT ){
- rc = fts3SegmentMerge(p, iIndex, iLevel);
+ rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
*piIdx = 0;
}else{
*piIdx = iNext;
*/
static int fts3DeleteSegdir(
Fts3Table *p, /* Virtual table handle */
+ int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel, /* Level of %_segdir entries to delete */
Fts3SegReader **apSegment, /* Array of SegReader objects */
if( iLevel==FTS3_SEGCURSOR_ALL ){
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
- sqlite3_bind_int(pDelete, 2, (iIndex+1) * FTS3_SEGDIR_MAXLEVEL - 1);
+ sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+ sqlite3_bind_int(pDelete, 2,
+ getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+ );
}
}else{
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
+ sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel));
}
}
** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
** an SQLite error code is returned.
*/
-static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
+static int fts3SegmentMerge(
+ Fts3Table *p,
+ int iLangid, /* Language id to merge */
+ int iIndex, /* Index in p->aIndex[] to merge */
+ int iLevel /* Level to merge */
+){
int rc; /* Return code */
int iIdx = 0; /* Index of new segment */
int iNewLevel = 0; /* Level/index to create new segment at */
SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
Fts3SegFilter filter; /* Segment term filter condition */
- Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
+ Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
int bIgnoreEmpty = 0; /* True to ignore empty segments */
assert( iLevel==FTS3_SEGCURSOR_ALL
assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
assert( iIndex>=0 && iIndex<p->nIndex );
- rc = sqlite3Fts3SegReaderCursor(p, iIndex, iLevel, 0, 0, 1, 0, &csr);
+ rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
if( iLevel==FTS3_SEGCURSOR_ALL ){
bIgnoreEmpty = 1;
}else if( iLevel==FTS3_SEGCURSOR_PENDING ){
- iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL;
- rc = fts3AllocateSegdirIdx(p, iIndex, 0, &iIdx);
+ iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
+ rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
}else{
/* This call is to merge all segments at level iLevel. find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
- rc = fts3AllocateSegdirIdx(p, iIndex, iLevel+1, &iIdx);
- iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL + iLevel+1;
+ rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
+ iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
}
if( rc!=SQLITE_OK ) goto finished;
assert( csr.nSegment>0 );
- assert( iNewLevel>=(iIndex*FTS3_SEGDIR_MAXLEVEL) );
- assert( iNewLevel<((iIndex+1)*FTS3_SEGDIR_MAXLEVEL) );
+ assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
+ assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
memset(&filter, 0, sizeof(Fts3SegFilter));
filter.flags = FTS3_SEGMENT_REQUIRE_POS;
assert( pWriter );
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
- rc = fts3DeleteSegdir(p, iIndex, iLevel, csr.apSegment, csr.nSegment);
+ rc = fts3DeleteSegdir(
+ p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
+ );
if( rc!=SQLITE_OK ) goto finished;
}
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
int rc = SQLITE_OK;
int i;
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
- rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_PENDING);
+ rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
sqlite3Fts3PendingTermsClear(p);
sqlite3_free(a);
}
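+/*
+** Merge the entire database so that there is one segment for each
+** iIndex/iLangid combination.
+**
+** NOTE(review): as written, the loop below passes language id 0 to
+** fts3SegmentMerge(), so only segments belonging to language id 0 are
+** merged. Confirm whether other language ids should be handled too.
+*/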
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
int i;
int bSeenDone = 0;
int rc = SQLITE_OK;
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
- rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_ALL);
+ rc = fts3SegmentMerge(p, 0, i, FTS3_SEGCURSOR_ALL);
if( rc==SQLITE_DONE ){
bSeenDone = 1;
rc = SQLITE_OK;
while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
int iCol;
- rc = fts3PendingTermsDocid(p, sqlite3_column_int64(pStmt, 0));
+ rc = fts3PendingTermsDocid(p,
+ langidFromSelect(p, pStmt), sqlite3_column_int64(pStmt, 0)
+ );
aSz[p->nColumn] = 0;
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
rc = fts3DeleteAll(p, 1);
*pnDoc = *pnDoc - 1;
}else{
- sqlite3_int64 iRemove = sqlite3_value_int64(pRowid);
- rc = fts3PendingTermsDocid(p, iRemove);
fts3DeleteTerms(&rc, p, pRowid, aSzDel);
if( p->zContentTbl==0 ){
fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
/*
** This function does the work for the xUpdate method of FTS3 virtual
-** tables.
+** tables. The schema of the virtual table being:
+**
+** CREATE TABLE <table name>(
+** <user columns>,
+** <table name> HIDDEN,
+** docid HIDDEN,
+** <langid> HIDDEN
+** );
+**
+**
*/
int sqlite3Fts3UpdateMethod(
sqlite3_vtab *pVtab, /* FTS3 vtab object */
int bInsertDone = 0;
assert( p->pSegments==0 );
+ assert(
+ nArg==1 /* DELETE operations */
+ || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */
+ );
/* Check for a "special" INSERT operation. One of the form:
**
goto update_out;
}
+ if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
+ rc = SQLITE_CONSTRAINT;
+ goto update_out;
+ }
+
/* Allocate space to hold the change in document sizes */
aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
if( aSzIns==0 ){
}
}
if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
- rc = fts3PendingTermsDocid(p, *pRowid);
+ rc = fts3PendingTermsDocid(p,
+ sqlite3_value_int(apVal[2 + p->nColumn + 2]),
+ *pRowid
+ );
}
if( rc==SQLITE_OK ){
assert( p->iPrevDocid==*pRowid );
-C Fix\sspurious\serrors\sthat\smay\soccur\sif\san\sempty\sdatabase\sis\sopened\sand\sthen\sinitialized\sas\sa\sWAL\sdatabase\sby\sa\ssecond\sconnection.
-D 2012-02-28T17:57:34.628
+C Add\sthe\s"languageid="\soption\sto\sfts4.\sThis\scode\sis\sstill\slargely\suntested\sand\salsmost\scertainly\sbuggy.
+D 2012-03-01T19:44:20.362
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c 4cf7b8e5bbb6667f5d7818fa0bf064fbbb72b086
+F ext/fts3/fts3.c 93a8eb6e6eb4cd0aa4856d841a9d8d0025a2784a
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
-F ext/fts3/fts3Int.h ce958a6fa92a95462853aa3acc0b69bcda39102f
-F ext/fts3/fts3_aux.c 0ebfa7b86cf8ff6a0861605fcc63b83ec1b70691
+F ext/fts3/fts3Int.h 8ba2d8ce5db6da67c5e5e7b8a0b90e6d80826546
+F ext/fts3/fts3_aux.c 72de4cb43db7bfc2f68fbda04b7d8095ae9a6239
F ext/fts3/fts3_expr.c f5df26bddf46a5916b2a5f80c4027996e92b7b15
F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c 6c8f395cdf9e1e3afa7fadb7e523dbbf381c6dfa
F ext/fts3/fts3_porter.c b7e5276f9f0a5fc7018b6fa55ce0f31f269ef881
F ext/fts3/fts3_snippet.c 1f9ee6a8e0e242649645968dcec4deb253d86c2a
-F ext/fts3/fts3_term.c a5457992723455a58804cb75c8cbd8978db5c2ef
+F ext/fts3/fts3_term.c d3466cf99432291be08e379d89645462431809d6
F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894
F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68
-F ext/fts3/fts3_write.c 1721187a4dec29ef9ae648ad8478da741085af18
+F ext/fts3/fts3_write.c 489d262b1ee9ab1dbb4da48bd8737fac15d0f58f
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659
F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68
F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f
+F test/fts4langid.test 3d968b7c0afb8be1609794267f34b89d378a81ea
F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
-P c267893a0813beb1764071409025e178318e1ca3
-R 751c4f9505bcae82ab2d498f42151225
+P 16330a2f7262173a32ae48a72c0ee2522b6dc554
+R 86036df8ba11902f17395620671e5794
+T *branch * fts4-languageid
+T *sym-fts4-languageid *
+T -sym-trunk *
U dan
-Z aacac9f6818b59f3fbe792ef77401913
+Z 6902c01b6e8a000d5e06f8fe8778490f
-16330a2f7262173a32ae48a72c0ee2522b6dc554
\ No newline at end of file
+bea257f70f10dd1111d79cabd1e1462dc651704d
\ No newline at end of file
--- /dev/null
+# 2012 March 01
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library. The
+# focus of this script is testing the languageid=xxx FTS4 option.
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
+ifcapable !fts3 {
+  finish_test
+  return
+}
+
+set ::testprefix fts4langid
+
+#-------------------------------------------------------------------------
+# Test cases 1.1-1.2: creating a table with the languageid=xxx option adds
+# a "langid" column to the %_content table.
+#
+do_execsql_test 1.1 {
+  CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id);
+}
+
+do_execsql_test 1.2 {
+  SELECT sql FROM sqlite_master WHERE name = 't1_content';
+} {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}}
+
+#-------------------------------------------------------------------------
+# Test cases 1.3-1.10: the hidden column named by languageid=xxx can be
+# read and written. It reads as 0 when no value is supplied and when a
+# non-numeric value ('xyz') is supplied.
+#
+do_execsql_test 1.3 {SELECT docid FROM t1} {}
+do_execsql_test 1.4 {SELECT lang_id FROM t1} {}
+
+do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')}
+do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0}
+
+do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)}
+do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4}
+
+do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')}
+do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0}
+
+#-------------------------------------------------------------------------
+# Test cases 1.11-1.12: a table created without the languageid=xxx option
+# can still be queried with MATCH.
+#
+do_execsql_test 1.11 {
+  CREATE VIRTUAL TABLE t2 USING fts4;
+  INSERT INTO t2 VALUES('abc');
+}
+do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1
+
+#-------------------------------------------------------------------------
+# Test cases 1.13-1.16: a MATCH query with no constraint on lang_id only
+# returns rows with language id 0. Adding "lang_id = N" restricts the
+# query to rows with language id N.
+#
+do_execsql_test 1.13 {
+  DROP TABLE t1;
+  CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
+  INSERT INTO t1(content) VALUES('a b c');
+  INSERT INTO t1(content, lang_id) VALUES('a b c', 1);
+}
+
+do_execsql_test 1.14 {
+  SELECT rowid FROM t1 WHERE t1 MATCH 'b';
+} {1}
+do_execsql_test 1.15 {
+  SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0;
+} {1}
+
+do_execsql_test 1.16 {
+  SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1;
+} {2}
+
+#-------------------------------------------------------------------------
+# Test case 1.17: a negative language id is rejected with a constraint
+# error.
+#
+do_catchsql_test 1.17 {
+  INSERT INTO t1(content, lang_id) VALUES('123', -1);
+} {1 {constraint failed}}
+
+finish_test
+