char *zContent; /* content table */
char *zContentRowid; /* "content_rowid=" option value */
int bColumnsize; /* "columnsize=" option value (dflt==1) */
+ int bOffsets; /* "offsets=" option value (dflt==1) */
char *zContentExprlist;
Fts5Tokenizer *pTok;
fts5_tokenizer *pTokApi;
/* Character set tests (like isspace(), isalpha() etc.) */
int sqlite3Fts5IsBareword(char t);
+
+/* Bucket of terms object used by the integrity-check in offsets=0 mode. */
+typedef struct Fts5Termset Fts5Termset;
+int sqlite3Fts5TermsetNew(Fts5Termset**);
+int sqlite3Fts5TermsetAdd(Fts5Termset*, const char*, int, int *pbPresent);
+void sqlite3Fts5TermsetFree(Fts5Termset*);
+
/*
** End of interface to code in fts5_buffer.c.
**************************************************************************/
/*
** Create a hash table, free a hash table.
*/
-int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize);
+int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
void sqlite3Fts5HashFree(Fts5Hash*);
int sqlite3Fts5HashWrite(
}
+/*************************************************************************
+** Fts5Termset: a fixed-size hash table holding a set of distinct terms.
+*/
+typedef struct Fts5TermsetEntry Fts5TermsetEntry;
+struct Fts5TermsetEntry {
+ char *pTerm; /* Term bytes; stored in the same allocation, after the struct */
+ int nTerm; /* Size of pTerm[] in bytes */
+ Fts5TermsetEntry *pNext; /* Next entry in the same hash bucket, or NULL */
+};
+
+struct Fts5Termset {
+ Fts5TermsetEntry *apHash[512]; /* Hash buckets; each is a pNext-linked list */
+};
+
+/*
+** Allocate a new, zeroed (and therefore empty) termset object and store a
+** pointer to it in *pp. The value returned is whatever result code
+** sqlite3Fts5MallocZero() leaves in rc - SQLITE_OK if it did not modify it.
+*/
+int sqlite3Fts5TermsetNew(Fts5Termset **pp){
+  int rc = SQLITE_OK;
+  Fts5Termset *pNew;
+
+  pNew = (Fts5Termset*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
+  *pp = pNew;
+  return rc;
+}
+
+/*
+** Add the nTerm byte term in buffer pTerm to termset p, unless an identical
+** term is already a member of the set.
+**
+** Before returning, *pbPresent is set to 1 if the term was already present
+** in the set, or to 0 otherwise. The return value is SQLITE_OK unless the
+** allocation of a new entry fails, in which case it is whatever error code
+** sqlite3Fts5MallocZero() stored in rc and the set is left unchanged.
+*/
+int sqlite3Fts5TermsetAdd(
+  Fts5Termset *p,
+  const char *pTerm, int nTerm,
+  int *pbPresent
+){
+  int rc = SQLITE_OK;
+  int i;
+  unsigned int hash = 13;
+  Fts5TermsetEntry *pEntry;
+
+  /* Calculate a hash value for this term. The accumulator is unsigned so
+  ** that overflow wraps around instead of being undefined behaviour, and
+  ** each byte is read as an unsigned char so that bytes with the high bit
+  ** set cannot drive the value negative. Together these guarantee that the
+  ** bucket index computed below always lies within apHash[]. */
+  for(i=0; i<nTerm; i++){
+    hash += (hash << 3) + (unsigned char)pTerm[i];
+  }
+  hash = hash % (unsigned int)ArraySize(p->apHash);
+
+  /* Search the bucket for an existing copy of the term */
+  *pbPresent = 0;
+  for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
+    if( pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 ){
+      *pbPresent = 1;
+      break;
+    }
+  }
+
+  /* Not found. Allocate a new entry, with the term text stored immediately
+  ** after the structure itself, and link it in at the head of the bucket. */
+  if( pEntry==0 ){
+    pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
+    if( pEntry ){
+      pEntry->pTerm = (char*)&pEntry[1];
+      pEntry->nTerm = nTerm;
+      memcpy(pEntry->pTerm, pTerm, nTerm);
+      pEntry->pNext = p->apHash[hash];
+      p->apHash[hash] = pEntry;
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Free termset object p along with every entry it contains. Passing a
+** NULL pointer is a harmless no-op.
+*/
+void sqlite3Fts5TermsetFree(Fts5Termset *p){
+  int iSlot;
+  if( p==0 ) return;
+
+  for(iSlot=0; iSlot<ArraySize(p->apHash); iSlot++){
+    Fts5TermsetEntry *pIter;
+    Fts5TermsetEntry *pNext;
+    /* Grab the next pointer before freeing the current entry */
+    for(pIter=p->apHash[iSlot]; pIter; pIter=pNext){
+      pNext = pIter->pNext;
+      sqlite3_free(pIter);
+    }
+  }
+  sqlite3_free(p);
+}
+
+
+
*/
-
#include "fts5Int.h"
#define FTS5_DEFAULT_PAGE_SIZE 4050
return rc;
}
+ if( sqlite3_strnicmp("offsets", zCmd, nCmd)==0 ){
+ if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
+ *pzErr = sqlite3_mprintf("malformed offsets=... directive");
+ rc = SQLITE_ERROR;
+ }else{
+ pConfig->bOffsets = (zArg[0]=='1');
+ }
+ return rc;
+ }
+
*pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
return SQLITE_ERROR;
}
pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
pRet->bColumnsize = 1;
+ pRet->bOffsets = 1;
#ifdef SQLITE_DEBUG
pRet->bPrefixIndex = 1;
#endif
struct Fts5Hash {
+ int bOffsets; /* Copy of Fts5Config.bOffsets */
int *pnByte; /* Pointer to bytes counter */
int nEntry; /* Number of entries currently in hash */
int nSlot; /* Size of aSlot[] array */
/*
** Allocate a new hash table.
*/
-int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
+int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){
int rc = SQLITE_OK;
Fts5Hash *pNew;
int nByte;
memset(pNew, 0, sizeof(Fts5Hash));
pNew->pnByte = pnByte;
+ pNew->bOffsets = pConfig->bOffsets;
pNew->nSlot = 1024;
nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
Fts5HashEntry *p;
u8 *pPtr;
int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
+ int bNew = pHash->bOffsets; /* If non-delete entry should be written */
/* Attempt to locate an existing hash entry */
iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
p->iSzPoslist = p->nData;
p->nData += 1;
p->iRowid = iRowid;
+ p->iCol = (pHash->bOffsets-1);
p->pHashNext = pHash->aSlot[iHash];
pHash->aSlot[iHash] = p;
pHash->nEntry++;
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
p->iSzPoslist = p->nData;
p->nData += 1;
- p->iCol = 0;
+ p->iCol = (pHash->bOffsets-1);
p->iPos = 0;
p->iRowid = iRowid;
+ bNew = 1;
}
if( iCol>=0 ){
/* Append a new column value, if necessary */
assert( iCol>=p->iCol );
if( iCol!=p->iCol ){
- pPtr[p->nData++] = 0x01;
- p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
- p->iCol = iCol;
- p->iPos = 0;
+ if( pHash->bOffsets==0 ){
+ bNew = 1;
+ p->iCol = iPos = iCol;
+ }else{
+ pPtr[p->nData++] = 0x01;
+ p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
+ p->iCol = iCol;
+ p->iPos = 0;
+ }
}
- /* Append the new position offset */
- p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
- p->iPos = iPos;
+ /* Append the new position offset, if necessary */
+ if( bNew ){
+ p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
+ p->iPos = iPos;
+ }
}else{
/* This is a delete. Set the delete flag. */
p->bDel = 1;
int eState; /* See above */
};
+typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
+struct PoslistOffsetsCtx {
+ Fts5Buffer *pBuf; /* Append to this buffer */
+ Fts5Colset *pColset; /* Restrict matches to this column */
+ int iRead; /* Most recent value decoded from the input chunks */
+ int iWrite; /* Most recent value appended to pBuf */
+};
+
/*
** TODO: Make this more efficient!
*/
return 0;
}
+/*
+** Chunk callback that takes a PoslistOffsetsCtx as its context.
+**
+** The chunk is read as a series of varints, each holding the difference
+** between a value and its predecessor, plus 2. Every decoded value that
+** passes fts5IndexColsetTest() against pCtx->pColset is appended to
+** pCtx->pBuf, re-encoded the same way relative to the previous value
+** written. Running state is carried between calls in pCtx->iRead and
+** pCtx->iWrite.
+*/
+static void fts5PoslistOffsetsCallback(
+  Fts5Index *p,
+  void *pContext,
+  const u8 *pChunk, int nChunk
+){
+  PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
+  int iOff = 0;                   /* Current offset within pChunk[] */
+
+  assert_nc( nChunk>=0 );
+  while( iOff<nChunk ){
+    int iDelta;                   /* Next varint read from the chunk */
+    iOff += fts5GetVarint32(&pChunk[iOff], iDelta);
+
+    /* Undo the delta encoding to recover the absolute value */
+    pCtx->iRead += iDelta - 2;
+
+    if( fts5IndexColsetTest(pCtx->pColset, pCtx->iRead) ){
+      fts5BufferSafeAppendVarint(pCtx->pBuf, pCtx->iRead + 2 - pCtx->iWrite);
+      pCtx->iWrite = pCtx->iRead;
+    }
+  }
+}
+
static void fts5PoslistFilterCallback(
Fts5Index *p,
void *pContext,
if( pColset==0 ){
fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
}else{
- PoslistCallbackCtx sCtx;
- sCtx.pBuf = pBuf;
- sCtx.pColset = pColset;
- sCtx.eState = fts5IndexColsetTest(pColset, 0);
- assert( sCtx.eState==0 || sCtx.eState==1 );
- fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
+ if( p->pConfig->bOffsets==0 ){
+ PoslistOffsetsCtx sCtx;
+ memset(&sCtx, 0, sizeof(sCtx));
+ sCtx.pBuf = pBuf;
+ sCtx.pColset = pColset;
+ fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
+ }else{
+ PoslistCallbackCtx sCtx;
+ sCtx.pBuf = pBuf;
+ sCtx.pColset = pColset;
+ /* Set eState before asserting on it; sCtx is an uninitialised stack
+ ** object, so the assert() must not run ahead of the assignment. */
+ sCtx.eState = fts5IndexColsetTest(pColset, 0);
+ assert( sCtx.eState==0 || sCtx.eState==1 );
+ fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
+ }
}
}
}
/* Allocate the hash table if it has not already been allocated */
if( p->pHash==0 ){
- p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData);
+ p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
}
/* Flush the hash table to disk if required */
Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
assert( pIter->pIndex->rc==SQLITE_OK );
*piRowid = pSeg->iRowid;
- if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
+ if( pIter->pIndex->pConfig->bOffsets
+ && pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf
+ ){
u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
if( pColset==0 || pIter->bFiltered ){
*pn = pSeg->nPos;
int iCol;
int szCol;
u64 cksum;
+ Fts5Termset *pTermset;
Fts5Config *pConfig;
};
** Tokenization callback used by integrity check.
*/
static int fts5StorageIntegrityCallback(
- void *pContext, /* Pointer to Fts5InsertCtx object */
+ void *pContext, /* Pointer to Fts5IntegrityCtx object */
int tflags, /* Token flags; only FTS5_TOKEN_COLOCATED is tested here */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
){
+ int rc = SQLITE_OK;
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++; /* Colocated tokens do not advance the count, except the first */
}
- pCtx->cksum ^= sqlite3Fts5IndexCksum(
- pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
- );
- return SQLITE_OK;
+
+ if( pCtx->pTermset ){ /* Termset in use: checksum each distinct term once */
+ int bPresent = 0;
+ rc = sqlite3Fts5TermsetAdd(pCtx->pTermset, pToken, nToken, &bPresent);
+ if( rc==SQLITE_OK && bPresent==0 ){ /* First occurrence of this term */
+ pCtx->cksum ^= sqlite3Fts5IndexCksum(
+ pCtx->pConfig, pCtx->iRowid, 0, pCtx->iCol, pToken, nToken /* iCol goes where the else branch passes szCol-1 */
+ );
+ }
+ }else{ /* No termset: checksum every token occurrence */
+ pCtx->cksum ^= sqlite3Fts5IndexCksum(
+ pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
+ );
+ }
+ return rc; /* SQLITE_OK, or the error returned by sqlite3Fts5TermsetAdd() */
}
/*
if( pConfig->abUnindexed[i] ) continue;
ctx.iCol = i;
ctx.szCol = 0;
- rc = sqlite3Fts5Tokenize(pConfig,
- FTS5_TOKENIZE_DOCUMENT,
- (const char*)sqlite3_column_text(pScan, i+1),
- sqlite3_column_bytes(pScan, i+1),
- (void*)&ctx,
- fts5StorageIntegrityCallback
- );
- if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
+ if( pConfig->bOffsets==0 ){
+ rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5Tokenize(pConfig,
+ FTS5_TOKENIZE_DOCUMENT,
+ (const char*)sqlite3_column_text(pScan, i+1),
+ sqlite3_column_bytes(pScan, i+1),
+ (void*)&ctx,
+ fts5StorageIntegrityCallback
+ );
+ }
+ if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
rc = FTS5_CORRUPT;
}
aTotalSize[i] += ctx.szCol;
+ sqlite3Fts5TermsetFree(ctx.pTermset);
+ ctx.pTermset = 0;
}
if( rc!=SQLITE_OK ) break;
}
--- /dev/null
+# 2015 December 18
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library. The
+# focus of this script is testing the FTS5 module.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5offsets
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(a, b, c, offsets=0);
+
+ INSERT INTO t1 VALUES('h d g', 'j b b g b', 'i e i d h g g'); -- 1
+ INSERT INTO t1 VALUES('h j d', 'j h d a h', 'f d d g g f b'); -- 2
+ INSERT INTO t1 VALUES('j c i', 'f f h e f', 'c j i j c h f'); -- 3
+ INSERT INTO t1 VALUES('e g g', 'g e d h i', 'e d b e g d c'); -- 4
+ INSERT INTO t1 VALUES('b c c', 'd i h a f', 'd i j f a b c'); -- 5
+ INSERT INTO t1 VALUES('e d e', 'b c j g d', 'a i f d h b d'); -- 6
+ INSERT INTO t1 VALUES('g h e', 'b c d i d', 'e f c i f i c'); -- 7
+ INSERT INTO t1 VALUES('c f j', 'j j i e a', 'h a c f d h e'); -- 8
+ INSERT INTO t1 VALUES('a h i', 'c i a f a', 'c f d h g d g'); -- 9
+ INSERT INTO t1 VALUES('j g g', 'e f e f f', 'h j b i c g e'); -- 10
+}
+
+do_execsql_test 1.1 {
+ INSERT INTO t1(t1) VALUES('integrity-check');
+}
+
+# Each "col:term" query must return exactly the rowids of the rows inserted
+# by test 1.0 whose column <col> contains <term>. The same rowids are
+# expected when '*' is appended to the query (tests 1.2.*.2).
+foreach {tn match res} {
+ 1 "a:a" {9}
+ 2 "b:g" {1 4 6}
+ 3 "c:h" {1 3 6 8 9 10}
+} {
+ do_execsql_test 1.2.$tn.1 {
+ SELECT rowid FROM t1($match);
+ } $res
+
+ do_execsql_test 1.2.$tn.2 {
+ SELECT rowid FROM t1($match || '*');
+ } $res
+}
+
+finish_test
+
-C Fix\sthe\sspellfix1_scriptcode()\sfunction\sto\signore\swhitespace\sand\spunctuation,\nand\sto\srecognize\shebrew\sand\sarabic\sscripts.
-D 2015-12-17T14:18:21.904
+C Add\sthe\s"offsets=0"\soption\sto\sfts5,\sto\screate\sa\ssmaller\sindex\swithout\sterm\soffset\sinformation.\sA\sfew\sthings\sare\scurrently\sbroken\son\sthis\sbranch.
+D 2015-12-17T20:36:13.853
F Makefile.in 28bcd6149e050dff35d4dcfd97e890cd387a499d
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
F Makefile.msc 5fff077fcc46de7714ed6eebb6159a4c00eab751
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h 8b9a13b309b180e9fb88ea5666c0d8d73c6102d9
-F ext/fts5/fts5Int.h acf968e43d57b6b1caf7554d34ec35d6ed3b4fe8
+F ext/fts5/fts5Int.h 4e1bb66d8e607bf38e881eb455cdf36cc3fa9e42
F ext/fts5/fts5_aux.c 1f384972d606375b8fa078319f25ab4b5feb1b35
-F ext/fts5/fts5_buffer.c 1e49512a535045e621246dc7f4f65f3593fa0fc2
-F ext/fts5/fts5_config.c 0ee66188609a62342e9f9aeefa3c3e44518a4dd6
+F ext/fts5/fts5_buffer.c 389d377d04f6e622644c3343ab5e511f6646de36
+F ext/fts5/fts5_config.c ba5248a05c28ec6a6fdf2599a86e9fd67e5c61e2
F ext/fts5/fts5_expr.c 80075fa45091bad42100c4a5c4f2efc83e43e3af
-F ext/fts5/fts5_hash.c 25838d525e97f8662ff3504be94d0bad24f9a37e
-F ext/fts5/fts5_index.c 578f46697080f11a1e26cd45a1c039c043a3111d
+F ext/fts5/fts5_hash.c d4a6b52faca0134cc7bcc880f03a257a0dec2636
+F ext/fts5/fts5_index.c 53b3a8f1c9c1f6e5e896b6dc0a7ad26c2eea23a2
F ext/fts5/fts5_main.c ef04699949ab8e42d590ae30188afef7ad58776e
-F ext/fts5/fts5_storage.c 9ea3d92178743758b6c54d9fe8836bbbdcc92e3b
+F ext/fts5/fts5_storage.c 0dc37a6183e1061e255f23971198d8878159d4ef
F ext/fts5/fts5_tcl.c 3bf445e66de32137d4693694ff7b1fd6074e32bd
F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf
F ext/fts5/fts5_tokenize.c 618efe033bceb80c521b1e9ddfd9fee85fb5946e
F ext/fts5/test/fts5matchinfo.test 2163b0013e824bba65499da9e34ea4da41349cc2
F ext/fts5/test/fts5merge.test 8f3cdba2ec9c5e7e568246e81b700ad37f764367
F ext/fts5/test/fts5near.test b214cddb1c1f1bddf45c75af768f20145f7e71cc
+F ext/fts5/test/fts5offsets.test 09fc61d553ae4e985afc0146ec77f3439503fc6b
F ext/fts5/test/fts5onepass.test 7ed9608e258132cb8d55e7c479b08676ad68810c
F ext/fts5/test/fts5optimize.test 42741e7c085ee0a1276140a752d4407d97c2c9f5
F ext/fts5/test/fts5phrase.test f6d1d464da5beb25dc56277aa4f1d6102f0d9a2f
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 85ebd46c701e0a094a4690cd8f1d0cbae9aa257c
-R 70cdc580d7d9b92b032d33f5e0d50f2e
-U drh
-Z 6d4b5ed9cd1870281d7d851922f323d8
+P 7adfa4a5794e47f97491c08abeaaac90e826b331
+R a68b4412544bee6f6bc95a23674c55d4
+T *branch * fts5-offsets
+T *sym-fts5-offsets *
+T -sym-trunk *
+U dan
+Z a51f39853c3a5371cd0ec3358f50f2d7
-7adfa4a5794e47f97491c08abeaaac90e826b331
\ No newline at end of file
+40b5bbf02a824ca73b33aa4ae1c7d5f65b7cda10
\ No newline at end of file