From: dan Date: Tue, 8 Sep 2015 19:55:26 +0000 (+0000) Subject: Remove the 0x00 terminators from the end of doclists stored on disk. X-Git-Tag: version-3.9.0~122^2~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fe8e2eba0a64aa1234fecc4efa5a9c3f7efed202;p=thirdparty%2Fsqlite.git Remove the 0x00 terminators from the end of doclists stored on disk. FossilOrigin-Name: 00d990061dec3661b0376bd167082942d5563bfe --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 837ecb1ccd..c52ce67d70 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -117,6 +117,12 @@ typedef struct Fts5Config Fts5Config; ** bColumnsize: ** True if the %_docsize table is created. ** +** bPrefixIndex: +** This is only used for debugging. If set to false, any prefix indexes +** are ignored. This value is configured using: +** +** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); +** */ struct Fts5Config { sqlite3 *db; /* Database handle */ @@ -145,6 +151,10 @@ struct Fts5Config { /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; + +#ifdef SQLITE_DEBUG + int bPrefixIndex; /* True to use prefix-indexes */ +#endif }; /* Current expected value of %_config table 'version' field */ diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 07e1243c36..1a7c0d0f8a 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -16,12 +16,14 @@ #include "fts5Int.h" int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ - /* A no-op if an error has already occurred */ - if( *pRc ) return 1; if( (pBuf->n + nByte) > pBuf->nSpace ){ u8 *pNew; int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; + + /* A no-op if an error has already occurred */ + if( *pRc ) return 1; + while( nNew<(pBuf->n + nByte) ){ nNew = nNew * 2; } diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 74faf6dd30..19e3d3ab1f 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -480,6 +480,9 @@ int sqlite3Fts5ConfigParse( pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; +#ifdef SQLITE_DEBUG + pRet->bPrefixIndex = 1; +#endif if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 87eb54b125..9bc08e7443 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -475,6 +475,9 @@ struct Fts5CResult { ** For each rowid on the page corresponding to the current term, the ** corresponding aRowidOffset[] entry is set to the byte offset of the ** start of the "position-list-size" field within the page. +** +** iTermIdx: +** Index of current term on iTermLeafPgno. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ @@ -489,6 +492,9 @@ struct Fts5SegIter { int iTermLeafPgno; int iTermLeafOffset; + int iTermIdx; + int iEndofDoclist; + /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ int iRowidOffset; /* Current entry in aRowidOffset[] */ int nRowidOffset; /* Allocated size of aRowidOffset[] array */ @@ -508,7 +514,7 @@ struct Fts5SegIter { ** leaf page. */ #define ASSERT_SZLEAF_OK(x) assert( \ - (x)->szLeaf==fts5GetU16(&(x)->p[2]) || (x)->szLeaf==(x)->nn \ + (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \ ) #define FTS5_SEGITER_ONETERM 0x01 @@ -522,9 +528,12 @@ struct Fts5SegIter { */ #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) -#define fts5LeafFirstTermOff(x) (fts5GetU16(&(x)->p[(x)->szLeaf])) +#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) + #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) +#define fts5LeafFirstTermOff(x) fts5LeafTermOff(x, 0) + /* ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. @@ -1153,8 +1162,9 @@ static void fts5StructurePromote( int szPromote = 0; /* Promote anything this size or smaller */ Fts5StructureSegment *pSeg; /* Segment just written */ int szSeg; /* Size of segment just written */ + int nSeg = pStruct->aLevel[iLvl].nSeg; - + if( nSeg==0 ) return; pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); @@ -1464,6 +1474,14 @@ static void fts5SegIterNextPage( }else{ pIter->pLeaf = 0; } + + if( pIter->pLeaf ){ + if( fts5LeafIsTermless(pIter->pLeaf) ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; + }else{ + pIter->iEndofDoclist = fts5LeafFirstTermOff(pIter->pLeaf); + } + } } /* @@ -1505,6 +1523,20 @@ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ } } +static void fts5SegIterLoadEod(Fts5Index *p, Fts5SegIter *pIter){ + Fts5Data *pLeaf = pIter->pLeaf; + int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; + + assert( pIter->iLeafPgno==pIter->iTermLeafPgno ); + if( (pIter->iTermIdx+1)szLeaf + (pIter->iTermIdx + 1) * 2; + pIter->iEndofDoclist = fts5GetU16(&pLeaf->p[iRead]); + }else{ + pIter->iEndofDoclist = pLeaf->nn+1; + } + +} + static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; @@ -1551,6 +1583,7 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ pIter->iTermLeafPgno = pIter->iLeafPgno; pIter->iLeafOffset = iOff; + fts5SegIterLoadEod(p, pIter); fts5SegIterLoadRowid(p, pIter); } @@ -1585,9 +1618,10 @@ static void fts5SegIterInit( } if( p->rc==SQLITE_OK ){ - u8 *a = pIter->pLeaf->p; pIter->iLeafOffset = 4; + assert_nc( pIter->pLeaf->nn>4 ); assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); + pIter->iTermIdx = 0; fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } @@ -1614,6 +1648,10 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; + if( n>pIter->iEndofDoclist ){ + n = pIter->iEndofDoclist; + } + ASSERT_SZLEAF_OK(pIter->pLeaf); while( 1 ){ i64 iDelta = 0; @@ -1624,7 +1662,6 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ i += nPos; if( i>=n ) break; i += fts5GetVarint(&a[i], (u64*)&iDelta); - if( iDelta==0 ) break; pIter->iRowid += iDelta; if( iRowidOffset>=pIter->nRowidOffset ){ @@ -1667,8 +1704,8 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ pIter->iLeafOffset = pIter->iTermLeafOffset; } }else{ - int iRowidOff, dummy; - fts5LeafHeader(pNew, &iRowidOff, &dummy); + int iRowidOff; + iRowidOff = fts5LeafFirstRowidOff(pNew); if( iRowidOff ){ pIter->pLeaf = pNew; pIter->iLeafOffset = iRowidOff; @@ -1686,6 +1723,7 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ } if( pIter->pLeaf ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; fts5SegIterReverseInitPage(p, pIter); } } @@ -1747,21 +1785,26 @@ static void fts5SegIterNext( iOff = pIter->iLeafOffset + pIter->nPos; if( iOffiLeafOffset = iOff; - if( iDelta==0 ){ + /* The next entry is on the current page. */ + assert_nc( iOff<=pIter->iEndofDoclist ); + if( iOff>=pIter->iEndofDoclist ){ bNewTerm = 1; - if( iOff>=n ){ - fts5SegIterNextPage(p, pIter); - pIter->iLeafOffset = 4; - }else if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ - pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep); + if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ + pIter->iTermIdx++; + }else{ + pIter->iTermIdx = 0; + } + if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ + iOff += fts5GetVarint32(&a[iOff], nKeep); } }else{ + u64 iDelta; + iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); pIter->iRowid += iDelta; + assert_nc( iDelta>0 ); } + pIter->iLeafOffset = iOff; + }else if( pIter->pSeg==0 ){ const u8 *pList = 0; const char *zTerm = 0; @@ -1777,6 +1820,7 @@ static void fts5SegIterNext( pIter->pLeaf->p = (u8*)pList; pIter->pLeaf->nn = nList; pIter->pLeaf->szLeaf = nList; + pIter->iEndofDoclist = nList+1; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } @@ -1796,6 +1840,7 @@ static void fts5SegIterNext( iOff = fts5LeafFirstTermOff(pLeaf); pIter->iLeafOffset = iOff; bNewTerm = 1; + pIter->iTermIdx = 0; } if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; @@ -1847,31 +1892,11 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ ** byte of position-list content for the current rowid. Back it up ** so that it points to the start of the position-list size field. */ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); - iOff = pIter->iLeafOffset; - assert( iOff>=4 ); - - /* Search for a new term within the current leaf. If one can be found, - ** then this page contains the largest rowid for the current term. */ - while( iOffszLeaf ){ - int nPos; - i64 iDelta; - int bDummy; - - /* Read the position-list size field */ - iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); - iOff += nPos; - if( iOff>=pLeaf->szLeaf ) break; - - /* Rowid delta. Or, if 0x00, the end of doclist marker. */ - nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) break; - iOff += nPos; - } /* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. So search forward to ** see where said rowid really is. */ - if( iOff>=pLeaf->szLeaf ){ + if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ int pgno; Fts5StructureSegment *pSeg = pIter->pSeg; @@ -1905,14 +1930,20 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ ** first rowid on this page. */ if( pLast ){ - int dummy; int iOff; fts5DataRelease(pIter->pLeaf); pIter->pLeaf = pLast; pIter->iLeafPgno = pgnoLast; - fts5LeafHeader(pLast, &iOff, &dummy); + iOff = fts5LeafFirstRowidOff(pLast); iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; + + if( fts5LeafIsTermless(pLast) ){ + pIter->iEndofDoclist = pLast->nn+1; + }else{ + pIter->iEndofDoclist = fts5LeafTermOff(pLast, 0); + } + } fts5SegIterReverseInitPage(p, pIter); @@ -1935,30 +1966,20 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ /* Check if the current doclist ends on this page. If it does, return ** early without loading the doclist-index (as it belongs to a different ** term. */ - if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ - int iOff = pIter->iLeafOffset + pIter->nPos; - while( iOffszLeaf ){ - int bDummy; - int nPos; - i64 iDelta; - - /* iOff is currently the offset of the start of position list data */ - iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) return; - assert_nc( iOffszLeaf ); - iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); - iOff += nPos; - } + if( pIter->iTermLeafPgno==pIter->iLeafPgno + && pIter->iEndofDoclistszLeaf + ){ + return; } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } #define fts5IndexGetVarint32(a, iOff, nVal) { \ - nVal = a[iOff++]; \ + nVal = (a)[iOff++]; \ if( nVal & 0x80 ){ \ iOff--; \ - iOff += fts5GetVarint32(&a[iOff], nVal); \ + iOff += fts5GetVarint32(&(a)[iOff], nVal); \ } \ } @@ -2037,41 +2058,12 @@ static void fts5LeafSeek( } iOff += nNew; -#if 0 - /* Skip past the doclist. If the end of the page is reached, bail out. */ - while( 1 ){ - int nPos; - - /* Skip past rowid delta */ - fts5IndexSkipVarint(a, iOff); - - /* Skip past position list */ - fts5IndexGetVarint32(a, iOff, nPos); - iOff += (nPos >> 1); - if( iOff>=(n-1) ){ - iOff = n; - goto search_failed; - } - - /* If this is the end of the doclist, break out of the loop */ - if( a[iOff]==0x00 ){ - iOff++; - break; - } - }; - - iTerm++; - assert( iTerm=nPgTerm ){ iOff = n; break; } iOff = fts5GetU16(&a[n + iTerm*2]); -#endif /* Read the nKeep field of the next term. */ fts5IndexGetVarint32(a, iOff, nKeep); @@ -2084,6 +2076,7 @@ static void fts5LeafSeek( return; }else if( iOff>=n ){ do { + iTerm = 0; fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ) return; a = pIter->pLeaf->p; @@ -2101,6 +2094,8 @@ static void fts5LeafSeek( } search_success: + pIter->iTermIdx = iTerm; + pIter->iLeafOffset = iOff + nNew; pIter->iTermLeafOffset = pIter->iLeafOffset; pIter->iTermLeafPgno = pIter->iLeafPgno; @@ -2108,6 +2103,7 @@ static void fts5LeafSeek( fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); + fts5SegIterLoadEod(p, pIter); fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } @@ -2243,6 +2239,7 @@ static void fts5SegIterHashInit( pLeaf->nn = pLeaf->szLeaf = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); + pIter->iEndofDoclist = pLeaf->nn+1; if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; @@ -2770,6 +2767,7 @@ static void fts5MultiIterNew2( if( pData->szLeaf>0 ){ pIter->pLeaf = pData; pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); + pIter->iEndofDoclist = pData->nn; pNew->aFirst[1].iFirst = 1; if( bDesc ){ pNew->bRev = 1; @@ -3201,25 +3199,23 @@ static void fts5WriteAppendTerm( Fts5PageWriter *pPage = &pWriter->writer; Fts5Buffer *pPgidx = &pWriter->writer.pgidx; - assert( pPage->buf.n==0 || pPage->buf.n>4 ); - if( pPage->buf.n==0 ){ - /* Zero the first term and first rowid fields */ - static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; - fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); - assert( pWriter->bFirstTermInPage ); - } if( p->rc ) return; + assert( pPage->buf.n>=4 ); + assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); + + /* If the current leaf page is full, flush it to disk. */ + if( (pPage->buf.n + pPage->pgidx.n + nTerm + 2)>=p->pConfig->pgsz ){ + if( pPage->buf.n>4 ){ + fts5WriteFlushLeaf(p, pWriter); + } + fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); + } - /* TODO1: Can this be consolidated with FlushOneHash version? */ + /* TODO1: Updating pgidx here. */ fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); pPgidx->n += 2; if( pWriter->bFirstTermInPage ){ - /* Update the "first term" field of the page header. */ -#if 0 - assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 ); - fts5PutU16(&pPage->buf.p[2], pPage->buf.n); -#endif nPrefix = 0; if( pPage->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in @@ -3261,11 +3257,6 @@ static void fts5WriteAppendTerm( assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); pWriter->aDlidx[0].pgno = pPage->pgno; - - /* If the current leaf page is full, flush it to disk. */ - if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - } } /* @@ -3280,6 +3271,10 @@ static void fts5WriteAppendRowid( if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->writer; + if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ + fts5WriteFlushLeaf(p, pWriter); + } + /* If this is to be the first rowid written to the page, set the ** rowid-pointer in the page-header. Also append a value to the dlidx ** buffer, in case a doclist-index is required. */ @@ -3300,10 +3295,6 @@ static void fts5WriteAppendRowid( pWriter->bFirstRowidInPage = 0; fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); - - if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - } } } @@ -3383,6 +3374,8 @@ static void fts5WriteInit( Fts5SegWriter *pWriter, int iSegid ){ + const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING; + memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; @@ -3391,7 +3384,10 @@ static void fts5WriteInit( pWriter->bFirstTermInPage = 1; pWriter->iBtPage = 1; - fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, p->pConfig->pgsz + 20); + /* Grow the two buffers to pgsz + padding bytes in size. */ + fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, nBuffer); + fts5BufferGrow(&p->rc, &pWriter->writer.buf, nBuffer); + if( p->pIdxWriter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( @@ -3401,6 +3397,13 @@ static void fts5WriteInit( } if( p->rc==SQLITE_OK ){ + /* Initialize the 4-byte leaf-page header to 0x00. */ + memset(pWriter->writer.buf.p, 0, 4); + pWriter->writer.buf.n = 4; + + /* Bind the current output segment id to the index-writer. This is an + ** optimization over binding the same value over and over as rows are + ** inserted into %_idx by the current writer. */ sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); } } @@ -3414,38 +3417,39 @@ static void fts5WriteInit( ** This function appends a page-index to the buffer. The buffer is ** guaranteed to be large enough to fit the page-index. */ -static void fts5MakePageidx(Fts5Index *p, Fts5Buffer *pBuf){ +static void fts5MakePageidx( + Fts5Index *p, /* Fts index object to store any error in */ + Fts5Data *pOld, /* Original page data */ + int iOldidx, /* Index of term in pOld pgidx */ + Fts5Buffer *pBuf /* Buffer containing new page (no pgidx) */ +){ if( p->rc==SQLITE_OK ){ - u8 *a = pBuf->p; - int szLeaf = pBuf->n; - int iOff = 4; - int nTerm; + int iOff; + int iEnd; + int nByte; + int iEndOld; /* Byte after term iOldIdx on old page */ + int iEndNew; /* Byte after term iOldIdx on new page */ + int ii; + int nPgIdx = (pOld->nn - pOld->szLeaf) / 2; + + /* Determine end of term on old page */ + iEndOld = fts5LeafTermOff(pOld, iOldidx); + if( iOldidx>0 ){ + iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte); + } + iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte); + iEndOld += nByte; + + /* Determine end of term on new page */ + iEndNew = 4 + fts5GetVarint32(&pBuf->p[4], nByte); + iEndNew += nByte; - fts5PutU16(&pBuf->p[pBuf->n], iOff); + fts5PutU16(&pBuf->p[pBuf->n], 4); pBuf->n += 2; - fts5IndexGetVarint32(a, iOff, nTerm); - iOff += nTerm; - - while( iOff=szLeaf ) break; - - /* Skip past position list */ - fts5IndexGetVarint32(a, iOff, nTerm); - iOff += (nTerm >> 1); - - if( iOff>=(szLeaf-2) ) break; - - /* If this is the end of the doclist, break out of the loop */ - if( a[iOff]==0x00 ){ - iOff++; - fts5PutU16(&pBuf->p[pBuf->n], iOff); - pBuf->n += 2; - fts5IndexGetVarint32(a, iOff, nTerm); - fts5IndexGetVarint32(a, iOff, nTerm); - iOff += nTerm; - } + for(ii=iOldidx+1; iip[pBuf->n], iVal + (iEndNew - iEndOld)); + pBuf->n += 2; } } } @@ -3491,7 +3495,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ } /* Set up the new page-index array */ - fts5MakePageidx(p, &buf); + fts5MakePageidx(p, pData, pSeg->iTermIdx, &buf); fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; @@ -3595,8 +3599,9 @@ static void fts5IndexMergeLevel( } /* This is a new term. Append a term to the output segment. */ + /* TODO2: Doclist 0x00 term */ if( bRequireDoclistTerm ){ - fts5WriteAppendZerobyte(p, &writer); + /* fts5WriteAppendZerobyte(p, &writer); */ } fts5WriteAppendTerm(p, &writer, nTerm, pTerm); fts5BufferSet(&p->rc, &term, nTerm, pTerm); @@ -3739,6 +3744,7 @@ static void fts5IndexCrisismerge( assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ fts5IndexMergeLevel(p, &pStruct, iLvl, 0); + assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } @@ -3766,10 +3772,12 @@ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ int ret; u32 dummy; ret = fts5GetVarint32(aBuf, dummy); - while( 1 ){ - int i = fts5GetVarint32(&aBuf[ret], dummy); - if( (ret + i) > nMax ) break; - ret += i; + if( ret nMax ) break; + ret += i; + } } return ret; } @@ -3810,72 +3818,32 @@ static void fts5FlushOneHash(Fts5Index *p){ Fts5SegWriter writer; fts5WriteInit(p, &writer, iSegid); - /* Pre-allocate the buffer used to assemble leaf pages to the target - ** page size. */ - assert( pgsz>0 ); pBuf = &writer.writer.buf; pPgidx = &writer.writer.pgidx; - fts5BufferGrow(&p->rc, pBuf, pgsz + 20); + + /* fts5WriteInit() should have initialized the buffers to (most likely) + ** the maximum space required. */ + assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); + assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); /* Begin scanning through hash table entries. This loop runs once for each ** term/doclist currently stored within the hash table. */ if( p->rc==SQLITE_OK ){ - memset(pBuf->p, 0, 4); - pBuf->n = 4; p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ const char *zTerm; /* Buffer containing term */ - int nTerm; /* Size of zTerm in bytes */ const u8 *pDoclist; /* Pointer to doclist for this term */ int nDoclist; /* Size of doclist in bytes */ int nSuffix; /* Size of term suffix */ + /* Write the term for this entry to disk. */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); - nTerm = strlen(zTerm); - - /* Decide if the term will fit on the current leaf. If it will not, - ** flush the leaf to disk here. - ** TODO1: Is this calculation still correct? */ - if( pBuf->n>4 && (pBuf->n + nTerm + 2 + pPgidx->n + 2) > pgsz ){ - fts5WriteFlushLeaf(p, &writer); - if( (nTerm + 32) > pBuf->nSpace ){ - fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); - if( p->rc ) break; - } - } - - /* Write the term to the leaf. And if it is the first on the leaf, and - ** the leaf is not page number 1, push it up into the b-tree hierarchy - ** as well. */ - - /* TODO1: Writing pgidx here! */ - fts5PutU16(&pPgidx->p[pPgidx->n], pBuf->n); - pPgidx->n += 2; - if( writer.bFirstTermInPage==0 ){ - int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); - pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre); - nSuffix = nTerm - nPre; - }else{ - writer.bFirstTermInPage = 0; - if( writer.writer.pgno!=1 ){ - int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); - fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm); - pBuf = &writer.writer.buf; - assert( nPren += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix); - fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); - - /* We just wrote a term into page writer.aWriter[0].pgno. If a - ** doclist-index is to be generated for this doclist, it will be - ** associated with this page. */ - assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); - writer.aDlidx[0].pgno = writer.writer.pgno; + fts5WriteAppendTerm(p, &writer, strlen(zTerm), zTerm); - if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ + if( writer.bFirstRowidInPage==0 + && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) + ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ @@ -3883,7 +3851,7 @@ static void fts5FlushOneHash(Fts5Index *p){ i64 iDelta = 0; int iOff = 0; - writer.bFirstRowidInPage = 0; + /* writer.bFirstRowidInPage = 0; */ /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current @@ -3938,7 +3906,8 @@ static void fts5FlushOneHash(Fts5Index *p){ } } - pBuf->p[pBuf->n++] = '\0'; + /* TODO2: Doclist terminator written here. */ + /* pBuf->p[pBuf->n++] = '\0'; */ assert( pBuf->n<=pBuf->nSpace ); zPrev = (const u8*)zTerm; sqlite3Fts5HashScanNext(pHash); @@ -4527,7 +4496,7 @@ int sqlite3Fts5IndexQuery( ** evaluate the prefix query using the main FTS index. This is used ** for internal sanity checking by the integrity-check in debug ** mode only. */ - if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ + if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; }else @@ -4971,6 +4940,48 @@ static void fts5IndexIntegrityCheckEmpty( } } +static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ + int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; + int ii; + Fts5Buffer buf1 = {0,0,0}; + Fts5Buffer buf2 = {0,0,0}; + + for(ii=0; p->rc==SQLITE_OK && ii=pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else if( ii==0 ){ + int nByte; + iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); + if( (iOff+nByte)>pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else{ + fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); + } + }else{ + int nKeep, nByte; + iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep); + iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); + if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else{ + buf1.n = nKeep; + fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); + } + + if( p->rc==SQLITE_OK ){ + res = fts5BufferCompare(&buf1, &buf2); + if( res<=0 ) p->rc = FTS5_CORRUPT; + } + } + fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p); + } + + fts5BufferFree(&buf1); + fts5BufferFree(&buf2); +} + static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ @@ -5027,6 +5038,8 @@ static void fts5IndexIntegrityCheckSegment( if( res==0 ) res = nTerm - nIdxTerm; if( res<0 ) p->rc = FTS5_CORRUPT; } + + fts5IntegrityCheckPgidx(p, pLeaf); } fts5DataRelease(pLeaf); if( p->rc ) break; @@ -5418,51 +5431,56 @@ static void fts5DecodeFunction( int szLeaf = 0; int iRowidOff = 0; int iOff; - int nKeep = 0; + int nPgTerm = 0; + int nDoclist; + int i; memset(&term, 0, sizeof(Fts5Buffer)); - if( n>=4 ){ + if( n<4 ){ + sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); + goto decode_out; + }else{ iRowidOff = fts5GetU16(&a[0]); szLeaf = fts5GetU16(&a[2]); - if( szLeaf0 ){ + iOff += fts5GetVarint32(&a[iOff], nByte); + term.n = nByte; + } iOff += fts5GetVarint32(&a[iOff], nByte); - term.n= nKeep; fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; - sqlite3Fts5BufferAppendPrintf( &rc, &s, " term=%.*s", term.n, (const char*)term.p ); - iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], szLeaf-iOff); - if( iOffpStorage, nMerge); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); +#ifdef SQLITE_TEST + }else if( 0==sqlite3_stricmp("prefix-index", z) ){ + pConfig->bPrefixIndex = sqlite3_value_int(pVal); +#endif }else{ rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); if( rc==SQLITE_OK ){ diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index e20893dcf7..ae445f740d 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -203,11 +203,6 @@ for {set i 1} {$i <= 10} {incr i} { if {[set_test_counter errors]} break } -#db eval { SELECT fts5_decode(rowid, block) as x FROM t1_data } { puts $x } -#puts [db eval {SELECT rowid FROM t1 WHERE t1 MATCH 'aaa' ORDER BY rowid ASC}] -#puts [db eval {SELECT rowid FROM t1 WHERE t1 MATCH 'aaa' ORDER BY rowid DESC}] -#exit - #------------------------------------------------------------------------- # reset_db diff --git a/ext/fts5/test/fts5simple.test b/ext/fts5/test/fts5simple.test index c146090f3f..eeac11b5ca 100644 --- a/ext/fts5/test/fts5simple.test +++ b/ext/fts5/test/fts5simple.test @@ -11,7 +11,7 @@ # source [file join [file dirname [info script]] fts5_common.tcl] -set testprefix fts5aa +set testprefix fts5simple # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { @@ -50,7 +50,6 @@ do_execsql_test 2.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } {} -} #------------------------------------------------------------------------- # @@ -63,12 +62,58 @@ do_execsql_test 3.0 { SELECT * FROM t1 WHERE t1 MATCH 'o*'; } {one} -breakpoint do_execsql_test 3.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } {} -# db eval { SELECT fts5_decode(rowid, block) as x FROM t1_data } { puts $x } +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 4.1 { + CREATE VIRTUAL TABLE t11 USING fts5(content); + INSERT INTO t11(t11, rank) VALUES('pgsz', 32); + INSERT INTO t11 VALUES('another'); + INSERT INTO t11 VALUES('string'); + INSERT INTO t11 VALUES('of'); + INSERT INTO t11 VALUES('text'); +} +do_test 4.2 { + execsql { INSERT INTO t11(t11) VALUES('optimize') } +} {} +do_execsql_test 4.3 { + INSERT INTO t11(t11) VALUES('integrity-check'); +} {} + +#db eval { SELECT fts5_decode(rowid, block) as x FROM t11_data } { puts $x } + +#------------------------------------------------------------------------- +reset_db +set doc [string repeat "x y " 5] +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE yy USING fts5(content); + INSERT INTO yy(yy, rank) VALUES('pgsz', 32); + BEGIN; + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + COMMIT; +} + +do_execsql_test 5.2 { + SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid ASC +} {1 2 3 4 5 6 7 8} + +do_execsql_test 5.3 { + SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC +} {8 7 6 5 4 3 2 1} + +#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x } finish_test diff --git a/main.mk b/main.mk index d07e473be6..a10199f3d1 100644 --- a/main.mk +++ b/main.mk @@ -332,7 +332,9 @@ TESTSRC += \ $(TOP)/ext/misc/vfslog.c \ $(TOP)/ext/fts5/fts5_tcl.c \ $(TOP)/ext/fts5/fts5_test_mi.c \ - fts5.c + $(FTS5_SRC) + +# fts5.c #TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c diff --git a/manifest b/manifest index f47bf05938..36d5700540 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Use\smacros\sto\smake\sthe\scode\sin\sfts5_index.c\seasier\sto\sread. -D 2015-09-07T08:14:30.857 +C Remove\sthe\s0x00\sterminators\sfrom\sthe\send\sof\sdoclists\sstored\son\sdisk. +D 2015-09-08T19:55:26.385 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h f04659e0df5af83731b102189a32280f74f4a6bc -F ext/fts5/fts5Int.h f65d41f66accad0a289d6bd66b13c07d2932f9be +F ext/fts5/fts5Int.h 7e6002582133cb795a21468732cf7c35027a28e4 F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e -F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015 -F ext/fts5/fts5_config.c 80b61fd2c6844b64a3e72a64572d50a812da9384 +F ext/fts5/fts5_buffer.c 64dcaf36a3ebda9e84b7c3b8788887ec325e12a4 +F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695 F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4 F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246 -F ext/fts5/fts5_index.c 213e5aea27100a2ebb7a576d6574bcc28c594520 -F ext/fts5/fts5_main.c e9d0892424bb7f0a8b58613d4ff75cb650cf286e +F ext/fts5/fts5_index.c 677c859b2064e00903a9ad847a1cfca8d36ce595 +F ext/fts5/fts5_main.c 3ec19f23693e034028afb9b4fa7c800dc3eda5ab F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059 F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37 F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf @@ -124,7 +124,7 @@ F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl b6e6a40ef5d069c8e86ca4fbad491e1195485dbc -F ext/fts5/test/fts5aa.test 1ac5a3bd88406183b00ea7cb4701bd58e5c3c7ff +F ext/fts5/test/fts5aa.test fb84c53e90dc4f9b4dc485858b53e70b67d6f064 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c F ext/fts5/test/fts5ad.test e3dfb150fce971b4fd832498c29f56924d451b63 @@ -173,7 +173,7 @@ F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1 F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17 F ext/fts5/test/fts5rowid.test 6f9833b23b176dc4aa15b7fc02afeb2b220fd460 -F ext/fts5/test/fts5simple.test f520b360c40a5a61aabebead53e1e5f68683e5a3 +F ext/fts5/test/fts5simple.test 40b76f77d56524a882473664c792bd4e8f2f2a0a F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671 F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89 F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841 @@ -261,7 +261,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 61821e43596648bfacce2d6283377bee35986131 +F main.mk e67b73755e1f28c2d18d7953ca75c901c0be617b F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1384,7 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a1f4c3b543eed84e808f6b901a38179786fffe16 -R d9f9c87817152716ded47406f74ba235 +P 67ff5ae81357eb7fa28049bb724a22cb6f52e076 +R 7e6723f8162dfa594f88012bb5737a33 U dan -Z 4c8df884c6ae97a50d0e8662a6ca4b0e +Z 0d6a020581263053b4291f892521bd3c diff --git a/manifest.uuid b/manifest.uuid index a4473a78af..55934271fe 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -67ff5ae81357eb7fa28049bb724a22cb6f52e076 \ No newline at end of file +00d990061dec3661b0376bd167082942d5563bfe \ No newline at end of file