From: dan Date: Wed, 13 May 2015 17:15:32 +0000 (+0000) Subject: Change fts5 doclist-index structures to be trees instead of flat lists. This only... X-Git-Tag: version-3.8.11~114^2~45 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=23d538885eb5b4db3056f0cce47004248bf1949f;p=thirdparty%2Fsqlite.git Change fts5 doclist-index structures to be trees instead of flat lists. This only makes a difference for databases that contain millions of instances of the same token. FossilOrigin-Name: aa34bf666c384cf32a8d8166ab6d9afbca26a256 --- diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 77274eda16..e519635149 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -17,6 +17,14 @@ #include "fts5Int.h" +/* +** This variable is set to true when running corruption tests. Otherwise +** false. If it is false, extra assert() conditions in the fts5 code are +** activated - conditions that are only true if it is guaranteed that the +** fts5 database is not corrupt. +*/ +int sqlite3_fts5_may_be_corrupt = 0; + typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index afef22679f..3c84c9ed2a 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -44,7 +44,7 @@ int sqlite3Fts5Corrupt(void); ** is used for assert() conditions that are true only if it can be ** guranteed that the database is not corrupt. */ -#ifdef SQLITE_TEST +#ifdef SQLITE_DEBUG extern int sqlite3_fts5_may_be_corrupt; # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) #else @@ -115,7 +115,7 @@ struct Fts5Config { }; /* Current expected value of %_config table 'version' field */ -#define FTS5_CURRENT_VERSION 1 +#define FTS5_CURRENT_VERSION 2 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index cd15c71b56..6df3774ebb 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -202,15 +202,33 @@ ** ** 5. Segment doclist indexes: ** -** A list of varints. If the first termless page contains at least one -** docid, the list begins with that docid as a varint followed by the -** value 1 (0x01). Or, if the first termless page contains no docids, -** a varint containing the last docid stored on the term page followed -** by a 0 (0x00) value. +** Doclist indexes are themselves b-trees, however they usually consist of +** a single leaf record only. The format of each doclist index leaf page +** is: +** +** * Flags byte. Bits are: +** 0x01: Clear if leaf is also the root page, otherwise set. +** +** * Page number of fts index leaf page. As a varint. +** +** * First docid on page indicated by previous field. As a varint. +** +** * A list of varints, one for each subsequent termless page. A +** positive delta if the termless page contains at least one docid, +** or an 0x00 byte otherwise. +** +** Internal doclist index nodes are: +** +** * Flags byte. Bits are: +** 0x01: Clear for root page, otherwise set. +** +** * Page number of first child page. As a varint. +** +** * Copy of first docid on page indicated by previous field. As a varint. +** +** * A list of delta-encoded varints - the first docid on each subsequent +** child page. ** -** For each subsequent page in the doclist, either a 0x00 byte if the -** page contains no terms, or a delta-encoded docid (always +ve) -** representing the first docid on the page otherwise. */ /* @@ -240,33 +258,42 @@ ** SQLITE_FULL and fails the current operation if they ever prove too small. */ #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ +#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ #define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */ #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ -#define FTS5_SEGMENT_ROWID(segid, height, pgno) ( \ - ((i64)(segid) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ +#define fts5_dri(segid, dlidx, height, pgno) ( \ + ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ + ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ ((i64)(pgno)) \ ) +#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno) +#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) + +#if 0 /* ** The height of segment b-trees is actually limited to one less than ** (1<iLeafPgno is the page number the -** doclist is associated with (the one featuring the term). +** Advance the iterator passed as the only argument. If the end of the +** doclist-index page is reached, return non-zero. */ -static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ - Fts5Data *pData = pIter->pData; - int i; - int bPresent; - - assert( pIter->pData ); - assert( pIter->iLeafPgno>0 ); +static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ + Fts5Data *pData = pLvl->pData; - /* Read the first rowid value. And the "present" flag that follows it. */ - pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid); - bPresent = pData->p[pIter->iOff++]; - if( bPresent ){ - i = 0; + if( pLvl->iOff==0 ){ + assert( pLvl->bEof==0 ); + pLvl->iOff = 1; + pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); + pLvl->iOff += getVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); + pLvl->iFirstOff = pLvl->iOff; }else{ - /* Count the number of leading 0x00 bytes. */ - for(i=1; pIter->iOffn; i++){ - if( pData->p[pIter->iOff] ) break; - pIter->iOff++; + int iOff; + for(iOff=pLvl->iOff; iOffn; iOff++){ + if( pData->p[iOff] ) break; } - /* Unless we are already at the end of the doclist-index, load the first - ** rowid value. */ - if( pIter->iOffn ){ + if( iOffn ){ i64 iVal; - pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal); - pIter->iRowid += iVal; + pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; + iOff += getVarint(&pData->p[iOff], (u64*)&iVal); + pLvl->iRowid += iVal; + pLvl->iOff = iOff; }else{ - pIter->bEof = 1; + pLvl->bEof = 1; } } - pIter->iLeafPgno += (i+1); - pIter->iFirstOff = pIter->iOff; - return pIter->bEof; + return pLvl->bEof; } /* ** Advance the iterator passed as the only argument. */ -static int fts5DlidxIterNext(Fts5DlidxIter *pIter){ - Fts5Data *pData = pIter->pData; - int iOff; - - for(iOff=pIter->iOff; iOffn; iOff++){ - if( pData->p[iOff] ) break; +static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; + + assert( iLvlnLvl ); + if( fts5DlidxLvlNext(pLvl) ){ + if( (iLvl+1) < pIter->nLvl ){ + fts5DlidxIterNextR(p, pIter, iLvl+1); + if( pLvl[1].bEof==0 ){ + fts5DataRelease(pLvl->pData); + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) + ); + if( pLvl->pData ) fts5DlidxLvlNext(pLvl); + } + } } - if( iOffn ){ - i64 iVal; - pIter->iLeafPgno += (iOff - pIter->iOff) + 1; - iOff += getVarint(&pData->p[iOff], (u64*)&iVal); - pIter->iRowid += iVal; - pIter->iOff = iOff; - }else{ - pIter->bEof = 1; - } + return pIter->aLvl[0].bEof; +} +static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ + return fts5DlidxIterNextR(p, pIter, 0); +} - return pIter->bEof; +/* +** The iterator passed as the first argument has the following fields set +** as follows. This function sets up the rest of the iterator so that it +** points to the first rowid in the doclist-index. +** +** pData: +** pointer to doclist-index record, +** +** When this function is called pIter->iLeafPgno is the page number the +** doclist is associated with (the one featuring the term). +*/ +static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ + int i; + for(i=0; inLvl; i++){ + fts5DlidxLvlNext(&pIter->aLvl[i]); + } + return pIter->aLvl[0].bEof; } + static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ - return pIter->bEof; + return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; } -static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ - if( fts5DlidxIterFirst(pIter)==0 ){ - while( 0==fts5DlidxIterNext(pIter) ); - pIter->bEof = 0; +static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ + int i; + + /* Advance each level to the last entry on the last page */ + for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; + while( fts5DlidxLvlNext(pLvl)==0 ); + pLvl->bEof = 0; + + if( i>0 ){ + Fts5DlidxLvl *pChild = &pLvl[-1]; + fts5DataRelease(pChild->pData); + memset(pChild, 0, sizeof(Fts5DlidxLvl)); + pChild->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) + ); + } } } -static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ - int iOff = pIter->iOff; +/* +** Move the iterator passed as the only argument to the previous entry. +*/ +static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ + int iOff = pLvl->iOff; - assert( pIter->bEof==0 ); - if( iOff<=pIter->iFirstOff ){ - pIter->bEof = 1; + assert( pLvl->bEof==0 ); + if( iOff<=pLvl->iFirstOff ){ + pLvl->bEof = 1; }else{ - u8 *a = pIter->pData->p; + u8 *a = pLvl->pData->p; i64 iVal; int iLimit; + int ii; + int nZero = 0; /* Currently iOff points to the first byte of a varint. This block ** decrements iOff until it points to the first byte of the previous @@ -1510,20 +1578,70 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ } getVarint(&a[iOff], (u64*)&iVal); - pIter->iRowid -= iVal; - pIter->iLeafPgno--; + pLvl->iRowid -= iVal; + pLvl->iLeafPgno--; - /* Skip backwards passed any 0x00 bytes. */ - while( iOff>pIter->iFirstOff - && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 - ){ - iOff--; - pIter->iLeafPgno--; + /* Skip backwards past any 0x00 varints. */ + for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ + nZero++; + } + if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ + /* The byte immediately before the last 0x00 byte has the 0x80 bit + ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 + ** bytes before a[ii]. */ + int bZero = 0; /* True if last 0x00 counts */ + if( (ii-8)>=pLvl->iFirstOff ){ + int j; + for(j=1; j<=8 && (a[ii-j] & 0x80); j++); + bZero = (j>8); + } + if( bZero==0 ) nZero--; + } + pLvl->iLeafPgno -= nZero; + pLvl->iOff = iOff - nZero; + } + + return pLvl->bEof; +} + +static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; + + assert( iLvlnLvl ); + if( fts5DlidxLvlPrev(pLvl) ){ + if( (iLvl+1) < pIter->nLvl ){ + fts5DlidxIterPrevR(p, pIter, iLvl+1); + if( pLvl[1].bEof==0 ){ + fts5DataRelease(pLvl->pData); + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) + ); + if( pLvl->pData ){ + while( fts5DlidxLvlNext(pLvl)==0 ); + pLvl->bEof = 0; + } + } } - pIter->iOff = iOff; } - return pIter->bEof; + return pIter->aLvl[0].bEof; +} +static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ + return fts5DlidxIterPrevR(p, pIter, 0); +} + +/* +** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). +*/ +static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ + if( pIter ){ + int i; + for(i=0; inLvl; i++){ + fts5DataRelease(pIter->aLvl[i].pData); + } + sqlite3_free(pIter); + } } static Fts5DlidxIter *fts5DlidxIterInit( @@ -1532,35 +1650,52 @@ static Fts5DlidxIter *fts5DlidxIterInit( int iSegid, /* Segment id */ int iLeafPg /* Leaf page number to load dlidx for */ ){ - Fts5DlidxIter *pIter; + Fts5DlidxIter *pIter = 0; + int i; + int bDone = 0; - pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); - if( pIter==0 ) return 0; + for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ + int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); + Fts5DlidxIter *pNew; - pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iSegid, iLeafPg)); - if( pIter->pData==0 ){ - sqlite3_free(pIter); - pIter = 0; - }else{ - pIter->iLeafPgno = iLeafPg; + pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte); + if( pNew==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); + Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; + pIter = pNew; + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, iRowid); + if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ + bDone = 1; + } + pIter->nLvl = i+1; + } + } + + if( p->rc==SQLITE_OK ){ + pIter->iSegid = iSegid; if( bRev==0 ){ fts5DlidxIterFirst(pIter); }else{ - fts5DlidxIterLast(pIter); + fts5DlidxIterLast(p, pIter); } } + if( p->rc!=SQLITE_OK ){ + fts5DlidxIterFree(pIter); + pIter = 0; + } + return pIter; } -/* -** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). -*/ -static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ - if( pIter ){ - fts5DataRelease(pIter->pData); - sqlite3_free(pIter); - } +static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ + return pIter->aLvl[0].iRowid; +} +static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ + return pIter->aLvl[0].iLeafPgno; } static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ @@ -1940,7 +2075,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ ** contains no entries except those on the current page. */ if( fts5DlidxIterEof(p, pDlidx)==0 ){ int iSegid = pIter->pSeg->iSegid; - pgnoLast = pDlidx->iLeafPgno; + pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); }else{ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); @@ -2346,7 +2481,7 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ /* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. -** It is an error if leaf iLeafPgno contains no rowid. +** It is an error if leaf iLeafPgno does not exist or contains no rowids. */ static void fts5SegIterGotoPage( Fts5Index *p, /* FTS5 backend object */ @@ -2354,22 +2489,26 @@ static void fts5SegIterGotoPage( int iLeafPgno ){ assert( iLeafPgno>pIter->iLeafPgno ); - pIter->iLeafPgno = iLeafPgno-1; - fts5SegIterNextPage(p, pIter); - assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); + if( iLeafPgno>pIter->pSeg->pgnoLast ){ + p->rc = FTS5_CORRUPT; + }else{ + pIter->iLeafPgno = iLeafPgno-1; + fts5SegIterNextPage(p, pIter); + assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); - if( p->rc==SQLITE_OK ){ - int iOff; - u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->n; + if( p->rc==SQLITE_OK ){ + int iOff; + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; - iOff = fts5GetU16(&a[0]); - if( iOff<4 || iOff>=n ){ - p->rc = FTS5_CORRUPT; - }else{ - iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); - pIter->iLeafOffset = iOff; - fts5SegIterLoadNPos(p, pIter); + iOff = fts5GetU16(&a[0]); + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + fts5SegIterLoadNPos(p, pIter); + } } } } @@ -2394,21 +2533,21 @@ static void fts5SegIterNextFrom( assert( pIter->pLeaf ); if( bRev==0 ){ - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ - iLeafPgno = pDlidx->iLeafPgno; - fts5DlidxIterNext(pDlidx); + while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ + iLeafPgno = fts5DlidxIterPgno(pDlidx); + fts5DlidxIterNext(p, pDlidx); } - assert( iLeafPgno>=pIter->iLeafPgno || p->rc ); + assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); if( iLeafPgno>pIter->iLeafPgno ){ fts5SegIterGotoPage(p, pIter, iLeafPgno); bMove = 0; } }else{ assert( iMatchiRowid ); - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatchiRowid ){ - fts5DlidxIterPrev(pDlidx); + while( !fts5DlidxIterEof(p, pDlidx) && iMatchiLeafPgno; + iLeafPgno = fts5DlidxIterPgno(pDlidx); assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); @@ -2804,6 +2943,53 @@ static int fts5PrefixCompress( return i; } +static void fts5WriteDlidxClear( + Fts5Index *p, + Fts5SegWriter *pWriter, + int bFlush /* If true, write dlidx to disk */ +){ + int i; + assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); + for(i=0; inDlidx; i++){ + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; + if( pDlidx->buf.n==0 ) break; + if( bFlush ){ + assert( pDlidx->pgno!=0 ); + fts5DataWrite(p, + FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), + pDlidx->buf.p, pDlidx->buf.n + ); + } + sqlite3Fts5BufferZero(&pDlidx->buf); + pDlidx->bPrevValid = 0; + } +} + +/* +** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. +** Any new array elements are zeroed before returning. +*/ +static int fts5WriteDlidxGrow( + Fts5Index *p, + Fts5SegWriter *pWriter, + int nLvl +){ + if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ + Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( + pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl + ); + if( aDlidx==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); + memset(&aDlidx[pWriter->nDlidx], 0, nByte); + pWriter->aDlidx = aDlidx; + pWriter->nDlidx = nLvl; + } + } + return p->rc; +} + /* ** If an "nEmpty" record must be written to the b-tree before the next ** term, write it now. @@ -2813,23 +2999,22 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; - if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ - i64 iKey = FTS5_DOCLIST_IDX_ROWID( - pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty - ); - assert( pWriter->cdlidx.n>0 ); - fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n); + + /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written + ** to the database, also write the doclist-index to disk. */ + if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ bFlag = 1; } + fts5WriteDlidxClear(p, pWriter, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); pWriter->nEmpty = 0; + }else{ + fts5WriteDlidxClear(p, pWriter, 0); } - /* Whether or not it was written to disk, zero the doclist index at this - ** point */ - sqlite3Fts5BufferZero(&pWriter->cdlidx); - pWriter->bDlidxPrevValid = 0; + assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 ); + assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 ); } static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ @@ -2900,43 +3085,98 @@ static void fts5WriteBtreeTerm( } } +/* +** This function is called when flushing a leaf page that contains no +** terms at all to disk. +*/ static void fts5WriteBtreeNoTerm( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter /* Writer object */ ){ - if( pWriter->bFirstRowidInPage ){ - /* No rowids on this page. Append an 0x00 byte to the current - ** doclist-index */ - if( pWriter->bDlidxPrevValid==0 ){ - i64 iRowid = pWriter->iPrevRowid; - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); - pWriter->bDlidxPrevValid = 1; - pWriter->iDlidxPrev = iRowid; - } - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0); + /* If there were no rowids on the leaf page either and the doclist-index + ** has already been started, append an 0x00 byte to it. */ + if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; + assert( pDlidx->bPrevValid ); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); } + + /* Increment the "number of sequential leaves without a term" counter. */ pWriter->nEmpty++; } +static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ + i64 iRowid; + int iOff; + + iOff = 1 + getVarint(&pBuf->p[1], (u64*)&iRowid); + getVarint(&pBuf->p[iOff], (u64*)&iRowid); + return iRowid; +} + /* -** Rowid iRowid has just been appended to the current leaf page. As it is -** the first on its page, append an entry to the current doclist-index. +** Rowid iRowid has just been appended to the current leaf page. It is the +** first on the page. This function appends an appropriate entry to the current +** doclist-index. */ static void fts5WriteDlidxAppend( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid ){ - i64 iVal; - if( pWriter->bDlidxPrevValid ){ - iVal = iRowid - pWriter->iDlidxPrev; - }else{ - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); - iVal = 1; + int i; + int bDone = 0; + + for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ + i64 iVal; + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; + + if( pDlidx->buf.n>=p->pConfig->pgsz ){ + /* The current doclist-index page is full. Write it to disk and push + ** a copy of iRowid (which will become the first rowid on the next + ** doclist-index leaf page) up into the next level of the b-tree + ** hierarchy. If the node being flushed is currently the root node, + ** also push its first rowid upwards. */ + pDlidx->buf.p[0] = 0x01; /* Not the root node */ + fts5DataWrite(p, + FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), + pDlidx->buf.p, pDlidx->buf.n + ); + fts5WriteDlidxGrow(p, pWriter, i+2); + pDlidx = &pWriter->aDlidx[i]; + if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){ + i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); + + /* This was the root node. Push its first rowid up to the new root. */ + pDlidx[1].pgno = pDlidx->pgno; + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); + pDlidx[1].bPrevValid = 1; + pDlidx[1].iPrev = iFirst; + } + + sqlite3Fts5BufferZero(&pDlidx->buf); + pDlidx->bPrevValid = 0; + pDlidx->pgno++; + }else{ + bDone = 1; + } + + if( pDlidx->bPrevValid ){ + iVal = iRowid - pDlidx->iPrev; + }else{ + i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno); + assert( pDlidx->buf.n==0 ); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); + iVal = iRowid; + } + + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); + pDlidx->bPrevValid = 1; + pDlidx->iPrev = iRowid; } - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal); - pWriter->bDlidxPrevValid = 1; - pWriter->iDlidxPrev = iRowid; } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ @@ -3034,6 +3274,9 @@ static void fts5WriteAppendTerm( pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; + assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); + pWriter->aDlidx[0].pgno = pPage->pgno; + /* If the current leaf page is full, flush it to disk. */ if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); @@ -3171,7 +3414,11 @@ static void fts5WriteFinish( fts5BufferFree(&pPg->buf); } sqlite3_free(pWriter->aWriter); - sqlite3Fts5BufferFree(&pWriter->cdlidx); + + for(i=0; inDlidx; i++){ + sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); + } + sqlite3_free(pWriter->aDlidx); } static void fts5WriteInit( @@ -3182,9 +3429,11 @@ static void fts5WriteInit( memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; - pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter)); - if( pWriter->aWriter==0 ) return; + pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter)); + pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); + if( pWriter->aDlidx==0 ) return; pWriter->nWriter = 1; + pWriter->nDlidx = 1; pWriter->aWriter[0].pgno = 1; pWriter->bFirstTermInPage = 1; } @@ -3198,10 +3447,12 @@ static void fts5WriteInitForAppend( memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = pSeg->iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); + pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); if( p->rc==SQLITE_OK ){ int pgno = 1; int i; + pWriter->nDlidx = 1; pWriter->nWriter = pSeg->nHeight; pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; for(i=pSeg->nHeight-1; i>0; i--){ @@ -3583,18 +3834,18 @@ static void fts5FlushOneHash(Fts5Index *p){ pBuf = &writer.aWriter[0].buf; fts5BufferGrow(&p->rc, pBuf, pgsz + 20); - /* Begin scanning through hash table entries. */ + /* Begin scanning through hash table entries. This loop runs once for each + ** term/doclist currently stored within the hash table. */ if( p->rc==SQLITE_OK ){ memset(pBuf->p, 0, 4); pBuf->n = 4; p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } - while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ - const char *zTerm; - int nTerm; - const u8 *pDoclist; - int nDoclist; + const char *zTerm; /* Buffer containing term */ + int nTerm; /* Size of zTerm in bytes */ + const u8 *pDoclist; /* Pointer to doclist for this term */ + int nDoclist; /* Size of doclist in bytes */ int nSuffix; /* Size of term suffix */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); @@ -3611,7 +3862,9 @@ static void fts5FlushOneHash(Fts5Index *p){ } } - /* Write the term to the leaf. And push it up into the b-tree hierarchy */ + /* Write the term to the leaf. And if it is the first on the leaf, and + ** the leaf is not page number 1, push it up into the b-tree hierarchy + ** as well. */ if( writer.bFirstTermInPage==0 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre); @@ -3630,6 +3883,12 @@ static void fts5FlushOneHash(Fts5Index *p){ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix); fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); + /* We just wrote a term into page writer.aWriter[0].pgno. If a + ** doclist-index is to be generated for this doclist, it will be + ** associated with this page. */ + assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); + writer.aDlidx[0].pgno = writer.aWriter[0].pgno; + if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); @@ -3825,8 +4084,6 @@ static void fts5MultiIterPoslist( Fts5ChunkIter iter; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); - static int nCall = 0; - nCall++; fts5ChunkIterInit(p, pSeg, &iter); @@ -4416,7 +4673,7 @@ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ */ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ int n; - const char *z = fts5MultiIterTerm(pIter->pMulti, &n); + const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n); *pn = n-1; return &z[1]; } @@ -4654,32 +4911,35 @@ static void fts5DlidxIterTestReverse( int iLeaf /* Load doclist-index for this leaf */ ){ Fts5DlidxIter *pDlidx = 0; - i64 cksum1 = 13; - i64 cksum2 = 13; + u64 cksum1 = 13; + u64 cksum2 = 13; for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterNext(pDlidx) + fts5DlidxIterNext(p, pDlidx) ){ - assert( pDlidx->iLeafPgno>iLeaf ); - cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); - cksum1 = (cksum1 ^ pDlidx->iRowid); + i64 iRowid = fts5DlidxIterRowid(pDlidx); + int pgno = fts5DlidxIterPgno(pDlidx); + assert( pgno>iLeaf ); + cksum1 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterPrev(pDlidx) + fts5DlidxIterPrev(p, pDlidx) ){ - assert( pDlidx->iLeafPgno>iLeaf ); - cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); - cksum2 = (cksum2 ^ pDlidx->iRowid); + i64 iRowid = fts5DlidxIterRowid(pDlidx); + int pgno = fts5DlidxIterPgno(pDlidx); + + assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); + cksum2 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; - if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; + if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; } #else # define fts5DlidxIterTestReverse(x,y,z) @@ -4748,11 +5008,11 @@ static void fts5IndexIntegrityCheckSegment( for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterNext(pDlidx) + fts5DlidxIterNext(p, pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ - for(iPg=iPrevLeaf+1; iPgiLeafPgno; iPg++){ + for(iPg=iPrevLeaf+1; iPgiLeafPgno; + iPrevLeaf = fts5DlidxIterPgno(pDlidx); /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. */ - iKey = FTS5_SEGMENT_ROWID(iSegid, 0, pDlidx->iLeafPgno); + iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; + if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } - } for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ @@ -4994,6 +5253,7 @@ u64 sqlite3Fts5IndexCksum( static void fts5DecodeRowid( i64 iRowid, /* Rowid from %_data table */ int *piSegid, /* OUT: Segment id */ + int *pbDlidx, /* OUT: Dlidx flag */ int *piHeight, /* OUT: Height */ int *piPgno /* OUT: Page number */ ){ @@ -5003,29 +5263,26 @@ static void fts5DecodeRowid( *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); iRowid >>= FTS5_DATA_HEIGHT_B; + *pbDlidx = (int)(iRowid & 0x0001); + iRowid >>= FTS5_DATA_DLI_B; + *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); } static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ - int iSegid, iHeight, iPgno; /* Rowid compenents */ - fts5DecodeRowid(iKey, &iSegid, &iHeight, &iPgno); + int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ + fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - "{structure idx=%d}", (int)(iKey-10) - ); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)"); } } - else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx segid=%d pgno=%d)", - iSegid, iPgno - ); - }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(segid=%d h=%d pgno=%d)", - iSegid, iHeight, iPgno + else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)", + bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno ); } } @@ -5135,7 +5392,7 @@ static void fts5DecodeFunction( sqlite3_value **apVal /* Function arguments */ ){ i64 iRowid; /* Rowid for record being decoded */ - int iSegid,iHeight,iPgno; /* Rowid components */ + int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ const u8 *aBlob; int n; /* Record to decode */ u8 *a = 0; Fts5Buffer s; /* Build up text to return here */ @@ -5152,24 +5409,24 @@ static void fts5DecodeFunction( a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); if( a==0 ) goto decode_out; memcpy(a, aBlob, n); - fts5DecodeRowid(iRowid, &iSegid, &iHeight, &iPgno); + fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); - if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + if( bDlidx ){ Fts5Data dlidx; - Fts5DlidxIter iter; + Fts5DlidxLvl lvl; dlidx.p = a; dlidx.n = n; dlidx.nRef = 2; - memset(&iter, 0, sizeof(Fts5DlidxIter)); - iter.pData = &dlidx; - iter.iLeafPgno = iPgno; + memset(&lvl, 0, sizeof(Fts5DlidxLvl)); + lvl.pData = &dlidx; + lvl.iLeafPgno = iPgno; - for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){ + for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ sqlite3Fts5BufferAppendPrintf(&rc, &s, - " %d(%lld)", iter.iLeafPgno, iter.iRowid + " %d(%lld)", lvl.iLeafPgno, lvl.iRowid ); } }else if( iSegid==0 ){ diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 171b4849a5..46e2f121b5 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -22,13 +22,7 @@ #include #include -/* -** This variable is set to true when running corruption tests. Otherwise -** false. If it is false, extra assert() conditions in the fts5 code are -** activated - conditions that are only true if it is guaranteed that the -** fts5 database is not corrupt. -*/ -int sqlite3_fts5_may_be_corrupt = 0; +extern int sqlite3_fts5_may_be_corrupt; /************************************************************************* ** This is a copy of the first part of the SqliteDb structure in diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index ce4c90b9a1..559a5a1256 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -49,7 +49,7 @@ do_execsql_test 2.1 { } do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } -} {/{{structure idx=0} {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} +} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } @@ -181,7 +181,6 @@ for {set i 1} {$i <= 10} {incr i} { } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} -# if {$i==1} break } #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} #exit @@ -243,6 +242,7 @@ for {set i 1} {$i <= 10} {incr i} { if {[set_test_counter errors]} break } + #------------------------------------------------------------------------- # reset_db diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 9f712ffc86..cc6435bb0e 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -26,17 +26,17 @@ ifcapable !fts5 { do_execsql_test 1.1 { CREATE VIRTUAL TABLE ft1 USING fts5(x); SELECT * FROM ft1_config; -} {version 1} +} {version 2} do_execsql_test 1.2 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); SELECT * FROM ft1_config; -} {pgsz 32 version 1} +} {pgsz 32 version 2} do_execsql_test 1.3 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); SELECT * FROM ft1_config; -} {pgsz 64 version 1} +} {pgsz 64 version 2} #-------------------------------------------------------------------------- # Test the logic for parsing the rank() function definition. diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index 7cbd7b00e8..15868d1de1 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -16,6 +16,7 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt2 +sqlite3_fts5_may_be_corrupt 1 # Create a simple FTS5 table containing 100 documents. Each document # contains 10 terms, each of which start with the character "x". @@ -30,6 +31,7 @@ do_execsql_test 1.0 { } set mask [expr 31 << 31] + # Test 1: # # For each page in the t1_data table, open a transaction and DELETE @@ -194,5 +196,6 @@ foreach {tn nCut} { } +sqlite3_fts5_may_be_corrupt 0 finish_test diff --git a/ext/fts5/test/fts5dlidx.test b/ext/fts5/test/fts5dlidx.test index 6a23622ce0..76c05e8fd0 100644 --- a/ext/fts5/test/fts5dlidx.test +++ b/ext/fts5/test/fts5dlidx.test @@ -61,6 +61,7 @@ proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { } execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } } + breakpoint execsql COMMIT do_test $tn.1 { @@ -82,8 +83,8 @@ proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { do_dlidx_test1 1.1 10 100 10000 0 1000 do_dlidx_test1 1.2 10 10 10000 0 128 -do_dlidx_test1 1.3 10 10 100 0 36028797018963970 -do_dlidx_test1 1.3 10 10 50 0 150000000000000000 +do_dlidx_test1 1.3 10 10 66 0 36028797018963970 +do_dlidx_test1 1.4 10 10 50 0 150000000000000000 diff --git a/ext/fts5/test/fts5integrity.test b/ext/fts5/test/fts5integrity.test index a6dc34a90e..9e244c26fa 100644 --- a/ext/fts5/test/fts5integrity.test +++ b/ext/fts5/test/fts5integrity.test @@ -31,5 +31,27 @@ do_execsql_test 2.1 { INSERT INTO yy(yy) VALUES('integrity-check'); } +#-------------------------------------------------------------------- +# +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE zz USING fts5(z); + INSERT INTO zz(zz, rank) VALUES('pgsz', 32); + INSERT INTO zz VALUES('b b b b b b b b b b b b b b'); + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz(zz) VALUES('optimize'); +} + +do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); } + + +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r} +#exit + + finish_test diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test index 57bb0bb2bd..5d0253472d 100644 --- a/ext/fts5/test/fts5rowid.test +++ b/ext/fts5/test/fts5rowid.test @@ -25,7 +25,7 @@ do_catchsql_test 1.2 { do_execsql_test 1.3 { SELECT fts5_rowid('segment', 1, 1, 1) -} {70866960385} +} {139586437121} do_catchsql_test 1.4 { SELECT fts5_rowid('nosucharg'); diff --git a/ext/fts5/test/fts5version.test b/ext/fts5/test/fts5version.test index 4e5df579e5..ccd035ae4b 100644 --- a/ext/fts5/test/fts5version.test +++ b/ext/fts5/test/fts5version.test @@ -24,35 +24,35 @@ do_execsql_test 1.1 { do_execsql_test 1.2 { SELECT * FROM t1_config WHERE k='version' -} {version 1} +} {version 2} do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'a'; } {1} do_execsql_test 1.4 { - UPDATE t1_config set v=2 WHERE k='version'; + UPDATE t1_config set v=3 WHERE k='version'; } do_test 1.5 { db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}} breakpoint do_test 1.6 { db close sqlite3 db test.db catchsql { INSERT INTO t1 VALUES('x y z') } -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}} do_test 1.7 { execsql { DELETE FROM t1_config WHERE k='version' } db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 0, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}} finish_test diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl index ef543552dc..3110954a8c 100644 --- a/ext/fts5/tool/loadfts5.tcl +++ b/ext/fts5/tool/loadfts5.tcl @@ -109,6 +109,7 @@ db transaction { if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" + # db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" } if {$O(automerge)>=0} { if {$O(vtab) == "fts5"} { diff --git a/manifest b/manifest index 12d81ba731..78e665497d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Allow\sthe\sfts5vocab\stable\sto\soptionally\sprovide\sdata\son\sa\sper-column\sbasis. -D 2015-05-09T18:28:27.134 +C Change\sfts5\sdoclist-index\sstructures\sto\sbe\strees\sinstead\sof\sflat\slists.\sThis\sonly\smakes\sa\sdifference\sfor\sdatabases\sthat\scontain\smillions\sof\sinstances\sof\sthe\ssame\stoken. +D 2015-05-13T17:15:32.981 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,24 +104,24 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c a5a908a68c79c352a0dfa77d16712de43896bd07 +F ext/fts5/fts5.c 2899b3c60a382613889500571fd5158b2c1aa159 F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 5b9e4afe80d18648bc236b9b5bc2f873634326f6 +F ext/fts5/fts5Int.h e01aec94c0d927924134c30afd9803425cd88812 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 F ext/fts5/fts5_expr.c 3fe1170453d6a322d2de8a3fd0aed3edff7b8b09 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 6a4fed2d64d7dbb0416c4278b23201f77daf94ea +F ext/fts5/fts5_index.c b9a3382af3027f5c9717d90613fda5f29f7d57fa F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d -F ext/fts5/fts5_tcl.c aa3b102bb01f366174718be7ce8e9311b9abb482 +F ext/fts5/fts5_tcl.c f18eeb125d733f4e815a11679b971fa61cd7ec77 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5_vocab.c 2e37ea9b4d4d5460bc778f2adb872c6a869601e7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 -F ext/fts5/test/fts5aa.test e17657bd749cb3982745ec503ce22372dee7340d +F ext/fts5/test/fts5aa.test 5f221b82487abfb915e1b040eb4e305cf79a2ef5 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 8b3c2938840da8f3f6a53b1324fb03e0bac12d1e F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d @@ -132,14 +132,14 @@ F ext/fts5/test/fts5ah.test d74cf8b7de5b8424f732acef69fe12122a12f2bf F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 -F ext/fts5/test/fts5al.test 8cde0a064ffe452281b7c90a759d220f796bbb20 +F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 F ext/fts5/test/fts5aux.test d9c724351d8e4dc46cad1308c0b4b8ac94d07660 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 -F ext/fts5/test/fts5corrupt2.test c65a6619a1f712b87be0ccb3ef1a2120bf1f6430 -F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f +F ext/fts5/test/fts5corrupt2.test bdad9241f73076917512f5298501d50d9e9d7dc7 +F ext/fts5/test/fts5dlidx.test 74c3c8c33dfde594c0d8a22b9358d82fe56c8c7a F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 F ext/fts5/test/fts5ea.test ed163ed820fd503354bd7dcf9d3b0e3801ade962 F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e @@ -149,7 +149,7 @@ F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 F ext/fts5/test/fts5fault4.test 420f2e23775b458eeb9a325bcdfe84650c2e9d39 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa -F ext/fts5/test/fts5integrity.test 39deee579b84df2786d9c8298e9196b339cfc872 +F ext/fts5/test/fts5integrity.test 98801bd0fb7c53a40bc770280134865d61724f3a F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 @@ -159,14 +159,14 @@ F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 -F ext/fts5/test/fts5rowid.test 0dd51524739ebe5f1251a25f3d3ece9840fdc1a8 +F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 F ext/fts5/test/fts5tokenizer.test bbcde2a7473dcaa9a1fc6809aa8965acb7b846ff F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 -F ext/fts5/test/fts5version.test 1c902eaa7359336293ac45c7a34616527513e9fb +F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 F ext/fts5/test/fts5vocab.test 2d1bddfb6e1effd9e1d2f5d1d25bf05e9ab33e64 -F ext/fts5/tool/loadfts5.tcl 8a8f10d7d2d0d77f622e0a84cc0824c158c34a52 +F ext/fts5/tool/loadfts5.tcl add4d349ae5463c5f60b26e821c24e60ed8054d3 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -860,7 +860,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test 4243af26b8f3f4555abe166f723715a1f74c77ff -F test/permutations.test 62ff8c49738c72a70b034ecc31957bee437f76ff +F test/permutations.test fd77d410331d76399cdd31175c00c54eabe11fca F test/pragma.test be7195f0aa72bdb8a512133e9640ac40f15b57a2 F test/pragma2.test f624a496a95ee878e81e59961eade66d5c00c028 F test/pragma3.test 6f849ccffeee7e496d2f2b5e74152306c0b8757c @@ -1284,7 +1284,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 94f196c9961e0ca3513e29f57125a3197808be2d F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl 69bae8ce4aa52d2ff82d4a8a856bf283ec035b2e -F tool/mksqlite3c.tcl e3136f007fcdaac00c207306ef4b352ca87bf9af +F tool/mksqlite3c.tcl eea6aa21d76f47c7932af2baa9291517ec72f5ce F tool/mksqlite3h.tcl 44730d586c9031638cdd2eb443b801c0d2dbd9f8 F tool/mksqlite3internalh.tcl eb994013e833359137eb53a55acdad0b5ae1049b F tool/mkvsix.tcl 3b58b9398f91c7dbf18d49eb87cefeee9efdbce1 @@ -1319,7 +1319,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 6bf93e3b56e6705b7d12bab5024fc615f373b36c -R f31ac5d295b9e0df865bd081bc32aa0b +P 3922276135a7825d0ede8d9c757e9cfe492f803a +R 80fc221c857ceeffac4c1ca1e8e3c4c0 U dan -Z 9e8a79e0ffff336d7475aff60e841c57 +Z ea2587087e805cefd8e74a1d1357bdd2 diff --git a/manifest.uuid b/manifest.uuid index b81db19d90..3338b019bb 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3922276135a7825d0ede8d9c757e9cfe492f803a \ No newline at end of file +aa34bf666c384cf32a8d8166ab6d9afbca26a256 \ No newline at end of file diff --git a/test/permutations.test b/test/permutations.test index 2ee3953d58..1bb5985b30 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -242,6 +242,14 @@ test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] +test_suite "fts5-light" -prefix "" -description { + All FTS5 tests. +} -files [ + test_set \ + [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \ + -exclude *corrupt* *fault* *big* *fts5aj* +] + test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index ca0eb02598..97b31f1a02 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -377,6 +377,7 @@ foreach file { fts5_storage.c fts5_tokenize.c fts5_unicode2.c + fts5_vocab.c rtree.c icu.c