From: dan Date: Tue, 1 Jul 2014 20:45:18 +0000 (+0000) Subject: Change the position list format so that its size in bytes is stored at the start... X-Git-Tag: version-3.8.11~114^2~173 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=acf6642819e65b48e58c26fd1eb4b7ff0dd8ccbc;p=thirdparty%2Fsqlite.git Change the position list format so that its size in bytes is stored at the start of the list itself. FossilOrigin-Name: 62f2ff20418702ed0fbf708369edf5638445b51b --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 774b9f9687..ff217c881b 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -75,7 +75,6 @@ void sqlite3Fts5Dequote(char *z); typedef struct Fts5Index Fts5Index; typedef struct Fts5IndexIter Fts5IndexIter; - /* ** Values used as part of the flags argument passed to IndexQuery(). */ @@ -117,18 +116,9 @@ void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); /* -** Position list iteration. -** -** for( -** iPos=sqlite3Fts5IterFirstPos(pIter, iCol); -** iPos>=0; -** iPos=sqlite3Fts5IterNextPos(pIter) -** ){ -** // token appears at position iPos of column iCol of the current document -** } +** Obtain the position list that corresponds to the current position. */ -// int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol); -// int sqlite3Fts5IterNextPos(Fts5IndexIter*); +const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter*, int *pn); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e620058b2e..8f3ce6dca2 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -110,13 +110,13 @@ ** ** poslist format: ** +** varint: size of poslist in bytes. not including this field. 
** collist: collist for column 0 ** zero-or-more { ** 0x01 byte ** varint: column number (I) ** collist: collist for column I ** } -** 0x00 byte ** ** collist format: ** @@ -255,6 +255,7 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5Buffer Fts5Buffer; +typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; @@ -297,13 +298,6 @@ struct Fts5Index { sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ }; -struct Fts5IndexIter { - Fts5Index *pIndex; - Fts5Structure *pStruct; - Fts5MultiSegIter *pMulti; -}; - - /* ** Buffer object for the incremental building of string data. */ @@ -313,6 +307,13 @@ struct Fts5Buffer { int nSpace; }; +struct Fts5IndexIter { + Fts5Index *pIndex; + Fts5Structure *pStruct; + Fts5MultiSegIter *pMulti; + Fts5Buffer poslist; /* Buffer containing current poslist */ +}; + /* ** A single record read from the %_data table. */ @@ -422,9 +423,9 @@ struct Fts5MultiSegIter { ** ** iLeafOffset: ** Byte offset within the current leaf that is one byte past the end of the -** rowid field of the current entry. Usually this is the first byte of -** the position list data. The exception is if the rowid for the current -** entry is the last thing on the leaf page. +** rowid field of the current entry. Usually this is the size field of the +** position list data. The exception is if the rowid for the current entry +** is the last thing on the leaf page. ** ** pLeaf: ** Buffer containing current leaf page data. Set to NULL at EOF. @@ -454,12 +455,24 @@ struct Fts5SegIter { }; /* -** Object for iterating through a single position list. +** Object for iterating through paginated data. */ -struct Fts5PosIter { +struct Fts5ChunkIter { Fts5Data *pLeaf; /* Current leaf data. NULL -> EOF. 
*/ i64 iLeafRowid; /* Absolute rowid of current leaf */ - int iLeafOffset; /* Current offset within leaf */ + int nRem; /* Remaining bytes of data to read */ + + /* Output parameters */ + u8 *p; /* Pointer to chunk of data */ + int n; /* Size of buffer p in bytes */ +}; + +/* +** Object for iterating through a single position list. +*/ +struct Fts5PosIter { + Fts5ChunkIter chunk; /* Current chunk of data */ + int iOff; /* Offset within chunk data */ int iCol; int iPos; @@ -1107,6 +1120,10 @@ static void fts5SegIterNextPage( } } +/* +** Leave pIter->iLeafOffset as the offset to the size field of the first +** position list. The position list belonging to document pIter->iRowid. +*/ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; /* Offset to read at */ @@ -1218,14 +1235,17 @@ static void fts5SegIterSeekInit( while( (res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)) ){ if( res<0 && pIter->iLeafPgno==iPg ){ - /* Search for the end of the current doclist within the current - ** page. The end of a doclist is marked by a pair of successive - ** 0x00 bytes. */ - int iOff; - for(iOff=pIter->iLeafOffset+1; iOffiLeafOffset; + while( iOffp; int n = pLeaf->n; - for(iOff=pIter->iLeafOffset; iOffiLeafOffset; + if( iOff<=n ){ + int nPoslist; + iOff += getVarint32(&a[iOff], nPoslist); + iOff += nPoslist; + } if( iOffterm.p; } +/* +** Return true if the chunk iterator passed as the second argument is +** at EOF. Or if an error has already occurred. Otherwise, return false. +*/ +static int fts5ChunkIterEof(Fts5Index *p, Fts5ChunkIter *pIter){ + return (p->rc || pIter->pLeaf==0); +} + +/* +** Advance the chunk-iterator to the next chunk of data to read. 
+*/ +static void fts5ChunkIterNext(Fts5Index *p, Fts5ChunkIter *pIter){ + assert( pIter->nRem>=pIter->n ); + pIter->nRem -= pIter->n; + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + pIter->p = 0; + if( pIter->nRem>0 ){ + Fts5Data *pLeaf; + pIter->iLeafRowid++; + pLeaf = pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); + if( pLeaf ){ + pIter->n = MIN(pIter->nRem, pLeaf->n-4); + pIter->p = pLeaf->p+4; + } + } +} + +/* +** Initialize the chunk iterator to read the position list data for which +** the size field is at offset iOff of leaf pLeaf. +*/ +static void fts5ChunkIterInit( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pSeg, /* Segment iterator to read poslist from */ + Fts5ChunkIter *pIter /* Initialize this object */ +){ + int iId = pSeg->pSeg->iSegid; + i64 rowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); + Fts5Data *pLeaf = pSeg->pLeaf; + int iOff = pSeg->iLeafOffset; + + memset(pIter, 0, sizeof(*pIter)); + pIter->iLeafRowid = rowid; + if( iOff<pLeaf->n ){ + fts5DataReference(pLeaf); + pIter->pLeaf = pLeaf; + }else{ + pIter->nRem = 1; + fts5ChunkIterNext(p, pIter); + if( p->rc ) return; + iOff = 4; + pLeaf = pIter->pLeaf; + } + + iOff += getVarint32(&pLeaf->p[iOff], pIter->nRem); + pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); + pIter->p = pLeaf->p + iOff; + + if( pIter->n==0 ){ + fts5ChunkIterNext(p, pIter); + } +} + /* ** Read and return the next 32-bit varint from the position-list iterator ** passed as the second argument. 
@@ -1543,17 +1632,12 @@ static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ int iVal = 0; if( p->rc==SQLITE_OK ){ - int iOff = pIter->iLeafOffset; - if( iOff < pIter->pLeaf->n ){ - pIter->iLeafOffset += getVarint32(&pIter->pLeaf->p[iOff], iVal); - }else{ - fts5DataRelease(pIter->pLeaf); - pIter->iLeafRowid++; - pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); - if( pIter->pLeaf ){ - pIter->iLeafOffset = 4 + getVarint32(&pIter->pLeaf->p[4], iVal); - } + if( pIter->iOff>=pIter->chunk.n ){ + fts5ChunkIterNext(p, &pIter->chunk); + if( fts5ChunkIterEof(p, &pIter->chunk) ) return 0; + pIter->iOff = 0; } + pIter->iOff += getVarint32(&pIter->chunk.p[pIter->iOff], iVal); } return iVal; } @@ -1563,16 +1647,15 @@ static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ */ static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){ int iVal; + assert( fts5ChunkIterEof(p, &pIter->chunk)==0 ); iVal = fts5PosIterReadVarint(p, pIter); - if( iVal==0 ){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - } - else if( iVal==1 ){ - pIter->iCol = fts5PosIterReadVarint(p, pIter); - pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2; - }else{ - pIter->iPos += (iVal - 2); + if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ + if( iVal==1 ){ + pIter->iCol = fts5PosIterReadVarint(p, pIter); + pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2; + }else{ + pIter->iPos += (iVal - 2); + } } } @@ -1588,14 +1671,11 @@ static void fts5PosIterInit( ){ if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; - int iId = pSeg->pSeg->iSegid; - memset(pIter, 0, sizeof(*pIter)); - pIter->pLeaf = pSeg->pLeaf; - pIter->iLeafOffset = pSeg->iLeafOffset; - pIter->iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); - fts5DataReference(pIter->pLeaf); - fts5PosIterNext(p, pIter); + fts5ChunkIterInit(p, pSeg, &pIter->chunk); + if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ + 
fts5PosIterNext(p, pIter); + } } } @@ -1604,7 +1684,7 @@ static void fts5PosIterInit( ** at EOF. Or if an error has already occurred. Otherwise, return false. */ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ - return (p->rc || pIter->pLeaf==0); + return (p->rc || pIter->chunk.pLeaf==0); } @@ -2106,15 +2186,15 @@ static void fts5WritePendingDoclist( /* Append the rowid itself */ fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid); + /* Append the size of the position list in bytes */ + fts5WriteAppendPoslistInt(p, pWriter, pPoslist->buf.n); + /* Copy the position list to the output segment */ while( i<pPoslist->buf.n){ int iVal; i += getVarint32(&pPoslist->buf.p[i], iVal); fts5WriteAppendPoslistInt(p, pWriter, iVal); } - - /* Write the position list terminator */ - fts5WriteAppendZerobyte(p, pWriter); } /* Write the doclist terminator */ @@ -2297,9 +2377,8 @@ fflush(stdout); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter) ){ - Fts5PosIter sPos; /* Used to iterate through position list */ - int iCol = 0; /* Current output column */ - int iPos = 0; /* Current output position */ + Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; + Fts5ChunkIter sPos; /* Used to iterate through position list */ int nTerm; const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); @@ -2319,20 +2398,16 @@ fflush(stdout); fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); /* Copy the position list from input to output */ - for(fts5PosIterInit(p, pIter, &sPos); - fts5PosIterEof(p, &sPos)==0; - fts5PosIterNext(p, &sPos) - ){ - if( sPos.iCol!=iCol ){ - fts5WriteAppendPoslistInt(p, &writer, 1); - fts5WriteAppendPoslistInt(p, &writer, sPos.iCol); - iCol = sPos.iCol; - iPos = 0; + fts5ChunkIterInit(p, pSeg, &sPos); + fts5WriteAppendPoslistInt(p, &writer, sPos.nRem); + for(/* noop */; fts5ChunkIterEof(p, &sPos)==0; fts5ChunkIterNext(p, &sPos)){ + int iOff = 0; + while( iOffposlist); fts5MultiIterNext(pIter->pIndex, pIter->pMulti); } @@ -3125,6 +3207,21 @@ i64 
sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ return fts5MultiIterRowid(pIter->pMulti); } +/* +** Return a pointer to a buffer containing a copy of the position list for +** the current entry. Output variable *pn is set to the size of the buffer +** in bytes before returning. +** +** The returned buffer does not include the 0x00 terminator byte stored on +** disk. +*/ +const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ + assert( sqlite3Fts5IterEof(pIter)==0 ); + + *pn = pIter->poslist.n; + return pIter->poslist.p; +} + /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ diff --git a/manifest b/manifest index 34ec6a1a05..e5d38010a4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sminor\sproblems\sin\sterm\smatching. -D 2014-06-26T12:31:41.784 +C Change\sthe\sposition\slist\sformat\sso\sthat\sits\ssize\sin\sbytes\sis\sstored\sat\sthe\sstart\sof\sthe\slist\sitself. +D 2014-07-01T20:45:18.496 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,10 +104,10 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 -F ext/fts5/fts5Int.h 3fd1ebeb58963727cae0ccc8e4e80751bd870296 +F ext/fts5/fts5Int.h 80f3d38a69a0c58ccc94428c8fc8adbcf7561a2d F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c 1874b17f10a38d0b21e0c38a28637f74e4d2570a -F ext/fts5/fts5_index.c c8b21d12f15ca6fe028ede3f8e040dcb2aaef6f6 +F ext/fts5/fts5_index.c ea3dfe56a16813fcf59e03f6156965894b4b5e6f F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c 
d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1188,7 +1188,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 47a9f3cc92deefe163108e3507bd4614bf1f5da7 -R b3c853e0de2f65908d06e4caafc2bc71 +P 94eeb077d08a1d2607f3ff3a9fbf18229ba475bb +R 5d9b8f6933c58725a24e426a963b0d97 U dan -Z 840b486e1cfb6171cd4015bc26a5f123 +Z bb8816e0d501865bff7c4c8da87350cb diff --git a/manifest.uuid b/manifest.uuid index 3e7acc7005..ac69385214 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -94eeb077d08a1d2607f3ff3a9fbf18229ba475bb \ No newline at end of file +62f2ff20418702ed0fbf708369edf5638445b51b \ No newline at end of file