/* If there is more data, read the next doclist element. */
if( pReader->nData!=0 ){
- int iDummy, n = getVarint(pReader->pData, &pReader->iDocid);
+ sqlite_int64 iDocidDelta;
+ int iDummy, n = getVarint(pReader->pData, &iDocidDelta);
+ pReader->iDocid += iDocidDelta;
if( pReader->iType>=DL_POSITIONS ){
assert( n<pReader->nData );
while( 1 ){
*/
static int docListValidate(DocListType iType, const char *pData, int nData,
sqlite_int64 *pLastDocid){
- int has_prevDocid = 0;
- sqlite_int64 iPrevDocid;
+ sqlite_int64 iPrevDocid = 0;
assert( pData!=0 );
assert( nData!=0 );
while( nData!=0 ){
- int n;
- sqlite_int64 iDocid;
- n = getVarint(pData, &iDocid);
- assert( !has_prevDocid || iPrevDocid<iDocid );
- has_prevDocid = 1;
- iPrevDocid = iDocid;
+ sqlite_int64 iDocidDelta;
+ int n = getVarint(pData, &iDocidDelta);
+ iPrevDocid += iDocidDelta;
if( iType>DL_DOCIDS ){
int iDummy;
while( 1 ){
pData += n;
nData -= n;
}
- assert( has_prevDocid );
if( pLastDocid ) *pLastDocid = iPrevDocid;
return 1;
}
** dlwAppend - append raw doclist data to buffer.
** dlwAdd - construct doclist element and append to buffer.
*/
-/* TODO(shess) Modify to handle delta-encoding docids. This should be
-** fairly simple. The changes to dlwAdd() are obvious. dlwAppend()
-** would need to decode the leading docid, rencode as a delta, and
-** copy the rest of the data (which would already be delta-encoded).
-** Note that this will require a change to pass the trailing docid.
-*/
typedef struct DLWriter {
DocListType iType;
DataBuffer *b;
-#ifndef NDEBUG
- int has_prevDocid;
sqlite_int64 iPrevDocid;
-#endif
} DLWriter;
static void dlwInit(DLWriter *pWriter, DocListType iType, DataBuffer *b){
pWriter->b = b;
pWriter->iType = iType;
-#ifndef NDEBUG
- pWriter->has_prevDocid = 0;
pWriter->iPrevDocid = 0;
-#endif
}
static void dlwDestroy(DLWriter *pWriter){
SCRAMBLE(pWriter);
}
+/* iFirstDocid is the first docid in the doclist in pData. It is
+** needed because pData may point within a larger doclist, in which
+** case the first item would be delta-encoded.
+**
+** iLastDocid is the final docid in the doclist in pData. It is
+** needed to create the new iPrevDocid for future delta-encoding. The
+** code could decode the passed doclist to recreate iLastDocid, but
+** the only current user (docListMerge) already has decoded this
+** information.
+*/
+/* TODO(shess) This has become just a helper for docListMerge.
+** Consider a refactor to make this cleaner.
+*/
static void dlwAppend(DLWriter *pWriter,
- const char *pData, int nData){
+ const char *pData, int nData,
+ sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){
+ sqlite_int64 iDocid = 0;
+ char c[VARINT_MAX];
+ int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */
#ifndef NDEBUG
- sqlite_int64 iDocid;
- int n;
- n = getVarint(pData, &iDocid);
- assert( n<=nData );
- assert( !pWriter->has_prevDocid || pWriter->iPrevDocid<iDocid );
- assert( n<nData || pWriter->iType>DL_DOCIDS );
- assert( docListValidate(pWriter->iType, pData, nData, &iDocid) );
- pWriter->has_prevDocid = 1;
- pWriter->iPrevDocid = iDocid;
+ sqlite_int64 iLastDocidDelta;
#endif
- dataBufferAppend(pWriter->b, pData, nData);
+
+ /* Recode the initial docid as a delta from the writer's iPrevDocid. */
+ nFirstOld = getVarint(pData, &iDocid);
+ assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) );
+ nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid);
+
+ /* Verify that the incoming doclist is valid AND that it ends with
+ ** the expected docid. This is essential because we'll trust this
+ ** docid in future delta-encoding.
+ */
+ assert( docListValidate(pWriter->iType, pData, nData, &iLastDocidDelta) );
+ assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta );
+
+ /* Append the recoded initial docid followed by the rest of the data.
+ ** The remaining docids are already delta-encoded relative to their
+ ** predecessors, so they can be copied unchanged.
+ */
+ if( nFirstOld<nData ){
+ dataBufferAppend2(pWriter->b, c, nFirstNew,
+ pData+nFirstOld, nData-nFirstOld);
+ }else{
+ dataBufferAppend(pWriter->b, c, nFirstNew);
+ }
+ pWriter->iPrevDocid = iLastDocid;
}
static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid,
const char *pPosList, int nPosList){
char c[VARINT_MAX];
- int n = putVarint(c, iDocid);
+ int n = putVarint(c, iDocid-pWriter->iPrevDocid);
- assert( !pWriter->has_prevDocid || pWriter->iPrevDocid<iDocid );
+ assert( pWriter->iPrevDocid<iDocid );
assert( pPosList==0 || pWriter->iType>DL_DOCIDS );
dataBufferAppend(pWriter->b, c, n);
dataBufferAppend(pWriter->b, c, n);
}
}
-#ifndef NDEBUG
- pWriter->has_prevDocid = 1;
pWriter->iPrevDocid = iDocid;
-#endif
}
/*******************************************************************/
int i, n;
const char *pStart = 0;
int nStart = 0;
+ sqlite_int64 iFirstDocid = 0, iLastDocid = 0;
assert( nReaders>0 );
if( nReaders==1 ){
if( dlrDocData(readers[0].pReader)==pStart+nStart ){
nStart += dlrDocDataBytes(readers[0].pReader);
}else{
- if( pStart!=0 ) dlwAppend(&writer, pStart, nStart);
+ if( pStart!=0 ){
+ dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
+ }
pStart = dlrDocData(readers[0].pReader);
nStart = dlrDocDataBytes(readers[0].pReader);
+ iFirstDocid = iDocid;
}
+ iLastDocid = iDocid;
dlrStep(readers[0].pReader);
/* Drop all of the older elements with the same docid. */
}
/* Copy over any remaining elements. */
- if( nStart>0 ) dlwAppend(&writer, pStart, nStart);
+ if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
dlwDestroy(&writer);
}
rc = leafWriterFlush(v, pWriter);
if( rc!=SQLITE_OK ) return rc;
}
+ assert( leafNodeValidate(pWriter->data.pData, pWriter->data.nData) );
return SQLITE_OK;
}
-C Require\sa\sminimum\sfanout\sfor\sinterior\snodes.\s\sThis\sprevents\scases\nwhere\sexcessively\slarge\sterms\skeep\sthe\stree\sfrom\sfinding\sa\ssingle\nroot.\s\sA\sdownside\sis\sthat\sthis\scould\sresult\sin\slarge\sinterior\snodes\sin\nthe\spresence\sof\slarge\sterms,\swhich\smay\sbe\sprone\sto\sfragmentation,\nthough\sif\sthe\snodes\swere\ssmaller\sthat\swould\stranslate\sinto\smore\slevels\nin\sthe\stree,\swhich\swould\salso\shave\sthat\sproblem.\s(CVS\s3510)
-D 2006-11-13T21:00:55
+C Delta-encode\sdocids.\s\sThis\sis\sgood\sfor\saround\s22%\sreduction\sin\sindex\nsize\swith\sDL_POSITIONS.\s\sIt\simproves\sperformance\sabout\s5%-6%.\s(CVS\s3511)
+D 2006-11-13T21:09:25
F Makefile.in 8e14898d41a53033ecb687d93c9cd5d109fb9ae3
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts2/fts2.c 667a93b3fe079f20870a3042bd4b4c3841925c01
+F ext/fts2/fts2.c 7909381760660b3da9918ff3e618e2c83315234b
F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
-P 9628a61a6f33b7bec3455086534b76437d2622b4
-R 2b75980ff8122f283fe2f8c11a712490
+P 64b7e3406134ac4891113b9bb432ad97504268bb
+R 5cca903a493ab0c4e72312813e09cd62
U shess
-Z 5a39d4513967a7196d065949839c93cd
+Z 6c02cb52391a3d0abd67d903b02caa78