** than the first term encoded (or all terms if no term is encoded).
** Otherwise, for terms greater than or equal to pTerm[i] but less
** than pTerm[i+1], the subtree for that term will be rooted at
-** iBlockid+i.
+** iBlockid+i. Interior nodes only store enough term data to
+** distinguish adjacent children (if the rightmost term of the left
+** child is "something", and the leftmost term of the right child is
+** "wicked", only "w" is stored).
**
** New data is spilled to a new interior node at the same height when
** the current node exceeds INTERIOR_MAX bytes (default 2048).
DataBuffer term; /* previous encoded term */
DataBuffer data; /* encoding buffer */
+ /* Number of leading bytes of the first term in the current node
+ ** needed to distinguish that term from the last term of the
+ ** previous node.
+ */
+ int nTermDistinct;
+
InteriorWriter parentWriter; /* if we overflow */
int has_parent;
} LeafWriter;
n = getVarint32(pWriter->data.pData+iData+1, &nStartingTerm);
pStartingTerm = pWriter->data.pData+iData+1+n;
assert( pWriter->data.nData>iData+1+n+nStartingTerm );
+ assert( pWriter->nTermDistinct>0 );
+ assert( pWriter->nTermDistinct<=nStartingTerm );
+ nStartingTerm = pWriter->nTermDistinct;
if( pWriter->has_parent ){
interiorWriterAppend(&pWriter->parentWriter,
dataBufferDestroy(&pWriter->data);
}
-/* Encode a term into the leafWriter, delta-encoding as appropriate. */
-static void leafWriterEncodeTerm(LeafWriter *pWriter,
- const char *pTerm, int nTerm){
+/* Encode a term into the leafWriter, delta-encoding as appropriate.
+** Returns the length of the shortest prefix of the new term which
+** distinguishes it from the previous term (one more than the length
+** of their shared prefix). This can be used to set nTermDistinct
+** when a node boundary is crossed.
+*/
+static int leafWriterEncodeTerm(LeafWriter *pWriter,
+ const char *pTerm, int nTerm){
char c[VARINT_MAX+VARINT_MAX];
- int n;
+ int n, nPrefix = 0;
+
+ assert( nTerm>0 );
+ while( nPrefix<pWriter->term.nData &&
+ pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
+ nPrefix++;
+ /* Failing this implies that the terms weren't in order. */
+ assert( nPrefix<nTerm );
+ }
if( pWriter->data.nData==0 ){
/* Encode the node header and leading term as:
** varint(nSuffix)
** char pTermSuffix[nSuffix]
*/
- int nPrefix = 0;
-
- assert( nTerm>0 );
- while( nPrefix<pWriter->term.nData &&
- pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
- nPrefix++;
- /* Failing this implies that the terms weren't in order. */
- assert( nPrefix<nTerm );
- }
-
n = putVarint(c, nPrefix);
n += putVarint(c+n, nTerm-nPrefix);
dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
}
dataBufferReplace(&pWriter->term, pTerm, nTerm);
+
+ return nPrefix+1;
}
/* Used to avoid a memmove when a large amount of doclist data is in
DLReader *pReaders, int nReaders){
char c[VARINT_MAX+VARINT_MAX];
int iTermData = pWriter->data.nData, iDoclistData;
- int i, nData, n, nActualData, nActual, rc;
+ int i, nData, n, nActualData, nActual, rc, nTermDistinct;
ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
- leafWriterEncodeTerm(pWriter, pTerm, nTerm);
+ nTermDistinct = leafWriterEncodeTerm(pWriter, pTerm, nTerm);
+
+ /* Remember nTermDistinct if opening a new node. */
+ if( iTermData==0 ) pWriter->nTermDistinct = nTermDistinct;
iDoclistData = pWriter->data.nData;
if( iTermData>0 ){
rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
if( rc!=SQLITE_OK ) return rc;
+
+ pWriter->nTermDistinct = nTermDistinct;
}
/* Fix the encoded doclist length. */
rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
if( rc!=SQLITE_OK ) return rc;
+ pWriter->nTermDistinct = nTermDistinct;
+
/* Rebuild header using the current term */
n = putVarint(pWriter->data.pData, 0);
n += putVarint(pWriter->data.pData+n, nTerm);
-C Refactoring\sgroundwork\sfor\scoming\swork\son\sinterior\snodes.\s\sChange\nLeafWriter\sto\suse\sempty\sdata\sbuffer\s(instead\sof\sempty\sterm)\sto\sdetect\nan\sempty\sblock.\s\sCode\sto\svalidate\sinterior\snodes.\s\sModerate\srevisions\nto\sleaf-node\sand\sdoclist\svalidation.\s\sRecast\sleafWriterStep()\sin\sterms\nof\sLeafWriterStepMerge().\s(CVS\s3512)
-D 2006-11-17T21:12:16
+C Store\sminimal\sterms\sin\sinterior\snodes.\s\sWhenever\sthere's\sa\sbreak\nbetween\sleaf\snodes,\sinstead\sof\sstoring\sthe\sentire\sleftmost\sterm\sof\sthe\nrightmost\schild,\sstore\sonly\sthat\sportion\sof\sthe\sleftmost\sterm\nnecessary\sto\sdistinguish\sit\sfrom\sthe\srightmost\sterm\sof\sthe\sleftmost\nchild.\s(CVS\s3513)
+D 2006-11-18T00:12:45
F Makefile.in 8e14898d41a53033ecb687d93c9cd5d109fb9ae3
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts2/fts2.c 57d8cd57ce18c3ce7b194b4810fe7e119ec7e6a3
+F ext/fts2/fts2.c 74a5db3f7f8e49dfa2a5d40e5fdece09bf23e5a8
F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
-P 9b6d413d751d962b67cb4e3a208efe61581cb822
-R ff81ed1c8b4721212823c87e00f2e6b9
+P f30771d5c7ef2b502af95d81a18796b75271ada4
+R ef562280fdedbce7ff0fcbb49811b62c
U shess
-Z 82e324f504b7a8a8d9f0a515f8d329aa
+Z 674bc601b7eaf4b058181b0cf2143d33