git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Store minimal terms in interior nodes. Whenever there's a break
author shess <shess@noemail.net>
Sat, 18 Nov 2006 00:12:44 +0000 (00:12 +0000)
committer shess <shess@noemail.net>
Sat, 18 Nov 2006 00:12:44 +0000 (00:12 +0000)
between leaf nodes, instead of storing the entire leftmost term of the
rightmost child, store only that portion of the leftmost term
necessary to distinguish it from the rightmost term of the leftmost
child. (CVS 3513)

FossilOrigin-Name: f6e0b080dcfaf554b2c05df5e7d4db69d012fba3

ext/fts2/fts2.c
manifest
manifest.uuid

index 48d0af55d64ee1bd69a2fd4cdbe260e99da88adf..b55443d3c37d19d93e6b4c9f34a34d01779073e6 100644 (file)
 ** than the first term encoded (or all terms if no term is encoded).
 ** Otherwise, for terms greater than or equal to pTerm[i] but less
 ** than pTerm[i+1], the subtree for that term will be rooted at
-** iBlockid+i.
+** iBlockid+i.  Interior nodes only store enough term data to
+** distinguish adjacent children (if the rightmost term of the left
+** child is "something", and the leftmost term of the right child is
+** "wicked", only "w" is stored).
 **
 ** New data is spilled to a new interior node at the same height when
 ** the current node exceeds INTERIOR_MAX bytes (default 2048).
@@ -3961,6 +3964,11 @@ typedef struct LeafWriter {
   DataBuffer term;                /* previous encoded term */
   DataBuffer data;                /* encoding buffer */
 
+  /* bytes of first term in the current node which distinguishes that
+  ** term from the last term of the previous node.
+  */
+  int nTermDistinct;
+
   InteriorWriter parentWriter;    /* if we overflow */
   int has_parent;
 } LeafWriter;
@@ -4072,6 +4080,9 @@ static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter,
   n = getVarint32(pWriter->data.pData+iData+1, &nStartingTerm);
   pStartingTerm = pWriter->data.pData+iData+1+n;
   assert( pWriter->data.nData>iData+1+n+nStartingTerm );
+  assert( pWriter->nTermDistinct>0 );
+  assert( pWriter->nTermDistinct<=nStartingTerm );
+  nStartingTerm = pWriter->nTermDistinct;
 
   if( pWriter->has_parent ){
     interiorWriterAppend(&pWriter->parentWriter,
@@ -4166,11 +4177,23 @@ static void leafWriterDestroy(LeafWriter *pWriter){
   dataBufferDestroy(&pWriter->data);
 }
 
-/* Encode a term into the leafWriter, delta-encoding as appropriate. */
-static void leafWriterEncodeTerm(LeafWriter *pWriter,
-                                 const char *pTerm, int nTerm){
+/* Encode a term into the leafWriter, delta-encoding as appropriate.
+** Returns the length of the new term which distinguishes it from the
+** previous term, which can be used to set nTermDistinct when a node
+** boundary is crossed.
+*/
+static int leafWriterEncodeTerm(LeafWriter *pWriter,
+                                const char *pTerm, int nTerm){
   char c[VARINT_MAX+VARINT_MAX];
-  int n;
+  int n, nPrefix = 0;
+
+  assert( nTerm>0 );
+  while( nPrefix<pWriter->term.nData &&
+         pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
+    nPrefix++;
+    /* Failing this implies that the terms weren't in order. */
+    assert( nPrefix<nTerm );
+  }
 
   if( pWriter->data.nData==0 ){
     /* Encode the node header and leading term as:
@@ -4187,21 +4210,13 @@ static void leafWriterEncodeTerm(LeafWriter *pWriter,
     **  varint(nSuffix)
     **  char pTermSuffix[nSuffix]
     */
-    int nPrefix = 0;
-
-    assert( nTerm>0 );
-    while( nPrefix<pWriter->term.nData &&
-           pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
-      nPrefix++;
-      /* Failing this implies that the terms weren't in order. */
-      assert( nPrefix<nTerm );
-    }
-
     n = putVarint(c, nPrefix);
     n += putVarint(c+n, nTerm-nPrefix);
     dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
   }
   dataBufferReplace(&pWriter->term, pTerm, nTerm);
+
+  return nPrefix+1;
 }
 
 /* Used to avoid a memmove when a large amount of doclist data is in
@@ -4238,10 +4253,13 @@ static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
                                DLReader *pReaders, int nReaders){
   char c[VARINT_MAX+VARINT_MAX];
   int iTermData = pWriter->data.nData, iDoclistData;
-  int i, nData, n, nActualData, nActual, rc;
+  int i, nData, n, nActualData, nActual, rc, nTermDistinct;
 
   ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
-  leafWriterEncodeTerm(pWriter, pTerm, nTerm);
+  nTermDistinct = leafWriterEncodeTerm(pWriter, pTerm, nTerm);
+
+  /* Remember nTermDistinct if opening a new node. */
+  if( iTermData==0 ) pWriter->nTermDistinct = nTermDistinct;
 
   iDoclistData = pWriter->data.nData;
 
@@ -4283,6 +4301,8 @@ static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
     if( iTermData>0 ){
       rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
       if( rc!=SQLITE_OK ) return rc;
+
+      pWriter->nTermDistinct = nTermDistinct;
     }
 
     /* Fix the encoded doclist length. */
@@ -4323,6 +4343,8 @@ static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
     rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
     if( rc!=SQLITE_OK ) return rc;
 
+    pWriter->nTermDistinct = nTermDistinct;
+
     /* Rebuild header using the current term */
     n = putVarint(pWriter->data.pData, 0);
     n += putVarint(pWriter->data.pData+n, nTerm);
index 4cdc1061a09fe5feca11d48901c67cfb9f7c4981..5d275357c669635cfa4a228344b1c2c5c76eb411 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Refactoring\sgroundwork\sfor\scoming\swork\son\sinterior\snodes.\s\sChange\nLeafWriter\sto\suse\sempty\sdata\sbuffer\s(instead\sof\sempty\sterm)\sto\sdetect\nan\sempty\sblock.\s\sCode\sto\svalidate\sinterior\snodes.\s\sModerate\srevisions\nto\sleaf-node\sand\sdoclist\svalidation.\s\sRecast\sleafWriterStep()\sin\sterms\nof\sLeafWriterStepMerge().\s(CVS\s3512)
-D 2006-11-17T21:12:16
+C Store\sminimal\sterms\sin\sinterior\snodes.\s\sWhenever\sthere's\sa\sbreak\nbetween\sleaf\snodes,\sinstead\sof\sstoring\sthe\sentire\sleftmost\sterm\sof\sthe\nrightmost\schild,\sstore\sonly\sthat\sportion\sof\sthe\sleftmost\sterm\nnecessary\sto\sdistinguish\sit\sfrom\sthe\srightmost\sterm\sof\sthe\sleftmost\nchild.\s(CVS\s3513)
+D 2006-11-18T00:12:45
 F Makefile.in 8e14898d41a53033ecb687d93c9cd5d109fb9ae3
 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@@ -33,7 +33,7 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
 F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
 F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
 F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts2/fts2.c 57d8cd57ce18c3ce7b194b4810fe7e119ec7e6a3
+F ext/fts2/fts2.c 74a5db3f7f8e49dfa2a5d40e5fdece09bf23e5a8
 F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1
 F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
 F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
@@ -421,7 +421,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
 F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
-P 9b6d413d751d962b67cb4e3a208efe61581cb822
-R ff81ed1c8b4721212823c87e00f2e6b9
+P f30771d5c7ef2b502af95d81a18796b75271ada4
+R ef562280fdedbce7ff0fcbb49811b62c
 U shess
-Z 82e324f504b7a8a8d9f0a515f8d329aa
+Z 674bc601b7eaf4b058181b0cf2143d33
index 7d116f7a681688d12eeef0008476d8d1030ece62..f6e6ca6a8f297d9d8a8460a3ffaab44b8b338de8 100644 (file)
@@ -1 +1 @@
-f30771d5c7ef2b502af95d81a18796b75271ada4
\ No newline at end of file
+f6e0b080dcfaf554b2c05df5e7d4db69d012fba3
\ No newline at end of file