git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Re-use deleted rowids for new segments. This has a somewhat
author: shess <shess@noemail.net>
Mon, 11 Sep 2006 21:39:21 +0000 (21:39 +0000)
committer: shess <shess@noemail.net>
Mon, 11 Sep 2006 21:39:21 +0000 (21:39 +0000)
surprising impact on performance, I believe because it keeps the index
smaller (by keeping rowids smaller), and also because it improves
locality in the table (deleting a row means we've already touched the
pages leading to that rowid). (CVS 3405)

FossilOrigin-Name: 2f5f6290c9ef99c7b060aecc4d996c976c50c9d7

ext/fts1/fts1.c
manifest
manifest.uuid

index 90a22be689e69f2a7d34f0a864fd7a70234be3ea..a4134f38aec1fbb739cbdb9bfb5fdb50104d6f52 100644 (file)
@@ -796,7 +796,7 @@ static const char *const fulltext_zStatement[MAX_STMT] = {
   /* TERM_SELECT_ALL */
   "select doclist from %_term where term = ? order by segment",
   /* TERM_INSERT */
-  "insert into %_term (term, segment, doclist) values (?, ?, ?)",
+  "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
   /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
   /* TERM_DELETE */ "delete from %_term where rowid = ?",
 };
@@ -1036,21 +1036,34 @@ static int term_select_all(fulltext_vtab *v, const char *pTerm, int nTerm,
   return SQLITE_OK;
 }
 
-/* insert into %_term (term, segment, doclist)
-               values ([pTerm], [iSegment], [doclist]) */
-static int term_insert(fulltext_vtab *v, const char *pTerm, int nTerm,
+/* insert into %_term (rowid, term, segment, doclist)
+               values ([piRowid], [pTerm], [iSegment], [doclist])
+** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
+**
+** NOTE(shess) piRowid is IN, with values of "space of int64" plus
+** null, it is not used to pass data back to the caller.
+*/
+static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
+                       const char *pTerm, int nTerm,
                        int iSegment, DocList *doclist){
   sqlite3_stmt *s;
   int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
   if( rc!=SQLITE_OK ) return rc;
 
-  rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
+  if( piRowid==NULL ){
+    rc = sqlite3_bind_null(s, 1);
+  }else{
+    rc = sqlite3_bind_int64(s, 1, *piRowid);
+  }
   if( rc!=SQLITE_OK ) return rc;
 
-  rc = sqlite3_bind_int(s, 2, iSegment);
+  rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
+  if( rc!=SQLITE_OK ) return rc;
+
+  rc = sqlite3_bind_int(s, 3, iSegment);
   if( rc!=SQLITE_OK ) return rc;
 
-  rc = sqlite3_bind_blob(s, 3, doclist->pData, doclist->nData, SQLITE_STATIC);
+  rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
   if( rc!=SQLITE_OK ) return rc;
 
   return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
@@ -1931,7 +1944,7 @@ static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
     docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
     docListUpdate(&doclist, d);
     /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
-    rc = term_insert(v, pTerm, nTerm, iSegment, &doclist);
+    rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
     goto err;
   }
   if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
@@ -1953,19 +1966,24 @@ static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
   ** bucket, and put results in the next bucket.
   */
   iSegment++;
-  while( (rc=term_insert(v, pTerm, nTerm, iSegment, &doclist))!=SQLITE_OK ){
+  while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
+                         &doclist))!=SQLITE_OK ){
+    sqlite_int64 iSegmentRow;
     DocList old;
     int rc2;
 
     /* Retain old error in case the term_insert() error was really an
     ** error rather than a bounced insert.
     */
-    rc2 = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &old);
+    rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
     if( rc2!=SQLITE_ROW ) goto err;
 
-    rc = term_delete(v, iIndexRow);
+    rc = term_delete(v, iSegmentRow);
     if( rc!=SQLITE_OK ) goto err;
 
+    /* Reusing lowest-number deleted row keeps the index smaller. */
+    if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
+
     /* doclist contains the newer data, so accumulate it over old.
     ** Then steal accumulated data for doclist.
     */
index d908fd5e9d68ea6e460b4e357fddede917ae6ac3..05334df8c43990510065e9cab3470ac6963d9eb8 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Get\sVACUUM\sworking\swith\svirtual\stables.\s(CVS\s3404)
-D 2006-09-11T11:13:27
+C Re-use\sdeleted\srowids\sfor\snew\ssegments.\s\sThis\shas\sa\ssomewhat\nsurprising\simpact\son\sperformance,\sI\sbelieve\sbecause\sit\skeeps\sthe\sindex\nsmaller\s(by\skeeping\srowids\ssmaller),\sand\salso\sbecause\sit\simproves\nlocality\sin\sthe\stable\s(deleting\sa\srow\smeans\swe've\salready\stouched\sthe\npages\sleading\sto\sthat\srowid).\s(CVS\s3405)
+D 2006-09-11T21:39:22
 F Makefile.in cabd42d34340f49260bc2a7668c38eba8d4cfd99
 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@@ -21,7 +21,7 @@ F ext/README.txt 913a7bd3f4837ab14d7e063304181787658b14e1
 F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e
 F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b
 F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5
-F ext/fts1/fts1.c 022a985bafaecdd6d245ddfeba68f9d268fccd9d
+F ext/fts1/fts1.c 5c5e362ec08487a0bdcf58b7467a28321eed8025
 F ext/fts1/fts1.h fe8e8f38dd6d2d2645b9b0d6972e80985249575f
 F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114
 F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089
@@ -397,7 +397,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
 F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
-P 227dc3feb537e6efd5b0c1d2dad40193db07d5aa
-R c821cee55c3f4e8fdfcd092fa19a7e1a
-U drh
-Z ab0686f184c2e0785ce95f3b8f82b4c4
+P d5ffef3870f06d2dd744ce9470d3c0e68062e804
+R 4f28d2c19780234e639fe3db42d9de5e
+U shess
+Z 323322cfc0451e1239808887226bd556
index 502aaf35b3d9965de6c8a40c60c49bfa196acc0d..298e7e013e4f100dddf4e6beb70d79c14b228dd0 100644 (file)
@@ -1 +1 @@
-d5ffef3870f06d2dd744ce9470d3c0e68062e804
\ No newline at end of file
+2f5f6290c9ef99c7b060aecc4d996c976c50c9d7
\ No newline at end of file