Implement optimize() function. This merges all segments in the fts index into a single segment, including dropping delete cookies. (CVS 5417)

author shess <shess@noemail.net>

Tue, 15 Jul 2008 21:32:07 +0000 (21:32 +0000)

committer shess <shess@noemail.net>

Tue, 15 Jul 2008 21:32:07 +0000 (21:32 +0000)
author shess <shess@noemail.net>
Tue, 15 Jul 2008 21:32:07 +0000 (21:32 +0000)
committer shess <shess@noemail.net>
Tue, 15 Jul 2008 21:32:07 +0000 (21:32 +0000)
diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c

index 9aff336600e71e27fe09fda0df87f1278ad39bb6..f9f8c4aa31d2403af045e614f5b0425641bdb2f5 100644 (file)
--- a/ext/fts3/fts3.c
+++ b/ext/fts3/fts3.c
@@ -1920,6 +1920,7 @@ typedef enum fulltext_statement {
    SEGDIR_SELECT_SEGMENT_STMT,
    SEGDIR_SELECT_ALL_STMT,
    SEGDIR_DELETE_ALL_STMT,
+  SEGDIR_COUNT_STMT,
  
    MAX_STMT                     /* Always at end! */
  } fulltext_statement;
@@ -1962,6 +1963,7 @@ static const char *const fulltext_zStatement[MAX_STMT] = {
    "select start_block, leaves_end_block, root from %_segdir "
    " order by level desc, idx asc",
    /* SEGDIR_DELETE_ALL */ "delete from %_segdir",
+  /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir",
  };
  
  /*
@@ -2126,15 +2128,18 @@ static int sql_single_step(sqlite3_stmt *s){
  }
  
  /* Like sql_get_statement(), but for special replicated LEAF_SELECT
-** statements.
+** statements.  idx -1 is a special case for an uncached version of
+** the statement (used in the optimize implementation).
  */
  /* TODO(shess) Write version for generic statements and then share
  ** that between the cached-statement functions.
  */
  static int sql_get_leaf_statement(fulltext_vtab *v, int idx,
                                    sqlite3_stmt **ppStmt){
-  assert( idx>=0 && idx<MERGE_COUNT );
-  if( v->pLeafSelectStmts[idx]==NULL ){
+  assert( idx>=-1 && idx<MERGE_COUNT );
+  if( idx==-1 ){
+    return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT);
+  }else if( v->pLeafSelectStmts[idx]==NULL ){
      int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx],
                           LEAF_SELECT);
      if( rc!=SQLITE_OK ) return rc;
@@ -2465,6 +2470,37 @@ static int segdir_delete_all(fulltext_vtab *v){
    return sql_single_step(s);
  }
  
+/* Count the rows of %_segdir and find the highest level in use.
+**
+** On success returns SQLITE_OK with *pnSegments set to the first
+** result column of SEGDIR_COUNT_STMT (the number of %_segdir entries)
+** and *piMaxLevel set to the second (the highest level which has a
+** segment).  Both are set to 0 if the statement yields no row at all.
+** On failure returns the SQLite error code which caused the failure.
+*/
+static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){
+  sqlite3_stmt *pCount;
+  int rcStep;
+  int rcGet = sql_get_statement(v, SEGDIR_COUNT_STMT, &pCount);
+  if( rcGet!=SQLITE_OK ) return rcGet;
+
+  rcStep = sqlite3_step(pCount);
+  if( rcStep==SQLITE_ROW ){
+    *pnSegments = sqlite3_column_int(pCount, 0);
+    *piMaxLevel = sqlite3_column_int(pCount, 1);
+
+    /* Exactly one row is expected.  Step once more so the statement
+    ** runs to completion; otherwise the table will remain locked.
+    */
+    rcStep = sqlite3_step(pCount);
+    if( rcStep==SQLITE_ROW ) return SQLITE_ERROR;
+    return rcStep==SQLITE_DONE ? SQLITE_OK : rcStep;
+  }
+
+  /* TODO(shess): The no-row case should not be possible?  Should
+  ** stronger measures be taken if it happens?
+  */
+  if( rcStep==SQLITE_DONE ){
+    *pnSegments = 0;
+    *piMaxLevel = 0;
+    return SQLITE_OK;
+  }
+
+  return rcStep;
+}
+
+
  /* TODO(shess) clearPendingTerms() is far down the file because
  ** writeZeroSegment() is far down the file because LeafWriter is far
  ** down the file.  Consider refactoring the code to move the non-vtab
@@ -5340,6 +5376,12 @@ static int leavesReaderReset(LeavesReader *pReader){
  }
  
  static void leavesReaderDestroy(LeavesReader *pReader){
+  /* If idx is -1, that means we're using a non-cached statement
+  ** handle in the optimize() case, so we need to release it.
+  */
+  if( pReader->pStmt!=NULL && pReader->idx==-1 ){
+    sqlite3_finalize(pReader->pStmt);
+  }
    leafReaderDestroy(&pReader->leafReader);
    dataBufferDestroy(&pReader->rootData);
    SCRAMBLE(pReader);
@@ -6306,6 +6348,285 @@ static void snippetOffsetsFunc(
    }
  }
  
+/* OptLeavesReader is nearly identical to LeavesReader, except that
+** where LeavesReader is geared towards the merging of complete
+** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader
+** is geared towards implementation of the optimize() function, and
+** can merge all segments simultaneously.  This version may be
+** somewhat less efficient than LeavesReader because it merges into an
+** accumulator rather than doing an N-way merge, but since segment
+** size grows exponentially (so segment count logarithmically) this is
+** probably not an immediate problem.
+*/
+/* TODO(shess): Prove that assertion, or extend the merge code to
+** merge tree fashion (like the prefix-searching code does).
+*/
+/* TODO(shess): OptLeavesReader and LeavesReader could probably be
+** merged with little or no loss of performance for LeavesReader.  The
+** merged code would need to handle >MERGE_COUNT segments, and would
+** also need to be able to optionally optimize away deletes.
+*/
+typedef struct OptLeavesReader {
+  /* Segment number, to order readers by age.  Used as the tie-breaker
+  ** in optLeavesReaderCmp() when two readers are on the same term.
+  */
+  int segment;
+  /* The wrapped LeavesReader; every optLeavesReader*() accessor
+  ** forwards to it.
+  */
+  LeavesReader reader;
+} OptLeavesReader;
+
+
+/* Thin OptLeavesReader accessors.  Each one forwards to the
+** LeavesReader function of the corresponding name, passing the
+** embedded pReader->reader, so that the optimize() code can work in
+** terms of OptLeavesReader pointers throughout.
+*/
+static int optLeavesReaderAtEnd(OptLeavesReader *pReader){
+  return leavesReaderAtEnd(&pReader->reader);
+}
+static int optLeavesReaderTermBytes(OptLeavesReader *pReader){
+  return leavesReaderTermBytes(&pReader->reader);
+}
+static const char *optLeavesReaderData(OptLeavesReader *pReader){
+  return leavesReaderData(&pReader->reader);
+}
+static int optLeavesReaderDataBytes(OptLeavesReader *pReader){
+  return leavesReaderDataBytes(&pReader->reader);
+}
+static const char *optLeavesReaderTerm(OptLeavesReader *pReader){
+  return leavesReaderTerm(&pReader->reader);
+}
+static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){
+  return leavesReaderStep(v, &pReader->reader);
+}
+/* Compares only the current terms of the two wrapped readers; the
+** segment numbers are not considered here.
+*/
+static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
+  return leavesReaderTermCmp(&lr1->reader, &lr2->reader);
+}
+/* Sort order for the optimize() readers: primarily by
+** optLeavesReaderTermCmp() (term ascending), with equal terms ordered
+** by ascending segment number (oldest to newest).  Exhausted readers
+** are meant to sort to the end; that part is presumably supplied by
+** the underlying term comparison rather than by this function.
+*/
+static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
+  int termOrder = optLeavesReaderTermCmp(lr1, lr2);
+  return termOrder!=0 ? termOrder : lr1->segment-lr2->segment;
+}
+/* Insert pLr[0] into its proper position among pLr[1..nLr-1], which
+** must already be sorted per optLeavesReaderCmp().  The element is
+** swapped with its right-hand neighbour until that neighbour no
+** longer orders before it or the end of the array is reached.
+*/
+static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){
+  int iPos;
+  for(iPos=0; iPos+1<nLr; iPos++){
+    OptLeavesReader held;
+    if( optLeavesReaderCmp(&pLr[iPos], &pLr[iPos+1])<=0 ) break;
+    held = pLr[iPos];
+    pLr[iPos] = pLr[iPos+1];
+    pLr[iPos+1] = held;
+  }
+}
+
+
+/* optimize() helper function.  Put the readers in order and iterate
+** through them, merging doclists for matching terms into pWriter.
+** Returns SQLITE_OK on success, or the SQLite error code which
+** prevented success.
+**
+** readers[0..nReaders-1] is reordered in place and every reader is
+** stepped until it reports end-of-data (or an error occurs).  The
+** caller keeps ownership of the readers and of pWriter; only the
+** scratch buffers allocated here are released before returning.
+*/
+static int optimizeInternal(fulltext_vtab *v,
+                            OptLeavesReader *readers, int nReaders,
+                            LeafWriter *pWriter){
+  int i, rc = SQLITE_OK;
+  DataBuffer doclist, merged, tmp;
+
+  /* Order the readers.  Working from the back, each pass inserts
+  ** readers[i] into the already-sorted tail readers[i+1..nReaders-1].
+  */
+  i = nReaders;
+  while( i-- > 0 ){
+    optLeavesReaderReorder(&readers[i], nReaders-i);
+  }
+
+  dataBufferInit(&doclist, LEAF_MAX);
+  dataBufferInit(&merged, LEAF_MAX);
+
+  /* Exhausted readers bubble to the end, so when the first reader is
+  ** at eof, all are at eof.
+  */
+  while( !optLeavesReaderAtEnd(&readers[0]) ){
+
+    /* Figure out how many readers share the next term.  On exit, i is
+    ** the number of leading readers positioned on readers[0]'s term.
+    */
+    for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){
+      if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break;
+    }
+
+    /* Special-case for no merge. */
+    if( i==1 ){
+      /* Trim deletions from the doclist. */
+      dataBufferReset(&merged);
+      docListTrim(DL_DEFAULT,
+                  optLeavesReaderData(&readers[0]),
+                  optLeavesReaderDataBytes(&readers[0]),
+                  -1, DL_DEFAULT, &merged);
+    }else{
+      DLReader dlReaders[MERGE_COUNT];
+      /* nDlReaders counts the live entries of dlReaders[].  It is
+      ** deliberately NOT called nReaders, so that it cannot shadow the
+      ** function parameter which the reorder call below depends on.
+      */
+      int iReader, nDlReaders;
+
+      /* Prime the pipeline with the first reader's doclist.  After
+      ** one pass index 0 will reference the accumulated doclist.
+      */
+      dlrInit(&dlReaders[0], DL_DEFAULT,
+              optLeavesReaderData(&readers[0]),
+              optLeavesReaderDataBytes(&readers[0]));
+      iReader = 1;
+
+      assert( iReader<i );  /* Must execute the loop at least once. */
+      while( iReader<i ){
+        /* Fill the remaining dlReaders[] slots: at most MERGE_COUNT
+        ** inputs per pass, slot 0 (the accumulator) included.
+        */
+        for( nDlReaders=1; iReader<i && nDlReaders<MERGE_COUNT;
+             iReader++, nDlReaders++ ){
+          dlrInit(&dlReaders[nDlReaders], DL_DEFAULT,
+                  optLeavesReaderData(&readers[iReader]),
+                  optLeavesReaderDataBytes(&readers[iReader]));
+        }
+
+        /* Merge doclists and swap result into accumulator. */
+        dataBufferReset(&merged);
+        docListMerge(&merged, dlReaders, nDlReaders);
+        tmp = merged;
+        merged = doclist;
+        doclist = tmp;
+
+        while( nDlReaders-- > 0 ){
+          dlrDestroy(&dlReaders[nDlReaders]);
+        }
+
+        /* Accumulated doclist to reader 0 for next pass. */
+        dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData);
+      }
+
+      /* Destroy reader that was left in the pipeline. */
+      dlrDestroy(&dlReaders[0]);
+
+      /* Trim deletions from the doclist. */
+      dataBufferReset(&merged);
+      docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
+                  -1, DL_DEFAULT, &merged);
+    }
+
+    /* Only pass doclists with hits (skip if all hits deleted). */
+    if( merged.nData>0 ){
+      rc = leafWriterStep(v, pWriter,
+                          optLeavesReaderTerm(&readers[0]),
+                          optLeavesReaderTermBytes(&readers[0]),
+                          merged.pData, merged.nData);
+      if( rc!=SQLITE_OK ) goto err;
+    }
+
+    /* Step merged readers to next term and reorder.  Going from the
+    ** last merged reader down to 0 keeps readers[i+1..nReaders-1]
+    ** sorted each time readers[i] is re-inserted.
+    */
+    while( i-- > 0 ){
+      rc = optLeavesReaderStep(v, &readers[i]);
+      if( rc!=SQLITE_OK ) goto err;
+
+      optLeavesReaderReorder(&readers[i], nReaders-i);
+    }
+  }
+
+ err:
+  /* Reached on success as well as on failure. */
+  dataBufferDestroy(&doclist);
+  dataBufferDestroy(&merged);
+  return rc;
+}
+
+
+/* Implement optimize() function for FTS3.  optimize(t) merges all
+** segments in the fts index into a single segment.  't' is the magic
+** table-named column.
+**
+** argv[0] must be a blob of exactly sizeof(fulltext_cursor*) bytes
+** holding the cursor pointer.  The SQL result is:
+**   - the text "Index already optimal" if %_segdir has 0 or 1 rows;
+**   - the text "Index optimized" after a successful merge;
+**   - an out-of-memory error if the reader array cannot be allocated;
+**   - otherwise an error "Error in optimize: <db error message>".
+*/
+static void optimizeFunc(sqlite3_context *pContext,
+                         int argc, sqlite3_value **argv){
+  fulltext_cursor *pCursor;
+  if( argc>1 ){
+    sqlite3_result_error(pContext, "excess arguments to optimize()",-1);
+  }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
+            sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
+    sqlite3_result_error(pContext, "illegal first argument to optimize",-1);
+  }else{
+    fulltext_vtab *v;
+    int i, rc, iMaxLevel;
+    OptLeavesReader *readers;
+    int nReaders;
+    LeafWriter writer;
+    sqlite3_stmt *s;
+
+    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
+    v = cursor_vtab(pCursor);
+
+    /* Flush any buffered updates before optimizing. */
+    rc = flushPendingTerms(v);
+    if( rc!=SQLITE_OK ) goto err;
+
+    rc = segdir_count(v, &nReaders, &iMaxLevel);
+    if( rc!=SQLITE_OK ) goto err;
+    if( nReaders==0 || nReaders==1 ){
+      sqlite3_result_text(pContext, "Index already optimal", -1,
+                          SQLITE_STATIC);
+      return;
+    }
+
+    rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
+    if( rc!=SQLITE_OK ) goto err;
+
+    readers = sqlite3_malloc(nReaders*sizeof(readers[0]));
+    if( readers==NULL ){
+      /* rc is still SQLITE_OK here, so the generic err: path would
+      ** report whatever message the db handle happens to hold rather
+      ** than the allocation failure.  Nothing else has been set up
+      ** yet (the writer is initialized below), so just report NOMEM.
+      */
+      sqlite3_result_error_nomem(pContext);
+      return;
+    }
+
+    /* Note that there will already be a segment at this position
+    ** until we call segdir_delete() on iMaxLevel.
+    */
+    leafWriterInit(iMaxLevel, 0, &writer);
+
+    /* Open one reader per %_segdir row.  i counts the readers which
+    ** were successfully initialized and so must be destroyed below.
+    */
+    i = 0;
+    while( (rc = sqlite3_step(s))==SQLITE_ROW ){
+      sqlite_int64 iStart = sqlite3_column_int64(s, 0);
+      sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
+      const char *pRootData = sqlite3_column_blob(s, 2);
+      int nRootData = sqlite3_column_bytes(s, 2);
+
+      assert( i<nReaders );
+      /* idx -1 requests an uncached leaf statement for this reader. */
+      rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData,
+                            &readers[i].reader);
+      if( rc!=SQLITE_OK ) break;
+
+      readers[i].segment = i;
+      i++;
+    }
+
+    /* If we managed to successfully read them all, optimize them. */
+    if( rc==SQLITE_DONE ){
+      assert( i==nReaders );
+      rc = optimizeInternal(v, readers, nReaders, &writer);
+    }
+
+    while( i-- > 0 ){
+      leavesReaderDestroy(&readers[i].reader);
+    }
+    sqlite3_free(readers);
+
+    /* If we've successfully gotten to here, delete the old segments
+    ** and flush the interior structure of the new segment.
+    */
+    if( rc==SQLITE_OK ){
+      for( i=0; i<=iMaxLevel; i++ ){
+        rc = segdir_delete(v, i);
+        if( rc!=SQLITE_OK ) break;
+      }
+
+      if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer);
+    }
+
+    leafWriterDestroy(&writer);
+
+    if( rc!=SQLITE_OK ) goto err;
+
+    sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
+    return;
+
+    /* TODO(shess): Error-handling needs to be improved along the
+    ** lines of the dump_ functions.
+    */
+ err:
+    {
+      char buf[512];
+      sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s",
+                       sqlite3_errmsg(sqlite3_context_db_handle(pContext)));
+      sqlite3_result_error(pContext, buf, -1);
+    }
+  }
+}
+
+
  #ifdef SQLITE_TEST
  /* Generate an error of the form "<prefix>: <msg>".  If msg is NULL,
  ** pull the error from the context's db handle.
@@ -6703,6 +7024,9 @@ static int fulltextFindFunction(
    }else if( strcmp(zName,"offsets")==0 ){
      *pxFunc = snippetOffsetsFunc;
      return 1;
+  }else if( strcmp(zName,"optimize")==0 ){
+    *pxFunc = optimizeFunc;
+    return 1;
  #ifdef SQLITE_TEST
      /* NOTE(shess): These functions are present only for testing
      ** purposes.  No particular effort is made to optimize their
@@ -6836,6 +7160,7 @@ int sqlite3Fts3Init(sqlite3 *db){
     && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
     && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
     && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
+   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1))
  #ifdef SQLITE_TEST
     && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
     && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
diff --git a/manifest b/manifest

index 5ab9e9e4eb734ef1f368c84ea4b313e3ad234380..db117b7e440649bfdc0d4b4c2847c49bc8981ca1 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Update\scolumn\snaming\srules.\s\sTicket\s#3221.\s\sRules\sfor\scolumn\snaming\nare\sstill\ssubject\sto\schange\s(except\sfor\sthe\sAS\srule\swhich\swe\spromise\sto\nkeep\sthe\ssame)\sbut\sare\smore\sconsistent\snow.\s\sAnd\sthe\srules\sare\stested\nusing\sa\snew\stest\sscript.\s(CVS\s5416)
-D 2008-07-15T20:56:17
+C Implement\soptimize()\sfunction.\s\sThis\smerges\sall\ssegments\sin\sthe\sfts\nindex\sinto\sa\ssingle\ssegment,\sincluding\sdropping\sdelete\scookies.\s(CVS\s5417)
+D 2008-07-15T21:32:07
  F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
  F Makefile.in a03f7cb4f7ad50bc53a788c6c544430e81f95de4
  F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -51,7 +51,7 @@ F ext/fts2/fts2_tokenizer1.c 8a545c232bdffafd117c4eeaf59789691909f26a
  F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
  F ext/fts3/README.tokenizers 226644a0eab97724e8de83061912e8bb248461b6
  F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c c4037314d324d900638665e802b0e97725a0bd07
+F ext/fts3/fts3.c 14ac2e37889fda1555d903648772b6be3061d2bb
  F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
  F ext/fts3/fts3_hash.c 83e7bb4042106b32811681dd2859b4577a7a6b35
  F ext/fts3/fts3_hash.h 004b759e1602ff16dfa02fea3ca1c77336ad6798
@@ -334,7 +334,7 @@ F test/fts3ao.test 0aa29dd4fc1c8d46b1f7cfe5926f7ac97551bea9
  F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa
  F test/fts3b.test b3a25180a633873d37d86e1ccd00ed690d37237a
  F test/fts3c.test 4c7ef29b37aca3e8ebb6a39b57910caa6506034e
-F test/fts3d.test 12ad44b84e2b71b4217288747a9744201f779892
+F test/fts3d.test d92a47fe8ed59c9e53d2d8e6d2685bb380aadadc
  F test/fts3near.test 2d4dadcaac5025ab65bb87e66c45f39e92966194
  F test/func.test 92f017ac3c1fb85c8015800aa578a1d588a237a4
  F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a
@@ -608,7 +608,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81
  F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
  F tool/speedtest8.c 1dbced29de5f59ba2ebf877edcadf171540374d1
  F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
-P c942a38e9aa80770e7e2819e51b43fa7aa854d71
-R 4b3353fa20b1c242a7f991bd99f44a48
-U drh
-Z 6835a5cfc38caa7fcd38147e81944a8f
+P 61f6e19755b85bcb065f85fc425c2172badea308
+R 801d6f0acc1dc5b802184549b77ec890
+U shess
+Z 136d2d664a48bc67bfa7ab0ac78c0104
diff --git a/manifest.uuid b/manifest.uuid

index a0b998fecb7977516ce28a1bf52effd85be7c33f..37da664a7f04d008bff3dd08295fe04ab60df8c2 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-61f6e19755b85bcb065f85fc425c2172badea308
-\ No newline at end of file
+b22e187bc2b38bd219dd0feba19b97279bd83089
+\ No newline at end of file
diff --git a/test/fts3d.test b/test/fts3d.test

index 9cf5c99c7554d66ae85c46ad29a8986bc7f785b5..bec488d04028b504b772b2b42493b2018d37d9c7 100644 (file)
--- a/test/fts3d.test
+++ b/test/fts3d.test
@@ -11,7 +11,7 @@
  # This file implements regression tests for SQLite library.  The focus
  # of this script is testing the FTS3 module's optimize() function.
  #
-# $Id: fts3d.test,v 1.1 2008/07/14 20:43:15 shess Exp $
+# $Id: fts3d.test,v 1.2 2008/07/15 21:32:07 shess Exp $
  #
  
  set testdir [file dirname $argv0]
@@ -123,6 +123,224 @@ check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]}
  check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]}
  check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]}
  
-# TODO(shess): optimize() tests here.
+#*************************************************************************
+# Test results when everything is optimized manually.
+# NOTE(shess): This is a copy of fts3c-1.3.  I've pulled a copy here
+# because fts3d-2 and fts3d-3 should have identical results.
+db eval {
+  DROP TABLE IF EXISTS t1;
+  CREATE VIRTUAL TABLE t1 USING fts3(c);
+  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
+  INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
+  INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
+  DELETE FROM t1 WHERE docid IN (1,3);
+  DROP TABLE IF EXISTS t1old;
+  ALTER TABLE t1 RENAME TO t1old;
+  CREATE VIRTUAL TABLE t1 USING fts3(c);
+  INSERT INTO t1 (docid, c) SELECT docid, c FROM t1old;
+  DROP TABLE t1old;
+}
+
+# Should be a single optimal segment with the same logical results.
+do_test fts3d-2.segments {
+  execsql {
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {0 0}
+do_test fts3d-2.matches {
+  execsql {
+    SELECT OFFSETS(t1) FROM t1
+     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
+  }
+} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
+
+check_terms_all fts3d-2.1 {a test that was}
+check_doclist_all fts3d-2.1.1 a {[2 0[2]]}
+check_doclist_all fts3d-2.1.2 test {[2 0[3]]}
+check_doclist_all fts3d-2.1.3 that {[2 0[0]]}
+check_doclist_all fts3d-2.1.4 was {[2 0[1]]}
+
+check_terms fts3d-2.2 0 0 {a test that was}
+check_doclist fts3d-2.2.1 0 0 a {[2 0[2]]}
+check_doclist fts3d-2.2.2 0 0 test {[2 0[3]]}
+check_doclist fts3d-2.2.3 0 0 that {[2 0[0]]}
+check_doclist fts3d-2.2.4 0 0 was {[2 0[1]]}
+
+#*************************************************************************
+# Test results when everything is optimized via optimize().
+db eval {
+  DROP TABLE IF EXISTS t1;
+  CREATE VIRTUAL TABLE t1 USING fts3(c);
+  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
+  INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
+  INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
+  DELETE FROM t1 WHERE docid IN (1,3);
+  SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
+}
+
+# Should be a single optimal segment with the same logical results.
+do_test fts3d-3.segments {
+  execsql {
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {0 0}
+do_test fts3d-3.matches {
+  execsql {
+    SELECT OFFSETS(t1) FROM t1
+     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
+  }
+} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
+
+check_terms_all fts3d-3.1 {a test that was}
+check_doclist_all fts3d-3.1.1 a {[2 0[2]]}
+check_doclist_all fts3d-3.1.2 test {[2 0[3]]}
+check_doclist_all fts3d-3.1.3 that {[2 0[0]]}
+check_doclist_all fts3d-3.1.4 was {[2 0[1]]}
+
+check_terms fts3d-3.2 0 0 {a test that was}
+check_doclist fts3d-3.2.1 0 0 a {[2 0[2]]}
+check_doclist fts3d-3.2.2 0 0 test {[2 0[3]]}
+check_doclist fts3d-3.2.3 0 0 that {[2 0[0]]}
+check_doclist fts3d-3.2.4 0 0 was {[2 0[1]]}
+
+#*************************************************************************
+# Test optimize() against a table involving segment merges.
+# NOTE(shess): Since there's no transaction, each of the INSERT/UPDATE
+# statements generates a segment.
+db eval {
+  DROP TABLE IF EXISTS t1;
+  CREATE VIRTUAL TABLE t1 USING fts3(c);
+
+  INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
+  INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
+  INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
+
+  UPDATE t1 SET c = 'This is a test one' WHERE rowid = 1;
+  UPDATE t1 SET c = 'That was a test one' WHERE rowid = 2;
+  UPDATE t1 SET c = 'This is a test one' WHERE rowid = 3;
+
+  UPDATE t1 SET c = 'This is a test two' WHERE rowid = 1;
+  UPDATE t1 SET c = 'That was a test two' WHERE rowid = 2;
+  UPDATE t1 SET c = 'This is a test two' WHERE rowid = 3;
+
+  UPDATE t1 SET c = 'This is a test three' WHERE rowid = 1;
+  UPDATE t1 SET c = 'That was a test three' WHERE rowid = 2;
+  UPDATE t1 SET c = 'This is a test three' WHERE rowid = 3;
+
+  UPDATE t1 SET c = 'This is a test four' WHERE rowid = 1;
+  UPDATE t1 SET c = 'That was a test four' WHERE rowid = 2;
+  UPDATE t1 SET c = 'This is a test four' WHERE rowid = 3;
+
+  UPDATE t1 SET c = 'This is a test' WHERE rowid = 1;
+  UPDATE t1 SET c = 'That was a test' WHERE rowid = 2;
+  UPDATE t1 SET c = 'This is a test' WHERE rowid = 3;
+}
+
+# 2 segments in level 0, 1 in level 1 (18 segments created, 16
+# merged).
+do_test fts3d-4.segments {
+  execsql {
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {0 0 0 1 1 0}
+
+do_test fts3d-4.matches {
+  execsql {
+    SELECT OFFSETS(t1) FROM t1
+     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
+  }
+} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
+        {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
+        {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
+
+check_terms_all fts3d-4.1      {a four is one test that this three two was}
+check_doclist_all fts3d-4.1.1  a {[1 0[2]] [2 0[2]] [3 0[2]]}
+check_doclist_all fts3d-4.1.2  four {}
+check_doclist_all fts3d-4.1.3  is {[1 0[1]] [3 0[1]]}
+check_doclist_all fts3d-4.1.4  one {}
+check_doclist_all fts3d-4.1.5  test {[1 0[3]] [2 0[3]] [3 0[3]]}
+check_doclist_all fts3d-4.1.6  that {[2 0[0]]}
+check_doclist_all fts3d-4.1.7  this {[1 0[0]] [3 0[0]]}
+check_doclist_all fts3d-4.1.8  three {}
+check_doclist_all fts3d-4.1.9  two {}
+check_doclist_all fts3d-4.1.10 was {[2 0[1]]}
+
+check_terms fts3d-4.2     0 0 {a four test that was}
+check_doclist fts3d-4.2.1 0 0 a {[2 0[2]]}
+check_doclist fts3d-4.2.2 0 0 four {[2]}
+check_doclist fts3d-4.2.3 0 0 test {[2 0[3]]}
+check_doclist fts3d-4.2.4 0 0 that {[2 0[0]]}
+check_doclist fts3d-4.2.5 0 0 was {[2 0[1]]}
+
+check_terms fts3d-4.3     0 1 {a four is test this}
+check_doclist fts3d-4.3.1 0 1 a {[3 0[2]]}
+check_doclist fts3d-4.3.2 0 1 four {[3]}
+check_doclist fts3d-4.3.3 0 1 is {[3 0[1]]}
+check_doclist fts3d-4.3.4 0 1 test {[3 0[3]]}
+check_doclist fts3d-4.3.5 0 1 this {[3 0[0]]}
+
+check_terms fts3d-4.4      1 0 {a four is one test that this three two was}
+check_doclist fts3d-4.4.1  1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
+check_doclist fts3d-4.4.2  1 0 four {[1] [2 0[4]] [3 0[4]]}
+check_doclist fts3d-4.4.3  1 0 is {[1 0[1]] [3 0[1]]}
+check_doclist fts3d-4.4.4  1 0 one {[1] [2] [3]}
+check_doclist fts3d-4.4.5  1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
+check_doclist fts3d-4.4.6  1 0 that {[2 0[0]]}
+check_doclist fts3d-4.4.7  1 0 this {[1 0[0]] [3 0[0]]}
+check_doclist fts3d-4.4.8  1 0 three {[1] [2] [3]}
+check_doclist fts3d-4.4.9  1 0 two {[1] [2] [3]}
+check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]}
+
+# Optimize should leave the result in the level of the highest-level
+# prior segment.
+do_test fts3d-4.5 {
+  execsql {
+    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {{Index optimized} 1 0}
+
+# Identical to fts3d-4.matches.
+do_test fts3d-4.5.matches {
+  execsql {
+    SELECT OFFSETS(t1) FROM t1
+     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
+  }
+} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
+        {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
+        {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
+
+check_terms_all fts3d-4.5.1     {a is test that this was}
+check_doclist_all fts3d-4.5.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
+check_doclist_all fts3d-4.5.1.2 is {[1 0[1]] [3 0[1]]}
+check_doclist_all fts3d-4.5.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]}
+check_doclist_all fts3d-4.5.1.4 that {[2 0[0]]}
+check_doclist_all fts3d-4.5.1.5 this {[1 0[0]] [3 0[0]]}
+check_doclist_all fts3d-4.5.1.6 was {[2 0[1]]}
+
+check_terms fts3d-4.5.2     1 0 {a is test that this was}
+check_doclist fts3d-4.5.2.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
+check_doclist fts3d-4.5.2.2 1 0 is {[1 0[1]] [3 0[1]]}
+check_doclist fts3d-4.5.2.3 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
+check_doclist fts3d-4.5.2.4 1 0 that {[2 0[0]]}
+check_doclist fts3d-4.5.2.5 1 0 this {[1 0[0]] [3 0[0]]}
+check_doclist fts3d-4.5.2.6 1 0 was {[2 0[1]]}
+
+# Re-optimizing does nothing.
+do_test fts3d-5.0 {
+  execsql {
+    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {{Index already optimal} 1 0}
+
+# Even if we move things around, still does nothing.
+do_test fts3d-5.1 {
+  execsql {
+    UPDATE t1_segdir SET level = 2 WHERE level = 1 AND idx = 0;
+    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {{Index already optimal} 2 0}
  
  finish_test
author	shess <shess@noemail.net>
	Tue, 15 Jul 2008 21:32:07 +0000 (21:32 +0000)
committer	shess <shess@noemail.net>
	Tue, 15 Jul 2008 21:32:07 +0000 (21:32 +0000)
ext/fts3/fts3.c		patch \| blob \| blame \| history
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
test/fts3d.test		patch \| blob \| blame \| history