fts2 functions for testing scripts.

author shess <shess@noemail.net>

Tue, 22 Jul 2008 23:32:27 +0000 (23:32 +0000)

committer shess <shess@noemail.net>

Tue, 22 Jul 2008 23:32:27 +0000 (23:32 +0000)
author shess <shess@noemail.net>
Tue, 22 Jul 2008 23:32:27 +0000 (23:32 +0000)
committer shess <shess@noemail.net>
Tue, 22 Jul 2008 23:32:27 +0000 (23:32 +0000)
diff --git a/ext/fts2/fts2.c b/ext/fts2/fts2.c

index 3279f655d86cc22cb04d194de9d3a9cdc8a02260..a7a6fc86b0556aaed8e05f0d8c276fdd0ea36a83 100644 (file)
--- a/ext/fts2/fts2.c
+++ b/ext/fts2/fts2.c
@@ -1781,9 +1781,10 @@ typedef enum fulltext_statement {
  
    SEGDIR_MAX_INDEX_STMT,
    SEGDIR_SET_STMT,
-  SEGDIR_SELECT_STMT,
+  SEGDIR_SELECT_LEVEL_STMT,
    SEGDIR_SPAN_STMT,
    SEGDIR_DELETE_STMT,
+  SEGDIR_SELECT_SEGMENT_STMT,
    SEGDIR_SELECT_ALL_STMT,
  
    MAX_STMT                     /* Always at end! */
@@ -1806,15 +1807,23 @@ static const char *const fulltext_zStatement[MAX_STMT] = {
  
    /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
    /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
-  /* SEGDIR_SELECT */
+  /* SEGDIR_SELECT_LEVEL */
    "select start_block, leaves_end_block, root from %_segdir "
    " where level = ? order by idx",
    /* SEGDIR_SPAN */
    "select min(start_block), max(end_block) from %_segdir "
    " where level = ? and start_block <> 0",
    /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",
+
+  /* NOTE(shess): The first three results of the following two
+  ** statements must match.
+  */
+  /* SEGDIR_SELECT_SEGMENT */
+  "select start_block, leaves_end_block, root from %_segdir "
+  " where level = ? and idx = ?",
    /* SEGDIR_SELECT_ALL */
-  "select root, leaves_end_block from %_segdir order by level desc, idx",
+  "select start_block, leaves_end_block, root from %_segdir "
+  " order by level desc, idx asc",
  };
  
  /*
@@ -5073,7 +5082,7 @@ static void leavesReaderReorder(LeavesReader *pLr, int nLr){
  static int leavesReadersInit(fulltext_vtab *v, int iLevel,
                               LeavesReader *pReaders, int *piReaders){
    sqlite3_stmt *s;
-  int i, rc = sql_get_statement(v, SEGDIR_SELECT_STMT, &s);
+  int i, rc = sql_get_statement(v, SEGDIR_SELECT_LEVEL_STMT, &s);
    if( rc!=SQLITE_OK ) return rc;
  
    rc = sqlite3_bind_int(s, 1, iLevel);
@@ -5611,8 +5620,8 @@ static int termSelect(fulltext_vtab *v, int iColumn,
    ** elements for given docids overwrite older elements.
    */
    while( (rc = sqlite3_step(s))==SQLITE_ROW ){
-    const char *pData = sqlite3_column_blob(s, 0);
-    const int nData = sqlite3_column_bytes(s, 0);
+    const char *pData = sqlite3_column_blob(s, 2);
+    const int nData = sqlite3_column_bytes(s, 2);
      const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
      rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix,
                       &doclist);
@@ -5881,6 +5890,386 @@ static void snippetOffsetsFunc(
    }
  }
  
+#ifdef SQLITE_TEST
+/* Generate an error of the form "<prefix>: <msg>".  If msg is NULL,
+** pull the error from the context's db handle.
+*/
+static void generateError(sqlite3_context *pContext,
+                          const char *prefix, const char *msg){
+  char buf[512];
+  if( msg==NULL ) msg = sqlite3_errmsg(sqlite3_context_db_handle(pContext));
+  sqlite3_snprintf(sizeof(buf), buf, "%s: %s", prefix, msg);
+  sqlite3_result_error(pContext, buf, -1);
+}
+
+/* Accumulate the terms of one segment into pTerms.  s must be
+** positioned on a row whose first three columns are start_block,
+** leaves_end_block and root (see SEGDIR_SELECT_SEGMENT and
+** SEGDIR_SELECT_ALL).  The segment's terms live in the leaf nodes
+** from start_block through leaves_end_block, inclusive, or in the
+** root data itself when start_block is 0 (in which case the entire
+** segment fit in a leaf).
+**
+** The value stored against each term is not a real pointer: it is a
+** counter, bumped by one each time the term is encountered.
+**
+** Returns SQLITE_OK on success, SQLITE_NOMEM if the hash insert
+** fails, or the error code from the leaves reader.
+*/
+static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s,
+                               fts2Hash *pTerms){
+  const sqlite_int64 iFirstLeaf = sqlite3_column_int64(s, 0);
+  const sqlite_int64 iLastLeaf = sqlite3_column_int64(s, 1);
+  const char *pRoot = sqlite3_column_blob(s, 2);
+  const int nRoot = sqlite3_column_bytes(s, 2);
+  LeavesReader leaves;
+  int rc;
+
+  rc = leavesReaderInit(v, 0, iFirstLeaf, iLastLeaf, pRoot, nRoot, &leaves);
+  if( rc!=SQLITE_OK ) return rc;
+
+  while( !leavesReaderAtEnd(&leaves) ){
+    const char *zTerm = leavesReaderTerm(&leaves);
+    const int nTermBytes = leavesReaderTermBytes(&leaves);
+    char *pSeen = (char *)sqlite3Fts2HashFind(pTerms, zTerm, nTermBytes);
+    void *pBumped = (void *)(pSeen+1);
+
+    /* Per the comment before sqlite3Fts2HashInsert in fts2_hash.c, a
+    ** malloc failure is signalled by handing back the data value that
+    ** was passed in.
+    */
+    if( sqlite3Fts2HashInsert(pTerms, zTerm, nTermBytes, pBumped)==pBumped ){
+      rc = SQLITE_NOMEM;
+      break;
+    }
+
+    rc = leavesReaderStep(v, &leaves);
+    if( rc!=SQLITE_OK ) break;
+  }
+
+  leavesReaderDestroy(&leaves);
+  return rc;
+}
+
+/* Helper for dump_terms(): sets pContext's result to the keys of
+** pTerms, sorted with termDataCmp and joined by single spaces.
+** pTerms must be non-empty (asserted).  Returns SQLITE_OK, or
+** SQLITE_NOMEM if either working allocation fails, in which case no
+** result has been set on pContext.
+*/
+static int generateTermsResult(sqlite3_context *pContext, fts2Hash *pTerms){
+  const int nTerms = fts2HashCount(pTerms);
+  TermData *aSorted;
+  fts2HashElem *pElem;
+  char *zOut, *pOut;
+  int nOut = 0;
+  int i = 0;
+
+  /* Snapshot the hash keys into an array which can be sorted. */
+  assert( nTerms>0 );
+  aSorted = sqlite3_malloc(nTerms*sizeof(TermData));
+  if( aSorted==NULL ) return SQLITE_NOMEM;
+
+  pElem = fts2HashFirst(pTerms);
+  while( pElem ){
+    assert( i<nTerms );
+    aSorted[i].pTerm = fts2HashKey(pElem);
+    aSorted[i].nTerm = fts2HashKeysize(pElem);
+    aSorted[i].pCollector = fts2HashData(pElem);  /* unused */
+    nOut += fts2HashKeysize(pElem)+1;   /* Term plus trailing space */
+    i++;
+    pElem = fts2HashNext(pElem);
+  }
+  assert( i==nTerms );
+
+  assert( nOut>0 );   /* nTerms>0, so nOut must be, too. */
+  zOut = sqlite3_malloc(nOut);
+  if( zOut==NULL ){
+    sqlite3_free(aSorted);
+    return SQLITE_NOMEM;
+  }
+
+  if( nTerms>1 ) qsort(aSorted, nTerms, sizeof(*aSorted), termDataCmp);
+
+  /* Lay the sorted terms end to end, each followed by a space. */
+  pOut = zOut;
+  for(i=0; i<nTerms; ++i){
+    memcpy(pOut, aSorted[i].pTerm, aSorted[i].nTerm);
+    pOut += aSorted[i].nTerm;
+    *pOut++ = ' ';
+  }
+  assert( pOut-zOut==nOut );
+  assert( zOut[nOut-1]==' ' );
+  zOut[nOut-1] = '\0';   /* Final separator becomes the terminator. */
+
+  /* Passes away ownership of zOut. */
+  sqlite3_result_text(pContext, zOut, nOut-1, sqlite3_free);
+  sqlite3_free(aSorted);
+  return SQLITE_OK;
+}
+
+/* Implements dump_terms() for use in inspecting the fts2 index from
+** tests.  TEXT result containing the ordered list of terms joined by
+** spaces.  dump_terms(t, level, idx) dumps the terms for the segment
+** specified by level, idx (in %_segdir), while dump_terms(t) dumps
+** all terms in the index.  In both cases t is the fts table's magic
+** table-named column, which arrives here as a blob holding the
+** fulltext_cursor pointer.
+**
+** Failures are reported as "dump_terms: <msg>" errors:
+**   incorrect arguments    - argc is neither 1 nor 3
+**   illegal first argument - argv[0] is not a pointer-sized blob
+**   segment not found      - the 3-argument form produced no terms
+**   out of memory          - an allocation failed in fts2 code
+** Any other failure uses the message held by the db handle.
+*/
+static void dumpTermsFunc(
+  sqlite3_context *pContext,
+  int argc, sqlite3_value **argv
+){
+  fulltext_cursor *pCursor;
+  if( argc!=3 && argc!=1 ){
+    generateError(pContext, "dump_terms", "incorrect arguments");
+  }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
+            sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
+    generateError(pContext, "dump_terms", "illegal first argument");
+  }else{
+    fulltext_vtab *v;
+    fts2Hash terms;
+    sqlite3_stmt *s = NULL;
+    int rc;
+
+    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
+    v = cursor_vtab(pCursor);
+
+    /* If passed only the cursor column, get all segments.  Otherwise
+    ** get the segment described by the following two arguments.
+    */
+    if( argc==1 ){
+      rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
+    }else{
+      rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s);
+      if( rc==SQLITE_OK ){
+        rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[1]));
+        if( rc==SQLITE_OK ){
+          rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[2]));
+        }
+      }
+    }
+
+    if( rc!=SQLITE_OK ){
+      /* terms has not been initialized yet, so there is nothing to clear. */
+      generateError(pContext, "dump_terms", NULL);
+      return;
+    }
+
+    /* Collect the terms for each segment. */
+    sqlite3Fts2HashInit(&terms, FTS2_HASH_STRING, 1);
+    while( (rc = sqlite3_step(s))==SQLITE_ROW ){
+      rc = collectSegmentTerms(v, s, &terms);
+      if( rc!=SQLITE_OK ) break;
+    }
+
+    if( rc!=SQLITE_DONE ){
+      sqlite3_reset(s);
+      if( rc==SQLITE_NOMEM ){
+        /* Handle out-of-memory specially, as dump_doclist() does:
+        ** collectSegmentTerms() generates SQLITE_NOMEM in fts2 code,
+        ** so it may not be reflected in the db handle.
+        */
+        generateError(pContext, "dump_terms", "out of memory");
+      }else{
+        generateError(pContext, "dump_terms", NULL);
+      }
+    }else{
+      const int nTerms = fts2HashCount(&terms);
+      if( nTerms>0 ){
+        rc = generateTermsResult(pContext, &terms);
+        if( rc==SQLITE_NOMEM ){
+          generateError(pContext, "dump_terms", "out of memory");
+        }else{
+          assert( rc==SQLITE_OK );
+        }
+      }else if( argc==3 ){
+        /* The specific segment asked for could not be found. */
+        generateError(pContext, "dump_terms", "segment not found");
+      }else{
+        /* No segments found. */
+        /* TODO(shess): It should be impossible to reach this.  This
+        ** case can only happen for an empty table, in which case
+        ** SQLite has no rows to call this function on.
+        */
+        sqlite3_result_null(pContext);
+      }
+    }
+    sqlite3Fts2HashClear(&terms);
+  }
+}
+
+/* Expand the DL_DEFAULT doclist in pData into a text result in
+** pContext.  pData must be non-NULL and nData positive (asserted).
+**
+** Each document becomes one space-separated group.  A document with
+** no position data (or any document when DL_DEFAULT==DL_DOCIDS) is
+** written as "[docid]".  Otherwise it is written as
+** "[docid col[...] col[...]]", where the inner values are positions
+** for DL_POSITIONS and position,start,end triples for
+** DL_POSITIONS_OFFSETS.  The trailing space after the final group is
+** replaced by the terminating NUL.
+*/
+static void createDoclistResult(sqlite3_context *pContext,
+                                const char *pData, int nData){
+  DataBuffer dump;
+  DLReader dlReader;
+
+  assert( pData!=NULL && nData>0 );
+
+  dataBufferInit(&dump, 0);
+  dlrInit(&dlReader, DL_DEFAULT, pData, nData);
+  for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){
+    char buf[256];
+    PLReader plReader;
+
+    plrInit(&plReader, &dlReader);
+    if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){
+      sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader));
+      dataBufferAppend(&dump, buf, strlen(buf));
+    }else{
+      int iColumn = plrColumn(&plReader);
+
+      sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[",
+                       dlrDocid(&dlReader), iColumn);
+      dataBufferAppend(&dump, buf, strlen(buf));
+
+      for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){
+        if( plrColumn(&plReader)!=iColumn ){
+          /* Column changed: close the previous column's list and open
+          ** the next one.
+          */
+          iColumn = plrColumn(&plReader);
+          sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn);
+          assert( dump.nData>0 );
+          dump.nData--;                     /* Overwrite trailing space. */
+          assert( dump.pData[dump.nData]==' ');
+          dataBufferAppend(&dump, buf, strlen(buf));
+        }
+        if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){
+          sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ",
+                           plrPosition(&plReader),
+                           plrStartOffset(&plReader), plrEndOffset(&plReader));
+        }else if( DL_DEFAULT==DL_POSITIONS ){
+          sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader));
+        }else{
+          assert( NULL=="Unhandled DL_DEFAULT value");
+        }
+        dataBufferAppend(&dump, buf, strlen(buf));
+      }
+
+      assert( dump.nData>0 );
+      dump.nData--;                     /* Overwrite trailing space. */
+      assert( dump.pData[dump.nData]==' ');
+      dataBufferAppend(&dump, "]] ", 3);
+    }
+
+    /* Pair every plrInit() with a plrDestroy(), including on the
+    ** docid-only branch above.
+    */
+    plrDestroy(&plReader);
+  }
+  dlrDestroy(&dlReader);
+
+  assert( dump.nData>0 );
+  dump.nData--;                     /* Overwrite trailing space. */
+  assert( dump.pData[dump.nData]==' ');
+  dump.pData[dump.nData] = '\0';
+  assert( dump.nData>0 );
+
+  /* Passes ownership of dump's buffer to pContext. */
+  sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free);
+  dump.pData = NULL;
+  dump.nData = dump.nCapacity = 0;
+}
+
+/* Implements dump_doclist() for use in inspecting the fts2 index from
+** tests.  TEXT result containing a string representation of the
+** doclist for the indicated term.  dump_doclist(t, term, level, idx)
+** dumps the doclist for term from the segment specified by level, idx
+** (in %_segdir), while dump_doclist(t, term) dumps the logical
+** doclist for the term across all segments.  The per-segment doclist
+** can contain deletions, while the full-index doclist will not
+** (deletions are omitted).
+**
+** Result formats differ with the setting of DL_DEFAULT.  Examples:
+**
+** DL_DOCIDS: [1] [3] [7]
+** DL_POSITIONS: [1 0[0 4] 1[17]] [3 1[5]]
+** DL_POSITIONS_OFFSETS: [1 0[0,0,3 4,23,26] 1[17,102,105]] [3 1[5,20,23]]
+**
+** In each case the number after the outer '[' is the docid.  In the
+** latter two cases, the number before the inner '[' is the column
+** associated with the values within.  For DL_POSITIONS the numbers
+** within are the positions, for DL_POSITIONS_OFFSETS they are the
+** position, the start offset, and the end offset.
+**
+** An empty doclist yields the empty string rather than an error.
+** Failures are reported as "dump_doclist: <msg>" errors:
+**   incorrect arguments    - argc is neither 2 nor 4
+**   illegal first argument - argv[0] is not a pointer-sized blob
+**   empty second argument  - the term is NULL or ""
+**   segment not found      - no %_segdir row for level, idx
+**   invalid segdir         - stepping past the first matching row
+**                            did not return SQLITE_DONE
+**   out of memory          - rc was SQLITE_NOMEM
+** Any other failure uses the message held by the db handle.
+*/
+static void dumpDoclistFunc(
+  sqlite3_context *pContext,
+  int argc, sqlite3_value **argv
+){
+  fulltext_cursor *pCursor;
+  if( argc!=2 && argc!=4 ){
+    generateError(pContext, "dump_doclist", "incorrect arguments");
+  }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
+            sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
+    generateError(pContext, "dump_doclist", "illegal first argument");
+  }else if( sqlite3_value_text(argv[1])==NULL ||
+            sqlite3_value_text(argv[1])[0]=='\0' ){
+    generateError(pContext, "dump_doclist", "empty second argument");
+  }else{
+    const char *pTerm = (const char *)sqlite3_value_text(argv[1]);
+    const int nTerm = strlen(pTerm);
+    fulltext_vtab *v;
+    int rc;
+    DataBuffer doclist;
+
+    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
+    v = cursor_vtab(pCursor);
+
+    dataBufferInit(&doclist, 0);
+
+    /* termSelect() yields the same logical doclist that queries are
+    ** run against.
+    */
+    if( argc==2 ){
+      rc = termSelect(v, v->nColumn, pTerm, nTerm, 0, DL_DEFAULT, &doclist);
+    }else{
+      sqlite3_stmt *s = NULL;
+
+      /* Get our specific segment's information. */
+      rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s);
+      if( rc==SQLITE_OK ){
+        rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[2]));  /* level */
+        if( rc==SQLITE_OK ){
+          rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[3]));  /* idx */
+        }
+      }
+
+      if( rc==SQLITE_OK ){
+        rc = sqlite3_step(s);
+
+        if( rc==SQLITE_DONE ){  /* No row for this level, idx. */
+          dataBufferDestroy(&doclist);
+          generateError(pContext, "dump_doclist", "segment not found");
+          return;
+        }
+
+        /* Found a segment, load it into doclist. */
+        if( rc==SQLITE_ROW ){
+          const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
+          const char *pData = sqlite3_column_blob(s, 2);
+          const int nData = sqlite3_column_bytes(s, 2);
+
+          /* loadSegment() is used by termSelect() to load each
+          ** segment's data.
+          */
+          rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, 0,
+                           &doclist);
+          if( rc==SQLITE_OK ){
+            rc = sqlite3_step(s);
+
+            /* Should not have more than one matching segment. */
+            if( rc!=SQLITE_DONE ){
+              sqlite3_reset(s);
+              dataBufferDestroy(&doclist);
+              generateError(pContext, "dump_doclist", "invalid segdir");
+              return;
+            }
+            rc = SQLITE_OK;  /* SQLITE_DONE here counts as success. */
+          }
+        }
+      }
+
+      sqlite3_reset(s);
+    }
+
+    if( rc==SQLITE_OK ){
+      if( doclist.nData>0 ){
+        createDoclistResult(pContext, doclist.pData, doclist.nData);
+      }else{
+        /* TODO(shess): This can happen if the term is not present, or
+        ** if all instances of the term have been deleted and this is
+        ** an all-index dump.  It may be interesting to distinguish
+        ** these cases.
+        */
+        sqlite3_result_text(pContext, "", 0, SQLITE_STATIC);
+      }
+    }else if( rc==SQLITE_NOMEM ){
+      /* Handle out-of-memory cases specially because if they are
+      ** generated in fts2 code they may not be reflected in the db
+      ** handle.
+      */
+      /* TODO(shess): Handle this more comprehensively.
+      ** sqlite3ErrStr() has what I need, but is internal.
+      */
+      generateError(pContext, "dump_doclist", "out of memory");
+    }else{
+      generateError(pContext, "dump_doclist", NULL);
+    }
+
+    dataBufferDestroy(&doclist);
+  }
+}
+#endif
+
  /*
  ** This routine implements the xFindFunction method for the FTS2
  ** virtual table.
@@ -5898,6 +6287,20 @@ static int fulltextFindFunction(
    }else if( strcmp(zName,"offsets")==0 ){
      *pxFunc = snippetOffsetsFunc;
      return 1;
+#ifdef SQLITE_TEST
+    /* NOTE(shess): These functions are present only for testing
+    ** purposes.  No particular effort is made to optimize their
+    ** execution or how they build their results.
+    */
+  }else if( strcmp(zName,"dump_terms")==0 ){
+    /* fprintf(stderr, "Found dump_terms\n"); */
+    *pxFunc = dumpTermsFunc;
+    return 1;
+  }else if( strcmp(zName,"dump_doclist")==0 ){
+    /* fprintf(stderr, "Found dump_doclist\n"); */
+    *pxFunc = dumpDoclistFunc;
+    return 1;
+#endif
    }
    return 0;
  }
@@ -6017,6 +6420,10 @@ int sqlite3Fts2Init(sqlite3 *db){
     && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
     && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
     && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
+#ifdef SQLITE_TEST
+   && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
+   && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
+#endif
    ){
      return sqlite3_create_module_v2(
          db, "fts2", &fts2Module, (void *)pHash, hashDestroy
diff --git a/manifest b/manifest

index 20e990b6fc0f63104f37f4912e04a7defe2fea51..7ea48d4c7e19e5e16cab65d8190bf4938cf34ed1 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Change\sprefix\ssearch\sfrom\sO(N*M)\sto\sO(NlogM).\r\nBackports\s(4599)\sfrom\sfts3.\s(CVS\s5455)
-D 2008-07-22T23:08:40
+C fts2\sfunctions\sfor\stesting\sscripts.\r\nBackports\s(5340)\sfrom\sfts3.\s(CVS\s5456)
+D 2008-07-22T23:32:28
  F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
  F Makefile.in 77ff156061bb870aa0a8b3d545c670d08070f7e6
  F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -39,7 +39,7 @@ F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
  F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
  F ext/fts2/README.tokenizers 21e3684ea5a095b55d70f6878b4ce6af5932dfb7
  F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts2/fts2.c 7a2e88d110d059c986234c3d7734133d59a709ef
+F ext/fts2/fts2.c af6d11365c4ae66be9779dde0948887e92d8b867
  F ext/fts2/fts2.h da5f76c65163301d1068a971fd32f4119e3c95fa
  F ext/fts2/fts2_hash.c 2689e42e1107ea67207f725cf69cf8972d00cf93
  F ext/fts2/fts2_hash.h 9a5b1be94664139f93217a0770d7144425cffb3a
@@ -315,6 +315,7 @@ F test/fts2l.test 3333336621524cf7d60bb62d6ef6ab69647866ed
  F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
  F test/fts2n.test 12b9c5352128cebd1c6b8395e43788d4b09087c2
  F test/fts2o.test c6a79567d85403dc4d15b89f3f9799a0a0aef065
+F test/fts2p.test 4b48c35c91e6a7dbf5ac8d1e5691823cc999aafb
  F test/fts2token.test d8070b241a15ff13592a9ae4a8b7c171af6f445a
  F test/fts3.test 6ee4c38b0864583c80e82a2d4372f63aae8b10c7
  F test/fts3aa.test 432d1d5c41939bb5405d4d6c80a9ec759b363393
@@ -609,7 +610,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81
  F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
  F tool/speedtest8.c 1dbced29de5f59ba2ebf877edcadf171540374d1
  F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
-P ecf2dec66cb979cb7d8db3b7ce5c64cab57fe2bb
-R 89694177ea5ff7c878e6daa15283840d
+P 3f614453d2d7c753a5963b027fe8618b50b4f6b9
+R 76720fcdb90c8738e414944367c3b0e7
  U shess
-Z 48033bdd7abd5465e3195f281a547d67
+Z 807f812f3e691d0e714ba8b89e45124c
diff --git a/manifest.uuid b/manifest.uuid

index 8e4895f1e430e5c5524171938618517dc295a718..c80705daff9bd6ea7bfd0cd358c669f77fbf5df5 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-3f614453d2d7c753a5963b027fe8618b50b4f6b9
-\ No newline at end of file
+4e47394be9dfbf0f9309e55eb6c6a3a517ea2006
+\ No newline at end of file
diff --git a/test/fts2p.test b/test/fts2p.test

new file mode 100644 (file)

index 0000000..38a8079
--- /dev/null
+++ b/test/fts2p.test
@@ -0,0 +1,357 @@
+# 2008 June 26
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file exercises some new testing functions in the FTS2 module,
+# and then uses them to do some basic tests that FTS2 is internally
+# working as expected.
+#
+# $Id: fts2p.test,v 1.1 2008/07/22 23:32:28 shess Exp $
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
+ifcapable !fts2 {
+  finish_test
+  return
+}
+
+#*************************************************************************
+# Probe to see if support for these functions is compiled in.
+# TODO(shess): Change main.mk to do the right thing and remove this test.
+db eval {
+  DROP TABLE IF EXISTS t1;
+  CREATE VIRTUAL TABLE t1 USING fts2(c);
+  INSERT INTO t1 (rowid, c) VALUES (1, 'x');
+}
+
+set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1}
+set r {1 {unable to use function dump_terms in the requested context}}
+if {[catchsql $s]==$r} {
+  finish_test
+  return
+}
+
+#*************************************************************************
+# Test that the new functions give appropriate errors.
+do_test fts2p-0.0 {
+  catchsql {
+    SELECT dump_terms(t1, 1) FROM t1 LIMIT 1;
+  }
+} {1 {dump_terms: incorrect arguments}}
+
+do_test fts2p-0.1 {
+  catchsql {
+    SELECT dump_terms(t1, 0, 0, 0) FROM t1 LIMIT 1;
+  }
+} {1 {dump_terms: incorrect arguments}}
+
+do_test fts2p-0.2 {
+  catchsql {
+    SELECT dump_terms(1, t1) FROM t1 LIMIT 1;
+  }
+} {1 {unable to use function dump_terms in the requested context}}
+
+do_test fts2p-0.3 {
+  catchsql {
+    SELECT dump_terms(t1, 16, 16) FROM t1 LIMIT 1;
+  }
+} {1 {dump_terms: segment not found}}
+
+do_test fts2p-0.4 {
+  catchsql {
+    SELECT dump_doclist(t1) FROM t1 LIMIT 1;
+  }
+} {1 {dump_doclist: incorrect arguments}}
+
+do_test fts2p-0.5 {
+  catchsql {
+    SELECT dump_doclist(t1, NULL) FROM t1 LIMIT 1;
+  }
+} {1 {dump_doclist: empty second argument}}
+
+do_test fts2p-0.6 {
+  catchsql {
+    SELECT dump_doclist(t1, '') FROM t1 LIMIT 1;
+  }
+} {1 {dump_doclist: empty second argument}}
+
+do_test fts2p-0.7 {
+  catchsql {
+    SELECT dump_doclist(t1, 'a', 0) FROM t1 LIMIT 1;
+  }
+} {1 {dump_doclist: incorrect arguments}}
+
+do_test fts2p-0.8 {
+  catchsql {
+    SELECT dump_doclist(t1, 'a', 0, 0, 0) FROM t1 LIMIT 1;
+  }
+} {1 {dump_doclist: incorrect arguments}}
+
+do_test fts2p-0.9 {
+  catchsql {
+    SELECT dump_doclist(t1, 'a', 16, 16) FROM t1 LIMIT 1;
+  }
+} {1 {dump_doclist: segment not found}}
+
+#*************************************************************************
+# Utility function to check for the expected terms in the segment
+# level/index.  _all version does same but for entire index.
+#
+#   test   - test name prefix; the case is registered as "$test.terms".
+#   level  - level of the segment in t1_segdir.
+#   index  - idx of the segment in t1_segdir.
+#   terms  - expected dump_terms() text: the segment's terms in
+#            order, joined by single spaces.
+proc check_terms {test level index terms} {
+  # TODO(shess): Figure out why uplevel in do_test can't catch
+  # $level and $index directly.  As a workaround the values are
+  # copied into globals, which the SQL refers to as $::level and
+  # $::index.
+  set ::level $level
+  set ::index $index
+  do_test $test.terms {
+    execsql {
+      SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1;
+    }
+  } [list $terms]
+}
+# Whole-index variant of check_terms: registers "$test.terms" and
+# expects the one-argument dump_terms(t1) to return $terms.
+proc check_terms_all {test terms} {
+  do_test $test.terms {
+    execsql {
+      SELECT dump_terms(t1) FROM t1 LIMIT 1;
+    }
+  } [list $terms]
+}
+
+# Utility function to check for the expected doclist for the term in
+# segment level/index.  _all version does same for entire index.
+#
+#   test     - name the case is registered under.
+#   level    - level of the segment in t1_segdir.
+#   index    - idx of the segment in t1_segdir.
+#   term     - term whose doclist is dumped.
+#   doclist  - expected dump_doclist() text, e.g. {[1 0[2]]}.
+proc check_doclist {test level index term doclist} {
+  # TODO(shess): Again, why can't the non-:: versions work?
+  # Same workaround as check_terms: pass the values through globals.
+  set ::term $term
+  set ::level $level
+  set ::index $index
+  do_test $test {
+    execsql {
+      SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1;
+    }
+  } [list $doclist]
+}
+# Whole-index variant of check_doclist: registers $test and expects
+# the two-argument dump_doclist(t1, term) to return $doclist.  The
+# term is passed to the SQL through the global $::term.
+proc check_doclist_all {test term doclist} {
+  set ::term $term
+  do_test $test {
+    execsql {
+      SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1;
+    }
+  } [list $doclist]
+}
+
+#*************************************************************************
+# Test the segments resulting from straight-forward inserts.
+db eval {
+  DROP TABLE IF EXISTS t1;
+  CREATE VIRTUAL TABLE t1 USING fts2(c);
+  INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
+  INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
+  INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
+}
+
+# Check for expected segments and expected matches.
+do_test fts2p-1.0.segments {
+  execsql {
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {0 0 0 1 0 2}
+do_test fts2p-1.0.matches {
+  execsql {
+    SELECT OFFSETS(t1) FROM t1
+     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid;
+  }
+} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
+        {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
+        {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
+
+# Check the specifics of the segments constructed.
+# Logical view of entire index.
+check_terms_all   fts2p-1.0.1   {a is test that this was}
+check_doclist_all fts2p-1.0.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
+check_doclist_all fts2p-1.0.1.2 is {[1 0[1]] [3 0[1]]}
+check_doclist_all fts2p-1.0.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]}
+check_doclist_all fts2p-1.0.1.4 that {[2 0[0]]}
+check_doclist_all fts2p-1.0.1.5 this {[1 0[0]] [3 0[0]]}
+check_doclist_all fts2p-1.0.1.6 was {[2 0[1]]}
+
+# Segment 0,0
+check_terms   fts2p-1.0.2   0 0 {a is test this}
+check_doclist fts2p-1.0.2.1 0 0 a {[1 0[2]]}
+check_doclist fts2p-1.0.2.2 0 0 is {[1 0[1]]}
+check_doclist fts2p-1.0.2.3 0 0 test {[1 0[3]]}
+check_doclist fts2p-1.0.2.4 0 0 this {[1 0[0]]}
+
+# Segment 0,1
+check_terms   fts2p-1.0.3   0 1 {a test that was}
+check_doclist fts2p-1.0.3.1 0 1 a {[2 0[2]]}
+check_doclist fts2p-1.0.3.2 0 1 test {[2 0[3]]}
+check_doclist fts2p-1.0.3.3 0 1 that {[2 0[0]]}
+check_doclist fts2p-1.0.3.4 0 1 was {[2 0[1]]}
+
+# Segment 0,2
+check_terms   fts2p-1.0.4   0 2 {a is test this}
+check_doclist fts2p-1.0.4.1 0 2 a {[3 0[2]]}
+check_doclist fts2p-1.0.4.2 0 2 is {[3 0[1]]}
+check_doclist fts2p-1.0.4.3 0 2 test {[3 0[3]]}
+check_doclist fts2p-1.0.4.4 0 2 this {[3 0[0]]}
+
+#*************************************************************************
+# Test the segments resulting from inserts followed by a delete.
+db eval {
+  DROP TABLE IF EXISTS t1;
+  CREATE VIRTUAL TABLE t1 USING fts2(c);
+  INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
+  INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
+  INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
+  DELETE FROM t1 WHERE rowid = 1;
+}
+
+do_test fts2p-1.1.segments {
+  execsql {
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {0 0 0 1 0 2 0 3}
+do_test fts2p-1.1.matches {
+  execsql {
+    SELECT OFFSETS(t1) FROM t1
+     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid;
+  }
+} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}}
+
+check_terms_all fts2p-1.1.1 {a is test that this was}
+check_doclist_all fts2p-1.1.1.1 a {[2 0[2]] [3 0[2]]}
+check_doclist_all fts2p-1.1.1.2 is {[3 0[1]]}
+check_doclist_all fts2p-1.1.1.3 test {[2 0[3]] [3 0[3]]}
+check_doclist_all fts2p-1.1.1.4 that {[2 0[0]]}
+check_doclist_all fts2p-1.1.1.5 this {[3 0[0]]}
+check_doclist_all fts2p-1.1.1.6 was {[2 0[1]]}
+
+check_terms fts2p-1.1.2 0 0 {a is test this}
+check_doclist fts2p-1.1.2.1 0 0 a {[1 0[2]]}
+check_doclist fts2p-1.1.2.2 0 0 is {[1 0[1]]}
+check_doclist fts2p-1.1.2.3 0 0 test {[1 0[3]]}
+check_doclist fts2p-1.1.2.4 0 0 this {[1 0[0]]}
+
+check_terms fts2p-1.1.3 0 1 {a test that was}
+check_doclist fts2p-1.1.3.1 0 1 a {[2 0[2]]}
+check_doclist fts2p-1.1.3.2 0 1 test {[2 0[3]]}
+check_doclist fts2p-1.1.3.3 0 1 that {[2 0[0]]}
+check_doclist fts2p-1.1.3.4 0 1 was {[2 0[1]]}
+
+check_terms fts2p-1.1.4 0 2 {a is test this}
+check_doclist fts2p-1.1.4.1 0 2 a {[3 0[2]]}
+check_doclist fts2p-1.1.4.2 0 2 is {[3 0[1]]}
+check_doclist fts2p-1.1.4.3 0 2 test {[3 0[3]]}
+check_doclist fts2p-1.1.4.4 0 2 this {[3 0[0]]}
+
+check_terms fts2p-1.1.5 0 3 {a is test this}
+check_doclist fts2p-1.1.5.1 0 3 a {[1]}
+check_doclist fts2p-1.1.5.2 0 3 is {[1]}
+check_doclist fts2p-1.1.5.3 0 3 test {[1]}
+check_doclist fts2p-1.1.5.4 0 3 this {[1]}
+
+#*************************************************************************
+# Test results when all references to certain tokens are deleted.
+db eval {
+  DROP TABLE IF EXISTS t1;
+  CREATE VIRTUAL TABLE t1 USING fts2(c);
+  INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
+  INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
+  INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
+  DELETE FROM t1 WHERE rowid IN (1,3);
+}
+
+# Still 4 segments because 0,3 will contain deletes for rowid 1 and 3.
+do_test fts2p-1.2.segments {
+  execsql {
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {0 0 0 1 0 2 0 3}
+do_test fts2p-1.2.matches {
+  execsql {
+    SELECT OFFSETS(t1) FROM t1
+     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid;
+  }
+} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
+
+check_terms_all fts2p-1.2.1 {a is test that this was}
+check_doclist_all fts2p-1.2.1.1 a {[2 0[2]]}
+check_doclist_all fts2p-1.2.1.2 is {}
+check_doclist_all fts2p-1.2.1.3 test {[2 0[3]]}
+check_doclist_all fts2p-1.2.1.4 that {[2 0[0]]}
+check_doclist_all fts2p-1.2.1.5 this {}
+check_doclist_all fts2p-1.2.1.6 was {[2 0[1]]}
+
+check_terms fts2p-1.2.2 0 0 {a is test this}
+check_doclist fts2p-1.2.2.1 0 0 a {[1 0[2]]}
+check_doclist fts2p-1.2.2.2 0 0 is {[1 0[1]]}
+check_doclist fts2p-1.2.2.3 0 0 test {[1 0[3]]}
+check_doclist fts2p-1.2.2.4 0 0 this {[1 0[0]]}
+
+check_terms fts2p-1.2.3 0 1 {a test that was}
+check_doclist fts2p-1.2.3.1 0 1 a {[2 0[2]]}
+check_doclist fts2p-1.2.3.2 0 1 test {[2 0[3]]}
+check_doclist fts2p-1.2.3.3 0 1 that {[2 0[0]]}
+check_doclist fts2p-1.2.3.4 0 1 was {[2 0[1]]}
+
+check_terms fts2p-1.2.4 0 2 {a is test this}
+check_doclist fts2p-1.2.4.1 0 2 a {[3 0[2]]}
+check_doclist fts2p-1.2.4.2 0 2 is {[3 0[1]]}
+check_doclist fts2p-1.2.4.3 0 2 test {[3 0[3]]}
+check_doclist fts2p-1.2.4.4 0 2 this {[3 0[0]]}
+
+check_terms fts2p-1.2.5 0 3 {a is test this}
+check_doclist fts2p-1.2.5.1 0 3 a {[1] [3]}
+check_doclist fts2p-1.2.5.2 0 3 is {[1] [3]}
+check_doclist fts2p-1.2.5.3 0 3 test {[1] [3]}
+check_doclist fts2p-1.2.5.4 0 3 this {[1] [3]}
+
+#*************************************************************************
+# Test results when everything is optimized manually.
+db eval {
+  DROP TABLE IF EXISTS t1;
+  CREATE VIRTUAL TABLE t1 USING fts2(c);
+  INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
+  INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
+  INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
+  DELETE FROM t1 WHERE rowid IN (1,3);
+  DROP TABLE IF EXISTS t1old;
+  ALTER TABLE t1 RENAME TO t1old;
+  CREATE VIRTUAL TABLE t1 USING fts2(c);
+  INSERT INTO t1 (rowid, c) SELECT rowid, c FROM t1old;
+  DROP TABLE t1old;
+}
+
+# Should be a single optimal segment with the same logical results.
+do_test fts2p-1.3.segments {
+  execsql {
+    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
+  }
+} {0 0}
+do_test fts2p-1.3.matches {
+  execsql {
+    SELECT OFFSETS(t1) FROM t1
+     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid;
+  }
+} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
+
+check_terms_all fts2p-1.3.1 {a test that was}
+check_doclist_all fts2p-1.3.1.1 a {[2 0[2]]}
+check_doclist_all fts2p-1.3.1.2 test {[2 0[3]]}
+check_doclist_all fts2p-1.3.1.3 that {[2 0[0]]}
+check_doclist_all fts2p-1.3.1.4 was {[2 0[1]]}
+
+check_terms fts2p-1.3.2 0 0 {a test that was}
+check_doclist fts2p-1.3.2.1 0 0 a {[2 0[2]]}
+check_doclist fts2p-1.3.2.2 0 0 test {[2 0[3]]}
+check_doclist fts2p-1.3.2.3 0 0 that {[2 0[0]]}
+check_doclist fts2p-1.3.2.4 0 0 was {[2 0[1]]}
+
+finish_test
author	shess <shess@noemail.net>
	Tue, 22 Jul 2008 23:32:27 +0000 (23:32 +0000)
committer	shess <shess@noemail.net>
	Tue, 22 Jul 2008 23:32:27 +0000 (23:32 +0000)
ext/fts2/fts2.c		patch \| blob \| blame \| history
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
test/fts2p.test	[new file with mode: 0644]	patch \| blob