]> git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Add the "instance" type to the fts5vocab virtual table module. For direct
authordan <dan@noemail.net>
Thu, 10 Aug 2017 20:36:56 +0000 (20:36 +0000)
committerdan <dan@noemail.net>
Thu, 10 Aug 2017 20:36:56 +0000 (20:36 +0000)
access to the contents of the fts5 term index.

FossilOrigin-Name: 34a7bd7121a478e14982d59be95ad891fab8050ad5e006638f826c57c392b93e

ext/fts5/fts5_vocab.c
manifest
manifest.uuid

index 82c7dc90560b150049badb21948d6b346c8ca7fe..e48a54c9ab81b1dd644d8254708b28abd3740355 100644 (file)
 **   the number of fts5 rows that contain at least one instance of term
 **   $term. Field $cnt is set to the total number of instances of term 
 **   $term in the database.
+**
+** instance:
+**     CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>));
+**
+**   One row for each term instance in the database. 
 */
 
 
@@ -44,7 +49,7 @@ struct Fts5VocabTable {
   char *zFts5Db;                  /* Db containing fts5 table */
   sqlite3 *db;                    /* Database handle */
   Fts5Global *pGlobal;            /* FTS5 global object for this database */
-  int eType;                      /* FTS5_VOCAB_COL or ROW */
+  int eType;                      /* FTS5_VOCAB_COL, ROW or INSTANCE */
 };
 
 struct Fts5VocabCursor {
@@ -64,16 +69,22 @@ struct Fts5VocabCursor {
   i64 *aCnt;
   i64 *aDoc;
 
-  /* Output values used by 'row' and 'col' tables */
+  /* Output values used by all tables. */
   i64 rowid;                      /* This table's current rowid value */
   Fts5Buffer term;                /* Current value of 'term' column */
+
+  /* Output values Used by 'instance' tables only */
+  i64 iInstPos;
+  int iInstOff;
 };
 
-#define FTS5_VOCAB_COL    0
-#define FTS5_VOCAB_ROW    1
+#define FTS5_VOCAB_COL      0
+#define FTS5_VOCAB_ROW      1
+#define FTS5_VOCAB_INSTANCE 2
 
 #define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
 #define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"
+#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"
 
 /*
 ** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
@@ -101,6 +112,9 @@ static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
     if( sqlite3_stricmp(zCopy, "row")==0 ){
       *peType = FTS5_VOCAB_ROW;
     }else
+    if( sqlite3_stricmp(zCopy, "instance")==0 ){
+      *peType = FTS5_VOCAB_INSTANCE;
+    }else
     {
       *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
       rc = SQLITE_ERROR;
@@ -161,7 +175,8 @@ static int fts5VocabInitVtab(
 ){
   const char *azSchema[] = { 
     "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA  ")", 
-    "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA  ")"
+    "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA  ")",
+    "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")"
   };
 
   Fts5VocabTable *pRet = 0;
@@ -235,6 +250,15 @@ static int fts5VocabCreateMethod(
 
 /* 
 ** Implementation of the xBestIndex method.
+**
+** Only constraints of the form:
+**
+**     term <= ?
+**     term == ?
+**     term >= ?
+**
+** are interpreted. Less-than and less-than-or-equal are treated 
+** identically, as are greater-than and greater-than-or-equal.
 */
 static int fts5VocabBestIndexMethod(
   sqlite3_vtab *pUnused,
@@ -378,6 +402,54 @@ static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
   return SQLITE_OK;
 }
 
+static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){
+  int rc = SQLITE_OK;
+  
+  if( sqlite3Fts5IterEof(pCsr->pIter) ){
+    pCsr->bEof = 1;
+  }else{
+    const char *zTerm;
+    int nTerm;
+    zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
+    if( pCsr->nLeTerm>=0 ){
+      int nCmp = MIN(nTerm, pCsr->nLeTerm);
+      int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
+      if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
+        pCsr->bEof = 1;
+      }
+    }
+
+    sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
+  }
+  return rc;
+}
+
+static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){
+  int eDetail = pCsr->pConfig->eDetail;
+  int rc = SQLITE_OK;
+  Fts5IndexIter *pIter = pCsr->pIter;
+  i64 *pp = &pCsr->iInstPos;
+  int *po = &pCsr->iInstOff;
+  
+  while( eDetail==FTS5_DETAIL_NONE
+      || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp) 
+  ){
+    pCsr->iInstPos = 0;
+    pCsr->iInstOff = 0;
+
+    rc = sqlite3Fts5IterNextScan(pCsr->pIter);
+    if( rc==SQLITE_OK ){
+      rc = fts5VocabInstanceNewTerm(pCsr);
+      if( eDetail==FTS5_DETAIL_NONE ) break;
+    }
+    if( rc ){
+      pCsr->bEof = 1;
+      break;
+    }
+  }
+
+  return rc;
+}
 
 /*
 ** Advance the cursor to the next row in the table.
@@ -390,13 +462,17 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
 
   pCsr->rowid++;
 
+  if( pTab->eType==FTS5_VOCAB_INSTANCE ){
+    return fts5VocabInstanceNext(pCsr);
+  }
+
   if( pTab->eType==FTS5_VOCAB_COL ){
     for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
       if( pCsr->aDoc[pCsr->iCol] ) break;
     }
   }
 
-  if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=nCol ){
+  if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){
     if( sqlite3Fts5IterEof(pCsr->pIter) ){
       pCsr->bEof = 1;
     }else{
@@ -420,22 +496,26 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
 
       assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
       while( rc==SQLITE_OK ){
+        int eDetail = pCsr->pConfig->eDetail;
         const u8 *pPos; int nPos;   /* Position list */
         i64 iPos = 0;               /* 64-bit position read from poslist */
         int iOff = 0;               /* Current offset within position list */
 
         pPos = pCsr->pIter->pData;
         nPos = pCsr->pIter->nData;
-        switch( pCsr->pConfig->eDetail ){
-          case FTS5_DETAIL_FULL:
-            pPos = pCsr->pIter->pData;
-            nPos = pCsr->pIter->nData;
-            if( pTab->eType==FTS5_VOCAB_ROW ){
+
+        switch( pTab->eType ){
+          case FTS5_VOCAB_ROW:
+            if( eDetail==FTS5_DETAIL_FULL ){
               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
                 pCsr->aCnt[0]++;
               }
-              pCsr->aDoc[0]++;
-            }else{
+            }
+            pCsr->aDoc[0]++;
+            break;
+
+          case FTS5_VOCAB_COL:
+            if( eDetail==FTS5_DETAIL_FULL ){
               int iCol = -1;
               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
                 int ii = FTS5_POS2COLUMN(iPos);
@@ -449,13 +529,7 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
                   iCol = ii;
                 }
               }
-            }
-            break;
-
-          case FTS5_DETAIL_COLUMNS:
-            if( pTab->eType==FTS5_VOCAB_ROW ){
-              pCsr->aDoc[0]++;
-            }else{
+            }else if( eDetail==FTS5_DETAIL_COLUMNS ){
               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
                 assert_nc( iPos>=0 && iPos<nCol );
                 if( iPos>=nCol ){
@@ -464,18 +538,21 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
                 }
                 pCsr->aDoc[iPos]++;
               }
+            }else{
+              assert( eDetail==FTS5_DETAIL_NONE );
+              pCsr->aDoc[0]++;
             }
             break;
 
-          default: 
-            assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE );
-            pCsr->aDoc[0]++;
+          default:
+            assert( pTab->eType==FTS5_VOCAB_INSTANCE );
             break;
         }
 
         if( rc==SQLITE_OK ){
           rc = sqlite3Fts5IterNextScan(pCsr->pIter);
         }
+        if( pTab->eType==FTS5_VOCAB_INSTANCE ) break;
 
         if( rc==SQLITE_OK ){
           zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
@@ -505,7 +582,9 @@ static int fts5VocabFilterMethod(
   int nUnused,                    /* Number of elements in apVal */
   sqlite3_value **apVal           /* Arguments for the indexing scheme */
 ){
+  Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
+  int eType = pTab->eType;
   int rc = SQLITE_OK;
 
   int iVal = 0;
@@ -545,11 +624,16 @@ static int fts5VocabFilterMethod(
     }
   }
 
-
   if( rc==SQLITE_OK ){
     rc = sqlite3Fts5IndexQuery(pCsr->pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
   }
-  if( rc==SQLITE_OK ){
+  if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){
+    rc = fts5VocabInstanceNewTerm(pCsr);
+  }
+  if( rc==SQLITE_OK 
+   && !pCsr->bEof 
+   && (eType!=FTS5_VOCAB_INSTANCE || pCsr->pConfig->eDetail!=FTS5_DETAIL_NONE)
+  ){
     rc = fts5VocabNextMethod(pCursor);
   }
 
@@ -591,13 +675,42 @@ static int fts5VocabColumnMethod(
     }else{
       iVal = pCsr->aCnt[pCsr->iCol];
     }
-  }else{
+  }else if( eType==FTS5_VOCAB_ROW ){
     assert( iCol==1 || iCol==2 );
     if( iCol==1 ){
       iVal = pCsr->aDoc[0];
     }else{
       iVal = pCsr->aCnt[0];
     }
+  }else{
+    int eDetail = pCsr->pConfig->eDetail;
+    assert( eType==FTS5_VOCAB_INSTANCE );
+    switch( iCol ){
+      case 1:
+        sqlite3_result_int64(pCtx, pCsr->pIter->iRowid);
+        break;
+      case 2: {
+        int ii = -1;
+        if( eDetail==FTS5_DETAIL_FULL ){
+          ii = FTS5_POS2COLUMN(pCsr->iInstPos);
+        }else if( eDetail==FTS5_DETAIL_COLUMNS ){
+          ii = pCsr->iInstPos;
+        }
+        if( ii>=0 && ii<pCsr->pConfig->nCol ){
+          const char *z = pCsr->pConfig->azCol[ii];
+          sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
+        }
+        break;
+      }
+      default: {
+        assert( iCol==3 );
+        if( eDetail==FTS5_DETAIL_FULL ){
+          int ii = FTS5_POS2OFFSET(pCsr->iInstPos);
+          sqlite3_result_int(pCtx, ii);
+        }
+        break;
+      }
+    }
   }
 
   if( iVal>0 ) sqlite3_result_int64(pCtx, iVal);
index 2717e23e3d9a6ffe7acf97c5a2f98f08d59bf64c..17ea787547b93370e961bd4c1ab98453dcd972bd 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\sa\sfirst\sdraft\sof\sthe\s"vtablog"\sextensions\sthat\simplements\sa\sgeneric\nvirtual\stable\suseful\sfor\sexperimentation\susing\sthe\scommand-line\sshell.
-D 2017-08-10T03:27:27.239
+C Add\sthe\s"instance"\stype\sto\sthe\sfts5vocab\svirtual\stable\smodule.\sFor\sdirect\naccess\sto\sthe\scontents\sof\sthe\sfts5\sterm\sindex.
+D 2017-08-10T20:36:56.419
 F Makefile.in d9873c9925917cca9990ee24be17eb9613a668012c85a343aef7e5536ae266e8
 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
 F Makefile.msc 02b469e9dcd5b7ee63fc1fb05babc174260ee4cfa4e0ef2e48c3c6801567a016
@@ -114,7 +114,7 @@ F ext/fts5/fts5_test_tok.c ffd657dd67e7fcdb31bf63fb60b6d867299a581d0f46e97086aba
 F ext/fts5/fts5_tokenize.c 2ce7b44183538ec46b7907726262ee43ffdd39a8
 F ext/fts5/fts5_unicode2.c b450b209b157d598f7b9df9f837afb75a14c24bf
 F ext/fts5/fts5_varint.c a5aceacda04dafcbae725413d7a16818ecd65738
-F ext/fts5/fts5_vocab.c e44fefa7f0c1db252998af071daf06a7147e17e7
+F ext/fts5/fts5_vocab.c 90783d59cb8ee29ae08ac7f7e1f9c04cc4fb3ffc46d34fedba96d145636dd39d
 F ext/fts5/fts5parse.y a070b538e08ae9e2177d15c337ed2a3464408f0f886e746307098f746efd94ca
 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
 F ext/fts5/test/fts5_common.tcl b01c584144b5064f30e6c648145a2dd6bc440841
@@ -1645,7 +1645,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
 F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P dcdf091388251292ff9939bdff920708320bc64dacfe0fa1878c5ffd11b679c9
-R 089d3b3d49f7f300de0a7e207b239b0f
-U drh
-Z e2065e8cae31fe4aaa6c975ba17a09f2
+P e49279e65169a939b6058a0960dc1fe09ce4ee2d78992a1969773cbc7ce1043b
+R 51c4219f1ac2413928699169835fef62
+U dan
+Z c67de475a6759cd5bb3ba77ac9b27176
index 2a55521c237c48845d6b358ed3860623a9f77d50..6de194638d96ad6ea3f85674b4c24a9bd3394c14 100644 (file)
@@ -1 +1 @@
-e49279e65169a939b6058a0960dc1fe09ce4ee2d78992a1969773cbc7ce1043b
\ No newline at end of file
+34a7bd7121a478e14982d59be95ad891fab8050ad5e006638f826c57c392b93e
\ No newline at end of file