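Allow FTS4 multi-token phrases to use a combination of in-memory and incrementally loaded doclists. This allows phrases to (partially) benefit from incremental doclists without disabling the deferred token optimization.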

author dan <dan@noemail.net>

Thu, 3 Oct 2013 19:27:14 +0000 (19:27 +0000)

committer dan <dan@noemail.net>

Thu, 3 Oct 2013 19:27:14 +0000 (19:27 +0000)
author dan <dan@noemail.net>
Thu, 3 Oct 2013 19:27:14 +0000 (19:27 +0000)
committer dan <dan@noemail.net>
Thu, 3 Oct 2013 19:27:14 +0000 (19:27 +0000)
diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c

index 2965bced373753facd43d5225fa7eeee4fe45623..4018a316f022f90b36081f36b60c16946a349b8d 100644 (file)
--- a/ext/fts3/fts3.c
+++ b/ext/fts3/fts3.c
@@ -4043,6 +4043,7 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
    ** scanned in forward order, and the phrase consists of 
    ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first"
    ** tokens or prefix tokens that cannot use a prefix-index.  */
+  int bHaveIncr = 0;
    int bIncrOk = (bOptOk 
     && pCsr->bDesc==pTab->bDescIdx 
     && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
@@ -4053,23 +4054,28 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
    );
    for(i=0; bIncrOk==1 && i<p->nToken; i++){
      Fts3PhraseToken *pToken = &p->aToken[i];
-    if( pToken->bFirst || !pToken->pSegcsr || !pToken->pSegcsr->bLookup ){
+    if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){
        bIncrOk = 0;
      }
+    if( pToken->pSegcsr ) bHaveIncr = 1;
    }
  
-  if( bIncrOk ){
+  if( bIncrOk && bHaveIncr ){
      /* Use the incremental approach. */
      int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
      for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
-      Fts3PhraseToken *pTok = &p->aToken[i];
-      rc = sqlite3Fts3MsrIncrStart(pTab, pTok->pSegcsr, iCol, pTok->z, pTok->n);
+      Fts3PhraseToken *pToken = &p->aToken[i];
+      Fts3MultiSegReader *pSegcsr = pToken->pSegcsr;
+      if( pSegcsr ){
+        rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n);
+      }
      }
+    p->bIncr = 1;
    }else{
      /* Load the full doclist for the phrase into memory. */
      rc = fts3EvalPhraseLoad(pCsr, p);
+    p->bIncr = 0;
    }
-  p->bIncr = bIncrOk;
  
    assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
    return rc;
@@ -4173,11 +4179,59 @@ void sqlite3Fts3DoclistNext(
    *ppIter = p;
  }
  
+/*
+** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof
+** to true if EOF is reached.
+*/
+static void fts3EvalDlPhraseNext(
+  Fts3Table *pTab,
+  Fts3Doclist *pDL,
+  u8 *pbEof
+){
+  char *pIter;                            /* Used to iterate through aAll */
+  char *pEnd = &pDL->aAll[pDL->nAll];     /* 1 byte past end of aAll */
+ 
+  if( pDL->pNextDocid ){
+    pIter = pDL->pNextDocid;
+  }else{
+    pIter = pDL->aAll;
+  }
+
+  if( pIter>=pEnd ){
+    /* We have already reached the end of this doclist. EOF. */
+    *pbEof = 1;
+  }else{
+    sqlite3_int64 iDelta;
+    pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
+    if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
+      pDL->iDocid += iDelta;
+    }else{
+      pDL->iDocid -= iDelta;
+    }
+    pDL->pList = pIter;
+    fts3PoslistCopy(0, &pIter);
+    pDL->nList = (int)(pIter - pDL->pList);
+
+    /* pIter now points just past the 0x00 that terminates the position-
+    ** list for document pDL->iDocid. However, if this position-list was
+    ** edited in place by fts3EvalNearTrim(), then pIter may not actually
+    ** point to the start of the next docid value. The following line deals
+    ** with this case by advancing pIter past the zero-padding added by
+    ** fts3EvalNearTrim().  */
+    while( pIter<pEnd && *pIter==0 ) pIter++;
+
+    pDL->pNextDocid = pIter;
+    assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
+    *pbEof = 0;
+  }
+}
+
  /*
  ** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
  */
  typedef struct TokenDoclist TokenDoclist;
  struct TokenDoclist {
+  int bIgnore;
    sqlite3_int64 iDocid;
    char *pList;
    int nList;
@@ -4194,29 +4248,55 @@ struct TokenDoclist {
  */
  static int incrPhraseTokenNext(
    Fts3Table *pTab,                /* Virtual table handle */
-  Fts3PhraseToken *pToken,        /* Advance the iterator for this token */
+  Fts3Phrase *pPhrase,            /* Phrase to advance token of */
+  int iToken,                     /* Specific token to advance */
    TokenDoclist *p,                /* OUT: Docid and doclist for new entry */
-  int *pbEof                      /* OUT: True if iterator is at EOF */
+  u8 *pbEof                       /* OUT: True if iterator is at EOF */
  ){
-  int rc;
-  assert( pToken->pDeferred==0 );
-  rc = sqlite3Fts3MsrIncrNext(
-      pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList
-  );
-  if( p->pList==0 ) *pbEof = 1;
+  int rc = SQLITE_OK;
+
+  if( pPhrase->iDoclistToken==iToken ){
+    assert( p->bIgnore==0 );
+    assert( pPhrase->aToken[iToken].pSegcsr==0 );
+    fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof);
+    p->pList = pPhrase->doclist.pList;
+    p->nList = pPhrase->doclist.nList;
+    p->iDocid = pPhrase->doclist.iDocid;
+  }else{
+    Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
+    assert( pToken->pDeferred==0 );
+    assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 );
+    if( pToken->pSegcsr ){
+      assert( p->bIgnore==0 );
+      rc = sqlite3Fts3MsrIncrNext(
+          pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList
+      );
+      if( p->pList==0 ) *pbEof = 1;
+    }else{
+      p->bIgnore = 1;
+    }
+  }
+
    return rc;
  }
  
  
  /*
-** The phrase iterator passed as the second argument uses the incremental
-** doclist strategy. Advance it to the next matching documnent in the
-** database. If an error occurs, return an SQLite error code. Otherwise, 
-** return SQLITE_OK.
+** The phrase iterator passed as the second argument:
+**
+**   * features at least one token that uses an incremental doclist, and 
+**
+**   * does not contain any deferred tokens.
+**
+** Advance it to the next matching documnent in the database and populate
+** the Fts3Doclist.pList and nList fields. 
  **
  ** If there is no "next" entry and no error occurs, then *pbEof is set to
  ** 1 before returning. Otherwise, if no error occurs and the iterator is
  ** successfully advanced, *pbEof is set to 0.
+**
+** If an error occurs, return an SQLite error code. Otherwise, return 
+** SQLITE_OK.
  */
  static int fts3EvalIncrPhraseNext(
    Fts3Cursor *pCsr,               /* FTS Cursor handle */
@@ -4226,12 +4306,13 @@ static int fts3EvalIncrPhraseNext(
    int rc = SQLITE_OK;
    Fts3Doclist *pDL = &p->doclist;
    Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
-  int bEof = 0;
+  u8 bEof = 0;
  
+  /* This is only called if it is guaranteed that the phrase has at least
+  ** one incremental token. In which case the bIncr flag is set. */
    assert( p->bIncr==1 );
-  assert( pDL->pNextDocid==0 );
  
-  if( p->nToken==1 ){
+  if( p->nToken==1 && p->bIncr ){
      rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
          &pDL->iDocid, &pDL->pList, &pDL->nList
      );
@@ -4240,29 +4321,35 @@ static int fts3EvalIncrPhraseNext(
      int bDescDoclist = pCsr->bDesc;
      struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];
  
+    memset(a, 0, sizeof(a));
      assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );
+    assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS );
  
      while( bEof==0 ){
+      int bMaxSet = 0;
        sqlite3_int64 iMax;         /* Largest docid for all iterators */
        int i;                      /* Used to iterate through tokens */
  
        /* Advance the iterator for each token in the phrase once. */
        for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
-        rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof);
-        if( i==0 || DOCID_CMP(iMax, a[i].iDocid)<0 ){
+        rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
+        if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){
            iMax = a[i].iDocid;
+          bMaxSet = 1;
          }
        }
+      assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 );
+      assert( rc!=SQLITE_OK || bMaxSet );
  
        /* Keep advancing iterators until they all point to the same document */
-      if( bEof==0 && rc==SQLITE_OK ){
-        for(i=0; i<p->nToken; i++){
-          while( DOCID_CMP(a[i].iDocid, iMax)<0 && rc==SQLITE_OK && bEof==0 ){
-            rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof);
-            if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
-              iMax = a[i].iDocid;
-              i = 0;
-            }
+      for(i=0; i<p->nToken; i++){
+        while( rc==SQLITE_OK && bEof==0 
+            && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 
+        ){
+          rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
+          if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
+            iMax = a[i].iDocid;
+            i = 0;
            }
          }
        }
@@ -4276,16 +4363,18 @@ static int fts3EvalIncrPhraseNext(
          memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);
  
          for(i=0; i<(p->nToken-1); i++){
-          char *pLeft = a[i].pList;
-          char *pRight = aDoclist;
-          char *pOut = aDoclist;
-          int nDist = p->nToken-1-i;
-          int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pLeft, &pRight);
-          if( res==0 ) break;
-          nList = (pOut - aDoclist);
+          if( a[i].bIgnore==0 ){
+            char *pL = a[i].pList;
+            char *pR = aDoclist;
+            char *pOut = aDoclist;
+            int nDist = p->nToken-1-i;
+            int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR);
+            if( res==0 ) break;
+            nList = (pOut - aDoclist);
+          }
          }
          if( i==(p->nToken-1) ){
-          pDL->iDocid = a[0].iDocid;
+          pDL->iDocid = iMax;
            pDL->pList = aDoclist;
            pDL->nList = nList;
            pDL->bFreeList = 1;
@@ -4326,41 +4415,7 @@ static int fts3EvalPhraseNext(
      );
      pDL->pList = pDL->pNextDocid;
    }else{
-    char *pIter;                            /* Used to iterate through aAll */
-    char *pEnd = &pDL->aAll[pDL->nAll];     /* 1 byte past end of aAll */
-    if( pDL->pNextDocid ){
-      pIter = pDL->pNextDocid;
-    }else{
-      pIter = pDL->aAll;
-    }
-
-    if( pIter>=pEnd ){
-      /* We have already reached the end of this doclist. EOF. */
-      *pbEof = 1;
-    }else{
-      sqlite3_int64 iDelta;
-      pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
-      if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
-        pDL->iDocid += iDelta;
-      }else{
-        pDL->iDocid -= iDelta;
-      }
-      pDL->pList = pIter;
-      fts3PoslistCopy(0, &pIter);
-      pDL->nList = (int)(pIter - pDL->pList);
-
-      /* pIter now points just past the 0x00 that terminates the position-
-      ** list for document pDL->iDocid. However, if this position-list was
-      ** edited in place by fts3EvalNearTrim(), then pIter may not actually
-      ** point to the start of the next docid value. The following line deals
-      ** with this case by advancing pIter past the zero-padding added by
-      ** fts3EvalNearTrim().  */
-      while( pIter<pEnd && *pIter==0 ) pIter++;
-
-      pDL->pNextDocid = pIter;
-      assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
-      *pbEof = 0;
-    }
+    fts3EvalDlPhraseNext(pTab, pDL, pbEof);
    }
  
    return rc;
@@ -4640,7 +4695,7 @@ static int fts3EvalSelectDeferred(
        ** overflowing the 32-bit integer it is stored in. */
        if( ii<12 ) nLoad4 = nLoad4*4;
  
-      if( ii==0 || pTC->pPhrase->nToken>1 ){
+      if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){
          /* Either this is the cheapest token in the entire query, or it is
          ** part of a multi-token phrase. Either way, the entire doclist will
          ** (eventually) be loaded into memory. It may as well be now. */
@@ -5238,8 +5293,11 @@ static void fts3EvalRestart(
        if( pPhrase->bIncr ){
          int i;
          for(i=0; i<pPhrase->nToken; i++){
-          assert( pPhrase->aToken[i].pSegcsr );
-          sqlite3Fts3MsrIncrRestart(pPhrase->aToken[i].pSegcsr);
+          Fts3PhraseToken *pToken = &pPhrase->aToken[i];
+          assert( pToken->pDeferred==0 );
+          if( pToken->pSegcsr ){
+            sqlite3Fts3MsrIncrRestart(pToken->pSegcsr);
+          }
          }
          *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
        }
diff --git a/manifest b/manifest

index 3f64e7d97f00ababf9b82622a9989c4eb8c03549..b25987a37023383062203e73fccc0028bed5bf79 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\sa\stest\sto\scheck\sthat\sthe\snew\smulti-token\sphrase\soptimization\sis\sactually\shelping.
-D 2013-10-02T08:04:27.690
+C Allow\sFTS4\smulti-token\sphrases\sto\suse\sa\scombination\sof\sin-memory\sand\sincrementally\sloaded\sdoclists.\sThis\sallows\sphrases\sto\s(partially)\sbenefit\sfrom\sincremental\sdoclists\swithout\sdisabling\sthe\sdeferred\stoken\soptimization.
+D 2013-10-03T19:27:14.455
  F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
  F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e
  F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -78,7 +78,7 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51
  F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
  F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314
  F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c a6b015359e8f5911e4b23b5b80375544e8f00b60
+F ext/fts3/fts3.c 6d277a3ff6b20ff815184395407c5a4bd7787f9c
  F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
  F ext/fts3/fts3Int.h 8689f7cf85020e7f88d1e761eeac480c3b0ea7ad
  F ext/fts3/fts3_aux.c b02632f6dd0e375ce97870206d914ea6d8df5ccd
@@ -556,7 +556,7 @@ F test/fts4aa.test 0c3152322c7f0b548cc942ad763eaba0da87ccca
  F test/fts4check.test 66fa274cab2b615f2fb338b257713aba8fad88a8
  F test/fts4content.test 2e7252557d6d24afa101d9ba1de710d6140e6d06
  F test/fts4docid.test e33c383cfbdff0284685604d256f347a18fdbf01
-F test/fts4incr.test 79093b825148178a4eeba2bd1219a19b5d2d248a
+F test/fts4incr.test 2fae04582c2329a038b2b1f985e702478fb94888
  F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7
  F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee
  F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891
@@ -1120,7 +1120,7 @@ F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
  F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
  F tool/wherecosttest.c f407dc4c79786982a475261866a161cd007947ae
  F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff
-P 65d9c6fafbc19d3582e07c31fd915a70e7fcfa8e
-R c082139794006b287921621a475577ab
+P bc3a2ed5fb2402805928b0002457eebf06f87c47
+R 901c988fa73ccb0a783d711147163b1a
  U dan
-Z 7170aa47c893316c61ddefdaf461c4d6
+Z a8c5d4de2473f3c23eca341d3a936ce8
diff --git a/manifest.uuid b/manifest.uuid

index 100e78856593ff4f93243421fd40e85b325de3ba..a09415fd3f79f4b68965446da643a49568c54a8f 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-bc3a2ed5fb2402805928b0002457eebf06f87c47
-\ No newline at end of file
+f6819c5f3363d358e7ef65fe6978f13991bd44af
+\ No newline at end of file
diff --git a/test/fts4incr.test b/test/fts4incr.test

index 31d4937c10bd7af46f1e423dd59ddac480ab4913..a9799b351d1b62a32d3952bf034ec8342e10b77c 100644 (file)
--- a/test/fts4incr.test
+++ b/test/fts4incr.test
@@ -13,7 +13,7 @@
  set testdir [file dirname $argv0]
  source $testdir/tester.tcl
  source $testdir/fts3_common.tcl
-set ::testprefix fts4docid
+set ::testprefix fts4incr
  
  # If SQLITE_ENABLE_FTS3 is defined, omit this file.
  ifcapable !fts3 {
@@ -27,7 +27,7 @@ ifcapable !fts3 {
  source $testdir/genesis.tcl
  
  do_test 1.0 {
-  execsql { CREATE VIRTUAL TABLE t1 USING fts3(words) }
+  execsql { CREATE VIRTUAL TABLE t1 USING fts4(words) }
    fts_kjv_genesis
  } {}
author	dan <dan@noemail.net>
	Thu, 3 Oct 2013 19:27:14 +0000 (19:27 +0000)
committer	dan <dan@noemail.net>
	Thu, 3 Oct 2013 19:27:14 +0000 (19:27 +0000)
ext/fts3/fts3.c		patch \| blob \| blame \| history
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
test/fts4incr.test		patch \| blob \| blame \| history