Modify fts3 to support a more complex expression syntax that allows parenthesis....

author danielk1977 <danielk1977@noemail.net>

Wed, 17 Dec 2008 15:18:17 +0000 (15:18 +0000)

committer danielk1977 <danielk1977@noemail.net>

Wed, 17 Dec 2008 15:18:17 +0000 (15:18 +0000)
author danielk1977 <danielk1977@noemail.net>
Wed, 17 Dec 2008 15:18:17 +0000 (15:18 +0000)
committer danielk1977 <danielk1977@noemail.net>
Wed, 17 Dec 2008 15:18:17 +0000 (15:18 +0000)
diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c

index ae7e98c26d2ed461a2e5c73eb4391df4502a624a..0c7db56c52a632e86d524502c26dacce75ec855a 100644 (file)
--- a/ext/fts3/fts3.c
+++ b/ext/fts3/fts3.c
@@ -285,6 +285,7 @@
  #include <ctype.h>
  
  #include "fts3.h"
+#include "fts3_expr.h"
  #include "fts3_hash.h"
  #include "fts3_tokenizer.h"
  #ifndef SQLITE_CORE 
@@ -312,11 +313,6 @@
  # define FTSTRACE(A)
  #endif
  
-/*
-** Default span for NEAR operators.
-*/
-#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
-
  /* It is not safe to call isspace(), tolower(), or isalnum() on
  ** hi-bit-set characters.  This is the same solution used in the
  ** tokenizer.
@@ -1788,90 +1784,6 @@ static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
  /* Forward reference */
  typedef struct fulltext_vtab fulltext_vtab;
  
-/* A single term in a query is represented by an instances of
-** the following structure. Each word which may match against
-** document content is a term. Operators, like NEAR or OR, are
-** not terms. Query terms are organized as a flat list stored
-** in the Query.pTerms array.
-**
-** If the QueryTerm.nPhrase variable is non-zero, then the QueryTerm
-** is the first in a contiguous string of terms that are either part
-** of the same phrase, or connected by the NEAR operator.
-**
-** If the QueryTerm.nNear variable is non-zero, then the token is followed 
-** by a NEAR operator with span set to (nNear-1). For example, the 
-** following query:
-**
-** The QueryTerm.iPhrase variable stores the index of the token within
-** its phrase, indexed starting at 1, or 1 if the token is not part 
-** of any phrase.
-**
-** For example, the data structure used to represent the following query:
-**
-**     ... MATCH 'sqlite NEAR/5 google NEAR/2 "search engine"'
-**
-** is:
-**
-**     {nPhrase=4, iPhrase=1, nNear=6, pTerm="sqlite"},
-**     {nPhrase=0, iPhrase=1, nNear=3, pTerm="google"},
-**     {nPhrase=0, iPhrase=1, nNear=0, pTerm="search"},
-**     {nPhrase=0, iPhrase=2, nNear=0, pTerm="engine"},
-**
-** compiling the FTS3 syntax to Query structures is done by the parseQuery()
-** function.
-*/
-typedef struct QueryTerm {
-  short int nPhrase; /* How many following terms are part of the same phrase */
-  short int iPhrase; /* This is the i-th term of a phrase. */
-  short int iColumn; /* Column of the index that must match this term */
-  short int nNear;   /* term followed by a NEAR operator with span=(nNear-1) */
-  signed char isOr;  /* this term is preceded by "OR" */
-  signed char isNot; /* this term is preceded by "-" */
-  signed char isPrefix; /* this term is followed by "*" */
-  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
-  int nTerm;         /* Number of bytes in pTerm[] */
-} QueryTerm;
-
-
-/* A query string is parsed into a Query structure.
- *
- * We could, in theory, allow query strings to be complicated
- * nested expressions with precedence determined by parentheses.
- * But none of the major search engines do this.  (Perhaps the
- * feeling is that an parenthesized expression is two complex of
- * an idea for the average user to grasp.)  Taking our lead from
- * the major search engines, we will allow queries to be a list
- * of terms (with an implied AND operator) or phrases in double-quotes,
- * with a single optional "-" before each non-phrase term to designate
- * negation and an optional OR connector.
- *
- * OR binds more tightly than the implied AND, which is what the
- * major search engines seem to do.  So, for example:
- * 
- *    [one two OR three]     ==>    one AND (two OR three)
- *    [one OR two three]     ==>    (one OR two) AND three
- *
- * A "-" before a term matches all entries that lack that term.
- * The "-" must occur immediately before the term with in intervening
- * space.  This is how the search engines do it.
- *
- * A NOT term cannot be the right-hand operand of an OR.  If this
- * occurs in the query string, the NOT is ignored:
- *
- *    [one OR -two]          ==>    one OR two
- *
- */
-typedef struct Query {
-  fulltext_vtab *pFts;  /* The full text index */
-  int nTerms;           /* Number of terms in the query */
-  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
-  int nextIsOr;         /* Set the isOr flag on the next inserted term */
-  int nextIsNear;       /* Set the isOr flag on the next inserted term */
-  int nextColumn;       /* Next word parsed must be in this column */
-  int dfltColumn;       /* The default column */
-} Query;
-
-
  /*
  ** An instance of the following structure keeps track of generated
  ** matching-word offset information and snippets.
@@ -2022,14 +1934,14 @@ typedef struct fulltext_cursor {
    QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
    sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
    int eof;                         /* True if at End Of Results */
-  Query q;                         /* Parsed query string */
+  Fts3Expr *pExpr;                 /* Parsed MATCH query string */
    Snippet snippet;                 /* Cached snippet for the current row */
    int iColumn;                     /* Column being searched */
    DataBuffer result;               /* Doclist results from fulltextQuery */
    DLReader reader;                 /* Result reader if result not empty */
  } fulltext_cursor;
  
-static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
+static fulltext_vtab *cursor_vtab(fulltext_cursor *c){
    return (fulltext_vtab *) c->base.pVtab;
  }
  
@@ -3177,18 +3089,6 @@ static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
    }
  }
  
-
-/* Free all of the dynamically allocated memory held by *q
-*/
-static void queryClear(Query *q){
-  int i;
-  for(i = 0; i < q->nTerms; ++i){
-    sqlite3_free(q->pTerms[i].pTerm);
-  }
-  sqlite3_free(q->pTerms);
-  CLEAR(q);
-}
-
  /* Free all of the dynamically allocated memory held by the
  ** Snippet
  */
@@ -3198,6 +3098,7 @@ static void snippetClear(Snippet *p){
    sqlite3_free(p->zSnippet);
    CLEAR(p);
  }
+
  /*
  ** Append a single entry to the p->aMatch[] log.
  */
@@ -3233,12 +3134,51 @@ static void snippetAppendMatch(
  #define FTS3_ROTOR_SZ   (32)
  #define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)
  
+/*
+** Function to iterate through the tokens of a compiled expression.
+*/
+static int nextExprToken(Fts3Expr **ppExpr, int *piToken){
+  Fts3Expr *p = *ppExpr;
+  int iToken = *piToken;
+  if( iToken<0 ){
+    /* In this case the expression p is the root of an expression tree.
+    ** Move to the first token in the expression tree.
+    */
+    while( p->pLeft ){
+      p = p->pLeft;
+    }
+    iToken = 0;
+  }else{
+    assert(p && p->eType==FTSQUERY_PHRASE );
+    if( iToken<(p->pPhrase->nToken-1) ){
+      iToken++;
+    }else{
+      iToken = 0;
+      while( p->pParent && p->pParent->pLeft!=p ){
+        assert( p->pParent->pRight==p );
+        p = p->pParent;
+      }
+      p = p->pParent;
+      if( p ){
+        p = p->pRight;
+        while( p->pLeft ){
+          p = p->pLeft;
+        }
+      }
+    }
+  }
+
+  *ppExpr = p;
+  *piToken = iToken;
+  return p?1:0;
+}
+
  /*
  ** Add entries to pSnippet->aMatch[] for every match that occurs against
  ** document zDoc[0..nDoc-1] which is stored in column iColumn.
  */
  static void snippetOffsetsOfColumn(
-  Query *pQuery,
+  fulltext_cursor *pCur,
    Snippet *pSnippet,
    int iColumn,
    const char *zDoc,
@@ -3249,8 +3189,6 @@ static void snippetOffsetsOfColumn(
    sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
    fulltext_vtab *pVtab;                /* The full text index */
    int nColumn;                         /* Number of columns in the index */
-  const QueryTerm *aTerm;              /* Query string terms */
-  int nTerm;                           /* Number of query string terms */  
    int i, j;                            /* Loop counters */
    int rc;                              /* Return code */
    unsigned int match, prevMatch;       /* Phrase search bitmasks */
@@ -3264,37 +3202,34 @@ static void snippetOffsetsOfColumn(
    int iRotorBegin[FTS3_ROTOR_SZ];      /* Beginning offset of token */
    int iRotorLen[FTS3_ROTOR_SZ];        /* Length of token */
  
-  pVtab = pQuery->pFts;
+  pVtab = cursor_vtab(pCur);
    nColumn = pVtab->nColumn;
    pTokenizer = pVtab->pTokenizer;
    pTModule = pTokenizer->pModule;
    rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
    if( rc ) return;
    pTCursor->pTokenizer = pTokenizer;
-  aTerm = pQuery->pTerms;
-  nTerm = pQuery->nTerms;
-  if( nTerm>=FTS3_ROTOR_SZ ){
-    nTerm = FTS3_ROTOR_SZ - 1;
-  }
+
    prevMatch = 0;
-  while(1){
-    rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
-    if( rc ) break;
+  while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){
+    Fts3Expr *pIter = pCur->pExpr;
+    int iIter = -1;
      iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
      iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
      match = 0;
-    for(i=0; i<nTerm; i++){
-      int iCol;
-      iCol = aTerm[i].iColumn;
+    for(i=0; i<(FTS3_ROTOR_SZ-1) && nextExprToken(&pIter, &iIter); i++){
+      int nPhrase = pIter->pPhrase->nToken;   /* Tokens in current phrase */
+      struct PhraseToken *pToken = &pIter->pPhrase->aToken[iIter];
+      int iCol = pIter->pPhrase->iColumn;
        if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
-      if( aTerm[i].nTerm>nToken ) continue;
-      if( !aTerm[i].isPrefix && aTerm[i].nTerm<nToken ) continue;
-      assert( aTerm[i].nTerm<=nToken );
-      if( memcmp(aTerm[i].pTerm, zToken, aTerm[i].nTerm) ) continue;
-      if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
+      if( pToken->n>nToken ) continue;
+      if( !pToken->isPrefix && pToken->n<nToken ) continue;
+      assert( pToken->n<=nToken );
+      if( memcmp(pToken->z, zToken, pToken->n) ) continue;
+      if( iIter>0 && (prevMatch & (1<<i))==0 ) continue;
        match |= 1<<i;
-      if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
-        for(j=aTerm[i].iPhrase-1; j>=0; j--){
+      if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){
+        for(j=nPhrase-1; j>=0; j--){
            int k = (iRotor-j) & FTS3_ROTOR_MASK;
            snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
                  iRotorBegin[k], iRotorLen[k]);
@@ -3324,86 +3259,113 @@ static void snippetOffsetsOfColumn(
  ** then when this function is called the Snippet contains token offsets
  ** 0, 4 and 5. This function removes the "0" entry (because the first A
  ** is not near enough to an E).
+**
+** When this function is called, the value pointed to by parameter piLeft is
+** the integer id of the left-most token in the expression tree headed by
+** pExpr. This function increments *piLeft by the total number of tokens
+** in the expression tree headed by pExpr.
  */
-static void trimSnippetOffsetsForNear(Query *pQuery, Snippet *pSnippet){
-  int ii;
-  int iDir = 1;
-
-  while(iDir>-2) {
-    assert( iDir==1 || iDir==-1 );
-    for(ii=0; ii<pSnippet->nMatch; ii++){
-      int jj;
-      int nNear;
-      struct snippetMatch *pMatch = &pSnippet->aMatch[ii];
-      QueryTerm *pQueryTerm = &pQuery->pTerms[pMatch->iTerm];
-
-      if( (pMatch->iTerm+iDir)<0 
-       || (pMatch->iTerm+iDir)>=pQuery->nTerms
-      ){
-        continue;
-      }
-     
-      nNear = pQueryTerm->nNear;
-      if( iDir<0 ){
-        nNear = pQueryTerm[-1].nNear;
-      }
-  
-      if( pMatch->iTerm>=0 && nNear ){
-        int isOk = 0;
-        int iNextTerm = pMatch->iTerm+iDir;
-        int iPrevTerm = iNextTerm;
-
-        int iEndToken;
-        int iStartToken;
-
-        if( iDir<0 ){
-          int nPhrase = 1;
-          iStartToken = pMatch->iToken;
-          while( (pMatch->iTerm+nPhrase)<pQuery->nTerms 
-              && pQuery->pTerms[pMatch->iTerm+nPhrase].iPhrase>1 
-          ){
-            nPhrase++;
-          }
-          iEndToken = iStartToken + nPhrase - 1;
-        }else{
-          iEndToken   = pMatch->iToken;
-          iStartToken = pMatch->iToken+1-pQueryTerm->iPhrase;
-        }
+static int trimSnippetOffsets(
+  Fts3Expr *pExpr, 
+  Snippet *pSnippet,
+  int *piLeft
+){
+  if( pExpr ){
+    if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
+      return 1;
+    }
  
-        while( pQuery->pTerms[iNextTerm].iPhrase>1 ){
-          iNextTerm--;
-        }
-        while( (iPrevTerm+1)<pQuery->nTerms && 
-               pQuery->pTerms[iPrevTerm+1].iPhrase>1 
-        ){
-          iPrevTerm++;
+    switch( pExpr->eType ){
+      case FTSQUERY_PHRASE:
+        *piLeft += pExpr->pPhrase->nToken;
+        break;
+      case FTSQUERY_NEAR: {
+        /* The right-hand-side of a NEAR operator is always a phrase. The
+        ** left-hand-side is either a phrase or an expression tree that is 
+        ** itself headed by a NEAR operator. The following initializations
+        ** set local variable iLeft to the token number of the left-most
+        ** token in the right-hand phrase, and nToken to the combined token
+        ** count of that phrase and its left neighbour. For example, if we had:
+        **
+        **     <col> MATCH '"abc def" NEAR/2 "ghi jkl"'
+        **
+        ** then iLeft will be set to 2 (token number of ghi) and nToken will
+        ** be set to 4.
+        */
+        Fts3Expr *pLeft = pExpr->pLeft;
+        Fts3Expr *pRight = pExpr->pRight;
+        int iLeft = *piLeft;
+        int nNear = pExpr->nNear;
+        int nToken = pRight->pPhrase->nToken;
+        int jj, ii;
+        if( pLeft->eType==FTSQUERY_NEAR ){
+          pLeft = pLeft->pRight;
          }
-  
-        for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
-          struct snippetMatch *p = &pSnippet->aMatch[jj];
-          if( p->iCol==pMatch->iCol && ((
-               p->iTerm==iNextTerm && 
-               p->iToken>iEndToken && 
-               p->iToken<=iEndToken+nNear
-          ) || (
-               p->iTerm==iPrevTerm && 
-               p->iToken<iStartToken && 
-               p->iToken>=iStartToken-nNear
-          ))){
-            isOk = 1;
+        assert( pRight->eType==FTSQUERY_PHRASE );
+        assert( pLeft->eType==FTSQUERY_PHRASE );
+        nToken += pLeft->pPhrase->nToken;
+
+        for(ii=0; ii<pSnippet->nMatch; ii++){
+          struct snippetMatch *p = &pSnippet->aMatch[ii];
+          if( p->iTerm==iLeft ){
+            int isOk = 0;
+            /* Snippet ii is an occurrence of query term iLeft in the document.
+            ** It occurs at position (p->iToken) of the document. We now
+            ** search for an instance of token (iLeft-1) somewhere in the 
+            ** range (p->iToken - nNear)...(p->iToken + nNear + nToken) within 
+            ** the set of snippetMatch structures. If one is found, proceed. 
+            ** If one cannot be found, then remove snippets ii..(ii+N-1) 
+            ** from the matching snippets, where N is the number of tokens 
+            ** in phrase pRight->pPhrase.
+            */
+            for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
+              struct snippetMatch *p2 = &pSnippet->aMatch[jj];
+              if( p2->iTerm==(iLeft-1) ){
+                if( p2->iToken>=(p->iToken-nNear-1) 
+                 && p2->iToken<(p->iToken+nNear+nToken) 
+                ){
+                  isOk = 1;
+                }
+              }
+            }
+            if( !isOk ){
+              int kk;
+              for(kk=0; kk<pRight->pPhrase->nToken; kk++){
+                pSnippet->aMatch[kk+ii].iTerm = -2;
+              }
+              return 1;
+            }
            }
-        }
-        if( !isOk ){
-          for(jj=1-pQueryTerm->iPhrase; jj<=0; jj++){
-            pMatch[jj].iTerm = -1;
+          if( p->iTerm==(iLeft-1) ){
+            int isOk = 0;
+            for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
+              struct snippetMatch *p2 = &pSnippet->aMatch[jj];
+              if( p2->iTerm==iLeft ){
+                if( p2->iToken<=(p->iToken+nNear+1) 
+                 && p2->iToken>(p->iToken-nNear-nToken) 
+                ){
+                  isOk = 1;
+                }
+              }
+            }
+            if( !isOk ){
+              int kk;
+              for(kk=0; kk<pLeft->pPhrase->nToken; kk++){
+                pSnippet->aMatch[ii-kk].iTerm = -2;
+              }
+              return 1;
+            }
            }
-          ii = -1;
-          iDir = 1;
          }
+        break;
        }
      }
-    iDir -= 2;
+
+    if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){
+      return 1;
+    }
    }
+  return 0;
  }
  
  /*
@@ -3414,11 +3376,12 @@ static void snippetAllOffsets(fulltext_cursor *p){
    int nColumn;
    int iColumn, i;
    int iFirst, iLast;
-  fulltext_vtab *pFts;
+  int iTerm = 0;
+  fulltext_vtab *pFts = cursor_vtab(p);
  
-  if( p->snippet.nMatch ) return;
-  if( p->q.nTerms==0 ) return;
-  pFts = p->q.pFts;
+  if( p->snippet.nMatch || p->pExpr==0 ){
+    return;
+  }
    nColumn = pFts->nColumn;
    iColumn = (p->iCursorType - QUERY_FULLTEXT);
    if( iColumn<0 || iColumn>=nColumn ){
@@ -3433,15 +3396,18 @@ static void snippetAllOffsets(fulltext_cursor *p){
      int nDoc;
      zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
      nDoc = sqlite3_column_bytes(p->pStmt, i+1);
-    snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
+    snippetOffsetsOfColumn(p, &p->snippet, i, zDoc, nDoc);
    }
  
-  trimSnippetOffsetsForNear(&p->q, &p->snippet);
+  while( trimSnippetOffsets(p->pExpr, &p->snippet, &iTerm) ){
+    iTerm = 0;
+  }
  }
  
  /*
  ** Convert the information in the aMatch[] array of the snippet
-** into the string zOffset[0..nOffset-1].
+** into the string zOffset[0..nOffset-1]. This string is used as
+** the return of the SQL offsets() function.
  */
  static void snippetOffsetText(Snippet *p){
    int i;
@@ -3556,7 +3522,7 @@ static void snippetText(
      aMatch[i].snStatus = SNIPPET_IGNORE;
    }
    nDesired = 0;
-  for(i=0; i<pCursor->q.nTerms; i++){
+  for(i=0; i<FTS3_ROTOR_SZ; i++){
      for(j=0; j<nMatch; j++){
        if( aMatch[j].iTerm==i ){
          aMatch[j].snStatus = SNIPPET_DESIRED;
@@ -3644,9 +3610,11 @@ static int fulltextClose(sqlite3_vtab_cursor *pCursor){
    fulltext_cursor *c = (fulltext_cursor *) pCursor;
    FTSTRACE(("FTS3 Close %p\n", c));
    sqlite3_finalize(c->pStmt);
-  queryClear(&c->q);
+  sqlite3Fts3ExprFree(c->pExpr);
    snippetClear(&c->snippet);
-  if( c->result.nData!=0 ) dlrDestroy(&c->reader);
+  if( c->result.nData!=0 ){
+    dlrDestroy(&c->reader);
+  }
    dataBufferDestroy(&c->result);
    sqlite3_free(c);
    return SQLITE_OK;
@@ -3703,255 +3671,127 @@ static int termSelect(fulltext_vtab *v, int iColumn,
                        const char *pTerm, int nTerm, int isPrefix,
                        DocListType iType, DataBuffer *out);
  
-/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
-** is the first term of a phrase query, go ahead and evaluate the phrase
-** query and return the doclist for the entire phrase query.
+/* 
+** Return a DocList corresponding to the phrase *pPhrase.
  **
  ** The resulting DL_DOCIDS doclist is stored in pResult, which is
  ** overwritten.
  */
-static int docListOfTerm(
-  fulltext_vtab *v,    /* The full text index */
-  int iColumn,         /* column to restrict to.  No restriction if >=nColumn */
-  QueryTerm *pQTerm,   /* Term we are looking for, or 1st term of a phrase */
-  DataBuffer *pResult  /* Write the result here */
+static int docListOfPhrase(
+  fulltext_vtab *pTab,   /* The full text index */
+  Fts3Phrase *pPhrase,   /* Phrase to return a doclist corresponding to */
+  DocListType eListType, /* Either DL_DOCIDS or DL_POSITIONS */
+  DataBuffer *pResult    /* Write the result here */
  ){
-  DataBuffer left, right, new;
-  int i, rc;
-
-  /* No phrase search if no position info. */
-  assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );
+  int ii;
+  int rc = SQLITE_OK;
+  int iCol = pPhrase->iColumn;
+  DocListType eType = eListType;
+  assert( eType==DL_POSITIONS || eType==DL_DOCIDS );
+  if( pPhrase->nToken>1 ){
+    eType = DL_POSITIONS;
+  }
  
    /* This code should never be called with buffered updates. */
-  assert( v->nPendingData<0 );
+  assert( pTab->nPendingData<0 );
  
-  dataBufferInit(&left, 0);
-  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
-                  (0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS), &left);
-  if( rc ) return rc;
-  for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
-    /* If this token is connected to the next by a NEAR operator, and
-    ** the next token is the start of a phrase, then set nPhraseRight
-    ** to the number of tokens in the phrase. Otherwise leave it at 1.
-    */
-    int nPhraseRight = 1;
-    while( (i+nPhraseRight)<=pQTerm->nPhrase 
-        && pQTerm[i+nPhraseRight].nNear==0 
-    ){
-      nPhraseRight++;
-    }
-
-    dataBufferInit(&right, 0);
-    rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
-                    pQTerm[i].isPrefix, DL_POSITIONS, &right);
-    if( rc ){
-      dataBufferDestroy(&left);
-      return rc;
+  for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){
+    DataBuffer tmp;
+    struct PhraseToken *p = &pPhrase->aToken[ii];
+    rc = termSelect(pTab, iCol, p->z, p->n, p->isPrefix, eType, &tmp);
+    if( rc==SQLITE_OK ){
+      if( ii==0 ){
+        *pResult = tmp;
+      }else{
+        DataBuffer res = *pResult;
+        dataBufferInit(pResult, 0);
+        if( ii==(pPhrase->nToken-1) ){
+          eType = eListType;
+        }
+        docListPhraseMerge(
+          res.pData, res.nData, tmp.pData, tmp.nData, 0, 0, eType, pResult
+        );
+        dataBufferDestroy(&res);
+        dataBufferDestroy(&tmp);
+      }
      }
-    dataBufferInit(&new, 0);
-    docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
-                       pQTerm[i-1].nNear, pQTerm[i-1].iPhrase + nPhraseRight,
-                       ((i<pQTerm->nPhrase) ? DL_POSITIONS : DL_DOCIDS),
-                       &new);
-    dataBufferDestroy(&left);
-    dataBufferDestroy(&right);
-    left = new;
-  }
-  *pResult = left;
-  return SQLITE_OK;
-}
-
-/* Add a new term pTerm[0..nTerm-1] to the query *q.
-*/
-static void queryAdd(Query *q, const char *pTerm, int nTerm){
-  QueryTerm *t;
-  ++q->nTerms;
-  q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
-  if( q->pTerms==0 ){
-    q->nTerms = 0;
-    return;
    }
-  t = &q->pTerms[q->nTerms - 1];
-  CLEAR(t);
-  t->pTerm = sqlite3_malloc(nTerm+1);
-  memcpy(t->pTerm, pTerm, nTerm);
-  t->pTerm[nTerm] = 0;
-  t->nTerm = nTerm;
-  t->isOr = q->nextIsOr;
-  t->isPrefix = 0;
-  q->nextIsOr = 0;
-  t->iColumn = q->nextColumn;
-  q->nextColumn = q->dfltColumn;
-}
  
-/*
-** Check to see if the string zToken[0...nToken-1] matches any
-** column name in the virtual table.   If it does,
-** return the zero-indexed column number.  If not, return -1.
-*/
-static int checkColumnSpecifier(
-  fulltext_vtab *pVtab,    /* The virtual table */
-  const char *zToken,      /* Text of the token */
-  int nToken               /* Number of characters in the token */
-){
-  int i;
-  for(i=0; i<pVtab->nColumn; i++){
-    if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
-        && pVtab->azColumn[i][nToken]==0 ){
-      return i;
-    }
-  }
-  return -1;
+  return rc;
  }
  
  /*
-** Parse the text at zSegment[0..nSegment-1].  Add additional terms
-** to the query being assemblied in pQuery.
-**
-** inPhrase is true if zSegment[0..nSegement-1] is contained within
-** double-quotes.  If inPhrase is true, then the first term
-** is marked with the number of terms in the phrase less one and
-** OR and "-" syntax is ignored.  If inPhrase is false, then every
-** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
-*/
-static int tokenizeSegment(
-  sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
-  const char *zSegment, int nSegment,     /* Query expression being parsed */
-  int inPhrase,                           /* True if within "..." */
-  Query *pQuery                           /* Append results here */
+** Evaluate the full-text expression pExpr against fts3 table pTab. Write
+** the results into pRes.
+*/
+static int evalFts3Expr(
+  fulltext_vtab *pTab,           /* Fts3 Virtual table object */
+  Fts3Expr *pExpr,               /* Parsed fts3 expression */
+  DataBuffer *pRes               /* OUT: Write results of the expression here */
  ){
-  const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
-  sqlite3_tokenizer_cursor *pCursor;
-  int firstIndex = pQuery->nTerms;
-  int iCol;
-  int nTerm = 1;
-  
-  int rc = pModule->xOpen(pTokenizer, zSegment, nSegment, &pCursor);
-  if( rc!=SQLITE_OK ) return rc;
-  pCursor->pTokenizer = pTokenizer;
+  int rc = SQLITE_OK;
  
-  while( 1 ){
-    const char *zToken;
-    int nToken, iBegin, iEnd, iPos;
+  /* Initialize the output buffer. If this is an empty query (pExpr==0), 
+  ** this is all that needs to be done. Empty queries produce empty 
+  ** result sets.
+  */
+  dataBufferInit(pRes, 0);
  
-    rc = pModule->xNext(pCursor,
-                        &zToken, &nToken,
-                        &iBegin, &iEnd, &iPos);
-    if( rc!=SQLITE_OK ) break;
-    if( !inPhrase &&
-        zSegment[iEnd]==':' &&
-         (iCol = checkColumnSpecifier(pQuery->pFts, zToken, nToken))>=0 ){
-      pQuery->nextColumn = iCol;
-      continue;
-    }
-    if( !inPhrase && pQuery->nTerms>0 && nToken==2 
-     && zSegment[iBegin+0]=='O'
-     && zSegment[iBegin+1]=='R' 
-    ){
-      pQuery->nextIsOr = 1;
-      continue;
-    }
-    if( !inPhrase && pQuery->nTerms>0 && !pQuery->nextIsOr && nToken==4 
-      && memcmp(&zSegment[iBegin], "NEAR", 4)==0
-    ){
-      QueryTerm *pTerm = &pQuery->pTerms[pQuery->nTerms-1];
-      if( (iBegin+6)<nSegment 
-       && zSegment[iBegin+4] == '/'
-       && isdigit(zSegment[iBegin+5])
-      ){
-        int k;
-        pTerm->nNear = 0;
-        for(k=5; (iBegin+k)<=nSegment && isdigit(zSegment[iBegin+k]); k++){
-          pTerm->nNear = pTerm->nNear*10 + (zSegment[iBegin+k] - '0');
-        }
-        pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
-      } else {
-        pTerm->nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
+  if( pExpr ){
+    if( pExpr->eType==FTSQUERY_PHRASE ){
+      DocListType eType = DL_DOCIDS;
+      if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
+        eType = DL_POSITIONS;
        }
-      pTerm->nNear++;
-      continue;
-    }
-
-    queryAdd(pQuery, zToken, nToken);
-    if( !inPhrase && iBegin>0 && zSegment[iBegin-1]=='-' ){
-      pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
-    }
-    if( iEnd<nSegment && zSegment[iEnd]=='*' ){
-      pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1;
-    }
-    pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
-    if( inPhrase ){
-      nTerm++;
-    }
-  }
-
-  if( inPhrase && pQuery->nTerms>firstIndex ){
-    pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
-  }
-
-  return pModule->xClose(pCursor);
-}
-
-/* Parse a query string, yielding a Query object pQuery.
-**
-** The calling function will need to queryClear() to clean up
-** the dynamically allocated memory held by pQuery.
-*/
-static int parseQuery(
-  fulltext_vtab *v,        /* The fulltext index */
-  const char *zInput,      /* Input text of the query string */
-  int nInput,              /* Size of the input text */
-  int dfltColumn,          /* Default column of the index to match against */
-  Query *pQuery            /* Write the parse results here. */
-){
-  int iInput, inPhrase = 0;
-  int ii;
-  QueryTerm *aTerm;
-
-  if( zInput==0 ) nInput = 0;
-  if( nInput<0 ) nInput = strlen(zInput);
-  pQuery->nTerms = 0;
-  pQuery->pTerms = NULL;
-  pQuery->nextIsOr = 0;
-  pQuery->nextColumn = dfltColumn;
-  pQuery->dfltColumn = dfltColumn;
-  pQuery->pFts = v;
-
-  for(iInput=0; iInput<nInput; ++iInput){
-    int i;
-    for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
-    if( i>iInput ){
-      tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
-                       pQuery);
-    }
-    iInput = i;
-    if( i<nInput ){
-      assert( zInput[i]=='"' );
-      inPhrase = !inPhrase;
-    }
-  }
-
-  if( inPhrase ){
-    /* unmatched quote */
-    queryClear(pQuery);
-    return SQLITE_ERROR;
-  }
+      rc = docListOfPhrase(pTab, pExpr->pPhrase, eType, pRes);
+    }else{
+      DataBuffer lhs;
+      DataBuffer rhs;
  
-  /* Modify the values of the QueryTerm.nPhrase variables to account for
-  ** the NEAR operator. For the purposes of QueryTerm.nPhrase, phrases
-  ** and tokens connected by the NEAR operator are handled as a single
-  ** phrase. See comments above the QueryTerm structure for details.
-  */
-  aTerm = pQuery->pTerms;
-  for(ii=0; ii<pQuery->nTerms; ii++){
-    if( aTerm[ii].nNear || aTerm[ii].nPhrase ){
-      while (aTerm[ii+aTerm[ii].nPhrase].nNear) {
-        aTerm[ii].nPhrase += (1 + aTerm[ii+aTerm[ii].nPhrase+1].nPhrase);
+      dataBufferInit(&rhs, 0);
+      if( SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pLeft, &lhs)) 
+       && SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pRight, &rhs)) 
+      ){
+        switch( pExpr->eType ){
+          case FTSQUERY_NEAR: {
+            int nToken;
+            Fts3Expr *pLeft;
+            DocListType eType = DL_DOCIDS;
+            if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
+              eType = DL_POSITIONS;
+            }
+            pLeft = pExpr->pLeft;
+            while( pLeft->eType==FTSQUERY_NEAR ){ 
+              pLeft=pLeft->pRight;
+            }
+            assert( pExpr->pRight->eType==FTSQUERY_PHRASE );
+            assert( pLeft->eType==FTSQUERY_PHRASE );
+            nToken = pLeft->pPhrase->nToken + pExpr->pRight->pPhrase->nToken;
+            docListPhraseMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, 
+                pExpr->nNear+1, nToken, eType, pRes
+            );
+            break;
+          }
+          case FTSQUERY_NOT: {
+            docListExceptMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData,pRes);
+            break;
+          }
+          case FTSQUERY_AND: {
+            docListAndMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes);
+            break;
+          }
+          case FTSQUERY_OR: {
+            docListOrMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes);
+            break;
+          }
+        }
        }
+      dataBufferDestroy(&lhs);
+      dataBufferDestroy(&rhs);
      }
    }
  
-  return SQLITE_OK;
+  return rc;
  }
  
  /* TODO(shess) Refactor the code to remove this forward decl. */
@@ -3970,12 +3810,9 @@ static int fulltextQuery(
    const char *zInput,    /* The query string */
    int nInput,            /* Number of bytes in zInput[] */
    DataBuffer *pResult,   /* Write the result doclist here */
-  Query *pQuery          /* Put parsed query string here */
+  Fts3Expr **ppExpr        /* Put parsed query string here */
  ){
-  int i, iNext, rc;
-  DataBuffer left, right, or, new;
-  int nNot = 0;
-  QueryTerm *aTerm;
+  int rc;
  
    /* TODO(shess) Instead of flushing pendingTerms, we could query for
    ** the relevant term and merge the doclist into what we receive from
@@ -3987,86 +3824,20 @@ static int fulltextQuery(
  
    /* Flush any buffered updates before executing the query. */
    rc = flushPendingTerms(v);
-  if( rc!=SQLITE_OK ) return rc;
-
-  /* TODO(shess) I think that the queryClear() calls below are not
-  ** necessary, because fulltextClose() already clears the query.
-  */
-  rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
-  if( rc!=SQLITE_OK ) return rc;
-
-  /* Empty or NULL queries return no results. */
-  if( pQuery->nTerms==0 ){
-    dataBufferInit(pResult, 0);
-    return SQLITE_OK;
-  }
-
-  /* Merge AND terms. */
-  /* TODO(shess) I think we can early-exit if( i>nNot && left.nData==0 ). */
-  aTerm = pQuery->pTerms;
-  for(i = 0; i<pQuery->nTerms; i=iNext){
-    if( aTerm[i].isNot ){
-      /* Handle all NOT terms in a separate pass */
-      nNot++;
-      iNext = i + aTerm[i].nPhrase+1;
-      continue;
-    }
-    iNext = i + aTerm[i].nPhrase + 1;
-    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);
-    if( rc ){
-      if( i!=nNot ) dataBufferDestroy(&left);
-      queryClear(pQuery);
-      return rc;
-    }
-    while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
-      rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or);
-      iNext += aTerm[iNext].nPhrase + 1;
-      if( rc ){
-        if( i!=nNot ) dataBufferDestroy(&left);
-        dataBufferDestroy(&right);
-        queryClear(pQuery);
-        return rc;
-      }
-      dataBufferInit(&new, 0);
-      docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new);
-      dataBufferDestroy(&right);
-      dataBufferDestroy(&or);
-      right = new;
-    }
-    if( i==nNot ){           /* first term processed. */
-      left = right;
-    }else{
-      dataBufferInit(&new, 0);
-      docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new);
-      dataBufferDestroy(&right);
-      dataBufferDestroy(&left);
-      left = new;
-    }
-  }
-
-  if( nNot==pQuery->nTerms ){
-    /* We do not yet know how to handle a query of only NOT terms */
-    return SQLITE_ERROR;
+  if( rc!=SQLITE_OK ){
+    return rc;
    }
  
-  /* Do the EXCEPT terms */
-  for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
-    if( !aTerm[i].isNot ) continue;
-    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);
-    if( rc ){
-      queryClear(pQuery);
-      dataBufferDestroy(&left);
-      return rc;
-    }
-    dataBufferInit(&new, 0);
-    docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new);
-    dataBufferDestroy(&right);
-    dataBufferDestroy(&left);
-    left = new;
+  /* Parse the query passed to the MATCH operator. */
+  rc = sqlite3Fts3ExprParse(v->pTokenizer, 
+      v->azColumn, v->nColumn, iColumn, zInput, nInput, ppExpr
+  );
+  if( rc!=SQLITE_OK ){
+    assert( 0==(*ppExpr) );
+    return rc;
    }
  
-  *pResult = left;
-  return rc;
+  return evalFts3Expr(v, *ppExpr, pResult);
  }
  
  /*
@@ -4146,10 +3917,10 @@ static int fulltextFilter(
  
      default:   /* full-text search */
      {
+      int iCol = idxNum-QUERY_FULLTEXT;
        const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
        assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
        assert( argc==1 );
-      queryClear(&c->q);
        if( c->result.nData!=0 ){
          /* This case happens if the same cursor is used repeatedly. */
          dlrDestroy(&c->reader);
@@ -4157,7 +3928,7 @@ static int fulltextFilter(
        }else{
          dataBufferInit(&c->result, 0);
        }
-      rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q);
+      rc = fulltextQuery(v, iCol, zQuery, -1, &c->result, &c->pExpr);
        if( rc!=SQLITE_OK ) return rc;
        if( c->result.nData!=0 ){
          dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
@@ -6041,9 +5812,14 @@ static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
  /* Scan the database and merge together the posting lists for the term
  ** into *out.
  */
-static int termSelect(fulltext_vtab *v, int iColumn,
-                      const char *pTerm, int nTerm, int isPrefix,
-                      DocListType iType, DataBuffer *out){
+static int termSelect(
+  fulltext_vtab *v, 
+  int iColumn,
+  const char *pTerm, int nTerm,             /* Term to query for */
+  int isPrefix,                             /* True for a prefix search */
+  DocListType iType, 
+  DataBuffer *out                           /* Write results here */
+){
    DataBuffer doclist;
    sqlite3_stmt *s;
    int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
@@ -6053,6 +5829,7 @@ static int termSelect(fulltext_vtab *v, int iColumn,
    assert( v->nPendingData<0 );
  
    dataBufferInit(&doclist, 0);
+  dataBufferInit(out, 0);
  
    /* Traverse the segments from oldest to newest so that newer doclist
    ** elements for given docids overwrite older elements.
@@ -7170,6 +6947,10 @@ int sqlite3Fts3Init(sqlite3 *db){
      }
    }
  
+#ifdef SQLITE_TEST
+  sqlite3Fts3ExprInitTestInterface(db);
+#endif
+
    /* Create the virtual table wrapper around the hash-table and overload 
    ** the two scalar functions. If this is successful, register the
    ** module with sqlite.
diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c

new file mode 100644 (file)

index 0000000..546dcba
--- /dev/null
+++ b/ext/fts3/fts3_expr.c
@@ -0,0 +1,864 @@
+/*
+** 2008 Nov 28
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This module contains code that implements a parser for fts3 query strings
+** (the right-hand argument to the MATCH operator). Because the supported 
+** syntax is relatively simple, the whole tokenizer/parser system is
+** hand-coded. The public interface to this module is declared in source
+** code file "fts3_expr.h".
+*/
+
+/*
+** By default, this module parses the legacy syntax that has been 
+** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
+** is defined, then it uses the new syntax. The differences between
+** the new and the old syntaxes are:
+**
+**  a) The new syntax supports parenthesis. The old does not.
+**
+**  b) The new syntax supports the AND and NOT operators. The old does not.
+**
+**  c) The old syntax supports the "-" token qualifier. This is not 
+**     supported by the new syntax (it is replaced by the NOT operator).
+**
+**  d) When using the old syntax, the OR operator has a greater precedence
+**     than an implicit AND. When using the new, both implicit and explicit
+**     AND operators have a higher precedence than OR.
+**
+** If compiled with SQLITE_TEST defined, then this module exports the
+** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
+** to zero causes the module to use the old syntax. If it is set to 
+** non-zero the new syntax is activated. This is so both syntaxes can
+** be tested using a single build of testfixture.
+*/
+#ifdef SQLITE_TEST
+int sqlite3_fts3_enable_parentheses = 0;
+#else
+# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS 
+#  define sqlite3_fts3_enable_parentheses 1
+# else
+#  define sqlite3_fts3_enable_parentheses 0
+# endif
+#endif
+
+/*
+** Default span for NEAR operators.
+*/
+#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
+
+#include "fts3_expr.h"
+#include "sqlite3.h"
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+
+/*
+** State shared by the parsing routines in this file while a single MATCH
+** expression is being parsed. An instance is set up on the stack by
+** sqlite3Fts3ExprParse() and passed down to the static helpers.
+*/
+typedef struct ParseContext ParseContext;
+struct ParseContext {
+  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
+  const char **azCol;                 /* Array of column names for fts3 table */
+  int nCol;                           /* Number of entries in azCol[] */
+  int iDefaultCol;                    /* Default column to query */
+  sqlite3_context *pCtx;              /* Write error message here (not
+                                      ** assigned or read by the code in
+                                      ** this file) */
+  int nNest;                          /* Number of nested brackets: bumped for
+                                      ** each '(' and decremented for each ')',
+                                      ** so non-zero after parsing means the
+                                      ** parentheses were mismatched */
+};
+
+/*
+** A wrapper around the standard isspace() that is safe to call with any
+** value of type char.
+**
+** isspace() takes an int, but its behaviour is undefined for arguments
+** outside the range of unsigned char (other than EOF). Where char is a
+** signed type, bytes with the high bit set become negative ints and can
+** crash some isspace() implementations. This wrapper returns 0 for every
+** byte that has bit 0x80 set and otherwise returns whatever isspace()
+** returns.
+*/
+static int safe_isspace(char c){
+  if( c & 0x80 ){
+    return 0;
+  }
+  return isspace(c);
+}
+
+/*
+** Extract the next token from buffer z (length n) using the tokenizer
+** and other information (column names etc.) in pParse. Create an Fts3Expr
+** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
+** single token and set *ppExpr to point to it. The Fts3Expr, its Fts3Phrase
+** and the token text are all stored in a single allocation, so the caller
+** eventually releases everything with one call to sqlite3_free().
+**
+** A '*' immediately following the token marks it as a prefix token and is
+** consumed along with it. When the legacy syntax is in use, a '-' 
+** immediately preceding the token sets Fts3Phrase.isNot.
+**
+** *pnConsumed is set to the number of bytes of z consumed. It is set to
+** zero whenever no token was extracted.
+**
+** Return values:
+**
+**   SQLITE_OK     A token was found; *ppExpr points to the new node.
+**   SQLITE_DONE   The end of the buffer was reached before a token was
+**                 found (this is what the tokenizer xNext() method
+**                 returns); *ppExpr is set to zero.
+**   SQLITE_NOMEM  A memory allocation failed; *ppExpr is set to zero.
+**
+** Any other error code returned by the tokenizer is passed through with
+** *ppExpr set to zero.
+*/
+static int getNextToken(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  int iCol,                               /* Value for Fts3Phrase.iColumn */
+  const char *z, int n,                   /* Input string */
+  Fts3Expr **ppExpr,                      /* OUT: expression */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  int rc;
+  sqlite3_tokenizer_cursor *pCursor;
+  Fts3Expr *pRet = 0;
+  int nConsumed = 0;
+
+  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
+  if( rc==SQLITE_OK ){
+    const char *zToken;
+    int nToken, iStart, iEnd, iPosition;
+    int nByte;                               /* total space to allocate */
+
+    pCursor->pTokenizer = pTokenizer;
+    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
+
+    if( rc==SQLITE_OK ){
+      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
+      pRet = (Fts3Expr *)sqlite3_malloc(nByte);
+      if( !pRet ){
+        rc = SQLITE_NOMEM;
+      }else{
+        memset(pRet, 0, nByte);
+        pRet->eType = FTSQUERY_PHRASE;
+        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
+        pRet->pPhrase->nToken = 1;
+        pRet->pPhrase->iColumn = iCol;
+        pRet->pPhrase->aToken[0].n = nToken;
+        /* The token text lives directly after the Fts3Phrase structure. */
+        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
+        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
+
+        if( iEnd<n && z[iEnd]=='*' ){
+          pRet->pPhrase->aToken[0].isPrefix = 1;
+          iEnd++;
+        }
+        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
+          pRet->pPhrase->isNot = 1;
+        }
+      }
+
+      /* iEnd is only guaranteed to have been written by xNext() when it
+      ** returned SQLITE_OK, so it must not be read on any other path.
+      */
+      nConsumed = iEnd;
+    }
+
+    pModule->xClose(pCursor);
+  }
+  
+  *pnConsumed = nConsumed;
+  *ppExpr = pRet;
+  return rc;
+}
+
+/*
+** Resize the allocation at *ppOrig to nNew bytes using sqlite3_realloc()
+** and store the new pointer back in *ppOrig. If the resize fails, the
+** original allocation is released and *ppOrig is set to 0, so the caller
+** never has to free the old buffer itself.
+**
+** sqlite3_realloc() with a size of zero or less already behaves like
+** sqlite3_free() and returns NULL. In that case the original buffer must
+** not be freed a second time.
+**
+** This helper is private to this file, so it is declared static to keep
+** the symbol out of the global namespace.
+*/
+static void realloc_or_free(void **ppOrig, int nNew){
+  void *pRet = sqlite3_realloc(*ppOrig, nNew);
+  if( !pRet && nNew>0 ){
+    sqlite3_free(*ppOrig);
+  }
+  *ppOrig = pRet;
+}
+
+/*
+** Buffer zInput, length nInput, contains the contents of a quoted string
+** that appeared as part of an fts3 query expression. Neither quote character
+** is included in the buffer. This function attempts to tokenize the entire
+** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE 
+** containing the results.
+**
+** While tokenizing, the Fts3Expr/Fts3Phrase/aToken[] array is grown one
+** entry at a time and the token text is accumulated in a separate temporary
+** buffer. Once the tokenizer reports SQLITE_DONE, the text is appended to
+** the same allocation as the structures and the aToken[].z pointers are
+** set, so the result can be released with a single sqlite3_free().
+**
+** If successful, SQLITE_OK is returned and *ppExpr set to point at the
+** allocated Fts3Expr structure. Otherwise *ppExpr is set to 0 and one of
+** the following is returned:
+**
+**   SQLITE_NOMEM  A memory allocation failed.
+**   SQLITE_ERROR  The quoted string contains no tokens at all (for
+**                 example ""). No phrase node can be built from it.
+**   (other)       Any error code returned by the tokenizer xOpen() or
+**                 xNext() methods is passed through unchanged.
+*/
+static int getNextString(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *zInput, int nInput,         /* Input string */
+  Fts3Expr **ppExpr                       /* OUT: expression */
+){
+  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  int rc;
+  Fts3Expr *p = 0;
+  sqlite3_tokenizer_cursor *pCursor = 0;
+  char *zTemp = 0;                        /* Concatenated text of all tokens */
+  int nTemp = 0;                          /* Bytes used in zTemp */
+  int ii;
+  int jj;
+  char *zNew;
+  int nNew = 0;
+  int nByte;
+
+  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
+  if( rc!=SQLITE_OK ){
+    *ppExpr = 0;
+    return rc;
+  }
+  pCursor->pTokenizer = pTokenizer;
+
+  for(ii=0; rc==SQLITE_OK; ii++){
+    const char *zToken;
+    int nToken, iBegin, iEnd, iPos;
+    rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
+    if( rc==SQLITE_OK ){
+      /* sizeof(Fts3Phrase) already includes space for one PhraseToken, so
+      ** ii extra entries give room for tokens 0..ii.
+      */
+      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
+      realloc_or_free((void **)&p, nByte+ii*sizeof(struct PhraseToken));
+      realloc_or_free((void **)&zTemp, nTemp + nToken);
+      if( !p || !zTemp ){
+        goto no_mem;
+      }
+      if( ii==0 ){
+        memset(p, 0, nByte);
+        p->eType = FTSQUERY_PHRASE;
+      }
+      /* The realloc may have moved p, so re-derive the interior pointer. */
+      p->pPhrase = (Fts3Phrase *)&p[1];
+      if( ii==0 ){
+        p->pPhrase->iColumn = pParse->iDefaultCol;
+      }
+      p->pPhrase->nToken = ii+1;
+      p->pPhrase->aToken[ii].n = nToken;
+      memcpy(&zTemp[nTemp], zToken, nToken);
+      nTemp += nToken;
+      if( iEnd<nInput && zInput[iEnd]=='*' ){
+        p->pPhrase->aToken[ii].isPrefix = 1;
+      }else{
+        p->pPhrase->aToken[ii].isPrefix = 0;
+      }
+    }
+  }
+
+  pModule->xClose(pCursor);
+  pCursor = 0;
+
+  if( rc==SQLITE_DONE && p==0 ){
+    /* The quoted string did not contain a single token. There is no
+    ** phrase to build, so report a parse error instead of dereferencing
+    ** the NULL expression pointer below.
+    */
+    rc = SQLITE_ERROR;
+  }
+  if( rc!=SQLITE_DONE ){
+    /* Tokenizer error or empty phrase: discard any partial result. */
+    goto phrase_failed;
+  }
+
+  /* Move the token text into the tail of the expression allocation and
+  ** point each aToken[].z at its slice of that text.
+  */
+  nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
+  nByte += (p->pPhrase->nToken-1) * sizeof(struct PhraseToken);
+  realloc_or_free((void **)&p, nByte + nTemp);
+  if( !p ){
+    goto no_mem;
+  }
+  p->pPhrase = (Fts3Phrase *)&p[1];
+  zNew = &(((char *)p)[nByte]);
+  memcpy(zNew, zTemp, nTemp);
+  for(jj=0; jj<p->pPhrase->nToken; jj++){
+    p->pPhrase->aToken[jj].z = &zNew[nNew];
+    nNew += p->pPhrase->aToken[jj].n;
+  }
+  sqlite3_free(zTemp);
+
+  *ppExpr = p;
+  return SQLITE_OK;
+
+no_mem:
+  rc = SQLITE_NOMEM;
+
+phrase_failed:
+  if( pCursor ){
+    pModule->xClose(pCursor);
+  }
+  sqlite3_free(zTemp);
+  sqlite3_free(p);
+  *ppExpr = 0;
+  return rc;
+}
+
+/*
+** Function getNextNode(), which is called by fts3ExprParse(), may itself
+** call fts3ExprParse(). So this forward declaration is required.
+*/
+static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
+
+/*
+** Read the next node of the query expression from buffer z (length n).
+** The node is one of:
+**
+**   * an operator keyword (OR and NEAR always; AND and NOT only when the
+**     new syntax is enabled), returned as a bare Fts3Expr of that type,
+**   * a parenthesized sub-expression (new syntax only), parsed recursively
+**     by fts3ExprParse(),
+**   * a quoted phrase, parsed by getNextString(), or
+**   * a single token with an optional "column:" prefix, parsed by
+**     getNextToken().
+**
+** The output variable *ppExpr is populated with an allocated Fts3Expr 
+** structure, or set to 0 if no node was produced. *pnConsumed is set to
+** the number of bytes of z consumed, including leading whitespace.
+**
+** Return values:
+**
+**   SQLITE_OK     A node was read and stored in *ppExpr.
+**   SQLITE_DONE   The end of the input or a closing bracket was reached.
+**   SQLITE_NOMEM  A memory allocation failed.
+**   SQLITE_ERROR  Parse error: an unterminated quoted string, an empty
+**                 pair of brackets, or an error from a sub-parser.
+**
+** Bytes at or beyond z[n] are never read, so z does not need to be
+** nul-terminated.
+*/
+static int getNextNode(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *z, int n,                   /* Input string */
+  Fts3Expr **ppExpr,                      /* OUT: expression */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  static const struct Fts3Keyword {
+    const char *z;                        /* Keyword text */
+    int n;                                /* Length of the keyword */
+    int eType;                            /* FTSQUERY_XXX value for keyword */
+  } aKeyword[] = {
+    { "OR" ,  2, FTSQUERY_OR   },
+    { "AND",  3, FTSQUERY_AND  },
+    { "NOT",  3, FTSQUERY_NOT  },
+    { "NEAR", 4, FTSQUERY_NEAR }
+  };
+  int ii;
+  int iCol;
+  int iColLen;
+  int rc;
+  Fts3Expr *pRet = 0;
+
+  const char *zInput = z;
+  int nInput = n;
+
+  *ppExpr = 0;
+
+  /* Skip over any whitespace before checking for a keyword, an open or
+  ** close bracket, or a quoted string. 
+  */
+  while( nInput>0 && safe_isspace(*zInput) ){
+    nInput--;
+    zInput++;
+  }
+
+  /* See if we are dealing with a keyword. */
+  for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(aKeyword[0])); ii++){
+    const struct Fts3Keyword *pKey = &aKeyword[ii];
+
+    /* AND and NOT are ordinary tokens in the legacy syntax. */
+    if( (0==sqlite3_fts3_enable_parentheses)
+     && (pKey->eType==FTSQUERY_AND || pKey->eType==FTSQUERY_NOT) 
+    ){
+      continue;
+    }
+
+    if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
+      int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
+      int nKey = pKey->n;
+      char cNext;
+
+      /* If this is a "NEAR" keyword, check for an explicit nearness. */
+      if( pKey->eType==FTSQUERY_NEAR ){
+        assert( nKey==4 );
+        if( nInput>5 && zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
+          nNear = 0;
+          for(nKey=5; 
+              nKey<nInput && zInput[nKey]>='0' && zInput[nKey]<='9'; 
+              nKey++
+          ){
+            nNear = nNear * 10 + (zInput[nKey] - '0');
+          }
+        }
+      }
+
+      /* At this point this is probably a keyword. But for that to be true,
+      ** the next byte must contain either whitespace, an open or close
+      ** bracket, a quote character, or EOF. The end of the buffer is
+      ** treated the same as a nul byte.
+      */
+      cNext = (nKey<nInput) ? zInput[nKey] : 0;
+      if( safe_isspace(cNext) 
+       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
+      ){
+        *pnConsumed = (zInput - z) + nKey;
+        pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
+        if( !pRet ){
+          return SQLITE_NOMEM;
+        }
+        memset(pRet, 0, sizeof(Fts3Expr));
+        pRet->eType = pKey->eType;
+        pRet->nNear = nNear;
+        *ppExpr = pRet;
+        return SQLITE_OK;
+      }
+
+      /* Turns out that wasn't a keyword after all. This happens if the
+      ** user has supplied a token such as "ORacle". Continue.
+      */
+    }
+  }
+
+  /* Check for an open bracket. */
+  if( sqlite3_fts3_enable_parentheses && nInput>0 ){
+    if( *zInput=='(' ){
+      int nConsumed = 0;
+      pParse->nNest++;
+      rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
+      if( rc==SQLITE_OK && *ppExpr==0 ){
+        /* The brackets enclose nothing, e.g. "()". The grammar has no
+        ** empty query, and returning SQLITE_OK with a NULL node would
+        ** make the caller dereference it.
+        */
+        rc = SQLITE_ERROR;
+      }
+      *pnConsumed = (zInput - z) + 1 + nConsumed;
+      return rc;
+    }
+  
+    /* Check for a close bracket. */
+    if( *zInput==')' ){
+      pParse->nNest--;
+      *pnConsumed = (zInput - z) + 1;
+      return SQLITE_DONE;
+    }
+  }
+
+  /* See if we are dealing with a quoted phrase. If this is the case, then
+  ** search for the closing quote and pass the whole string to getNextString()
+  ** for processing. This is easy to do, as fts3 has no syntax for escaping
+  ** a quote character embedded in a string.
+  */
+  if( nInput>0 && *zInput=='"' ){
+    for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
+    *pnConsumed = (zInput - z) + ii + 1;
+    if( ii==nInput ){
+      /* No closing quote before the end of the input. */
+      return SQLITE_ERROR;
+    }
+    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
+  }
+
+
+  /* If control flows to this point, this must be a regular token, or 
+  ** the end of the input. Read a regular token using the sqlite3_tokenizer
+  ** interface. Before doing so, figure out if there is an explicit
+  ** column specifier for the token. 
+  **
+  ** TODO: Strangely, it is not possible to associate a column specifier
+  ** with a quoted phrase, only with a single token. Not sure if this was
+  ** an implementation artifact or an intentional decision when fts3 was
+  ** first implemented. Whichever it was, this module duplicates the 
+  ** limitation.
+  */
+  iCol = pParse->iDefaultCol;
+  iColLen = 0;
+  for(ii=0; ii<pParse->nCol; ii++){
+    const char *zStr = pParse->azCol[ii];
+    int nStr = (int)strlen(zStr);
+    if( nInput>nStr && zInput[nStr]==':' && memcmp(zStr, zInput, nStr)==0 ){
+      iCol = ii;
+      /* Skip the leading whitespace, the column name and the ':'. */
+      iColLen = ((zInput - z) + nStr + 1);
+      break;
+    }
+  }
+  rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
+  *pnConsumed += iColLen;
+  return rc;
+}
+
+/*
+** Return the precedence rank of binary operator node p. Argument p must
+** not be an FTSQUERY_PHRASE node. A smaller return value means the
+** operator binds more tightly (in C terms, == would rank lower than ||).
+**
+** With the new query syntax enabled the rank is simply the numeric value
+** of p->eType, giving this order from tightest to loosest:
+**
+**   NEAR
+**   NOT
+**   AND (including implicit ANDs)
+**   OR
+**
+** With the legacy syntax the order is NEAR, OR, AND. That is, OR binds
+** more tightly than AND.
+*/
+static int opPrecedence(Fts3Expr *p){
+  int iRank;
+  assert( p->eType!=FTSQUERY_PHRASE );
+  if( sqlite3_fts3_enable_parentheses ){
+    iRank = p->eType;
+  }else{
+    switch( p->eType ){
+      case FTSQUERY_NEAR:
+        iRank = 1;
+        break;
+      case FTSQUERY_OR:
+        iRank = 2;
+        break;
+      default:
+        /* The legacy parser only ever creates NEAR, OR and AND here. */
+        assert( p->eType==FTSQUERY_AND );
+        iRank = 3;
+        break;
+    }
+  }
+  return iRank;
+}
+
+/*
+** Link binary operator node pNew into the expression tree rooted at
+** *ppHead. pPrev is the node that was most recently added to the tree.
+**
+** Starting from pPrev, walk up the parent chain past every operator that
+** binds at least as tightly as pNew. The sub-tree reached this way becomes
+** the left-hand operand of pNew, and pNew takes its place as the
+** right-hand child of the next operator up. If the walk reaches the root,
+** pNew becomes the new root and *ppHead is updated.
+*/
+static void insertBinaryOperator(
+  Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
+  Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
+  Fts3Expr *pNew           /* New binary node to insert into expression tree */
+){
+  Fts3Expr *pOperand = pPrev;      /* Sub-tree that becomes pNew->pLeft */
+  Fts3Expr *pAbove;                /* Parent of pOperand, if any */
+
+  for(;;){
+    pAbove = pOperand->pParent;
+    if( pAbove==0 || opPrecedence(pAbove)>opPrecedence(pNew) ){
+      break;
+    }
+    pOperand = pAbove;
+  }
+
+  if( pAbove==0 ){
+    *ppHead = pNew;
+  }else{
+    assert( pAbove->pRight==pOperand );
+    pAbove->pRight = pNew;
+    pNew->pParent = pAbove;
+  }
+  pNew->pLeft = pOperand;
+  pOperand->pParent = pNew;
+}
+
+/*
+** Parse the fts3 query expression found in buffer z, length n. This function
+** returns either when the end of the buffer is reached or an unmatched 
+** closing bracket - ')' - is encountered.
+**
+** Nodes are read one at a time with getNextNode(). Phrases (and bracketed
+** sub-expressions) must alternate with binary operators. Where two
+** phrases are adjacent an implicit AND operator is inserted between them.
+** Operators are linked into the tree by insertBinaryOperator() according
+** to their precedence.
+**
+** In legacy mode, tokens prefixed with '-' are not added to the main
+** tree. Instead each one becomes the right-hand child of a NOT node and
+** the NOT nodes are chained together through their left-hand children.
+** Once the whole input has been read, the main tree is attached as the
+** left-most leaf of that chain, so that "a -b -c" is evaluated as
+** ((a NOT b) NOT c). A query consisting only of '-' tokens is an error.
+**
+** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
+** parsed form of the expression (or to 0 if the input contained no nodes)
+** and *pnConsumed is set to the number of bytes read from buffer z.
+** Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM (out of memory error) or
+** SQLITE_ERROR (parse error) is returned. *pnConsumed is always written.
+*/
+static int fts3ExprParse(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *z, int n,                   /* Text of MATCH query */
+  Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  Fts3Expr *pRet = 0;                     /* Root of the main tree */
+  Fts3Expr *pPrev = 0;                    /* Last node added to main tree */
+  Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
+  int nIn = n;
+  const char *zIn = z;
+  int rc = SQLITE_OK;
+  int isRequirePhrase = 1;                /* True if a phrase must come next */
+
+  while( rc==SQLITE_OK ){
+    Fts3Expr *p = 0;
+    int nByte = 0;
+    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
+    if( rc==SQLITE_OK ){
+      int isPhrase;
+
+      if( p==0 ){
+        /* Defensive: a successful return must carry a node. */
+        rc = SQLITE_ERROR;
+        goto exprparse_out;
+      }
+
+      if( !sqlite3_fts3_enable_parentheses 
+       && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot 
+      ){
+        /* Create an implicit NOT operator. */
+        Fts3Expr *pNot = sqlite3_malloc(sizeof(Fts3Expr));
+        if( !pNot ){
+          sqlite3Fts3ExprFree(p);
+          rc = SQLITE_NOMEM;
+          goto exprparse_out;
+        }
+        memset(pNot, 0, sizeof(Fts3Expr));
+        pNot->eType = FTSQUERY_NOT;
+        pNot->pRight = p;
+        p->pParent = pNot;
+        if( pNotBranch ){
+          /* Chain the earlier NOT nodes below this one on the left, so
+          ** that every excluded term ends up as a right-hand operand.
+          */
+          pNot->pLeft = pNotBranch;
+          pNotBranch->pParent = pNot;
+        }
+        pNotBranch = pNot;
+
+        /* The excluded term is not part of the main tree, so pPrev and
+        ** isRequirePhrase are deliberately left unchanged here.
+        */
+      }else{
+        assert( p->eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
+
+        /* A node with a left child is a bracketed sub-expression, which
+        ** is treated like a phrase for sequencing purposes.
+        */
+        isPhrase = (p->eType==FTSQUERY_PHRASE || p->pLeft);
+        if( !isPhrase && isRequirePhrase ){
+          sqlite3Fts3ExprFree(p);
+          rc = SQLITE_ERROR;
+          goto exprparse_out;
+        }
+  
+        if( isPhrase && !isRequirePhrase ){
+          /* Insert an implicit AND operator. */
+          Fts3Expr *pAnd;
+          assert( pRet && pPrev );
+          pAnd = sqlite3_malloc(sizeof(Fts3Expr));
+          if( !pAnd ){
+            sqlite3Fts3ExprFree(p);
+            rc = SQLITE_NOMEM;
+            goto exprparse_out;
+          }
+          memset(pAnd, 0, sizeof(Fts3Expr));
+          pAnd->eType = FTSQUERY_AND;
+          insertBinaryOperator(&pRet, pPrev, pAnd);
+          pPrev = pAnd;
+        }
+
+        if( pPrev && (
+            (pPrev->eType==FTSQUERY_NEAR && p->eType!=FTSQUERY_PHRASE)
+         || (p->eType==FTSQUERY_NEAR && pPrev->eType!=FTSQUERY_PHRASE) 
+        )){
+          /* This is an attempt to do "phrase NEAR (bracketed expression)"
+          ** or "(bracketed expression) NEAR phrase", both of which are
+          ** illegal. Return an error.
+          */
+          sqlite3Fts3ExprFree(p);
+          rc = SQLITE_ERROR;
+          goto exprparse_out;
+        }
+  
+        if( isPhrase ){
+          if( pRet ){
+            assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
+            pPrev->pRight = p;
+            p->pParent = pPrev;
+          }else{
+            pRet = p;
+          }
+        }else{
+          insertBinaryOperator(&pRet, pPrev, p);
+        }
+        isRequirePhrase = !isPhrase;
+        pPrev = p;
+      }
+      assert( nByte>0 );
+    }
+    nIn -= nByte;
+    zIn += nByte;
+  }
+
+  /* The expression ended with a dangling binary operator. */
+  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
+    rc = SQLITE_ERROR;
+  }
+
+  if( rc==SQLITE_DONE ){
+    rc = SQLITE_OK;
+    if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
+      if( !pRet ){
+        /* A query made up of nothing but excluded terms. */
+        rc = SQLITE_ERROR;
+      }else{
+        /* Hang the main tree off the left-most (first created) NOT. */
+        Fts3Expr *pIter = pNotBranch;
+        while( pIter->pLeft ){
+          pIter = pIter->pLeft;
+        }
+        pIter->pLeft = pRet;
+        pRet->pParent = pIter;
+        pRet = pNotBranch;
+        pNotBranch = 0;
+      }
+    }
+  }
+
+exprparse_out:
+  *pnConsumed = n - nIn;
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts3ExprFree(pRet);
+    sqlite3Fts3ExprFree(pNotBranch);
+    pRet = 0;
+  }
+  *ppExpr = pRet;
+  return rc;
+}
+
+/*
+** Parameters z and n contain a pointer to and length of a buffer containing
+** an fts3 query expression, respectively. This function attempts to parse the
+** query expression and create a tree of Fts3Expr structures representing the
+** parsed expression. If successful, *ppExpr is set to point to the head
+** of the parsed expression tree and SQLITE_OK is returned. If an error
+** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
+** error) is returned and *ppExpr is set to 0.
+**
+** If z is NULL, or if the expression contains no tokens at all, SQLITE_OK
+** is returned and *ppExpr is set to 0. Callers must be prepared for a NULL
+** expression on success.
+**
+** If parameter n is a negative number, then z is assumed to point to a
+** nul-terminated string and the length is determined using strlen().
+**
+** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
+** use to normalize query tokens while parsing the expression. The azCol[]
+** array, which is assumed to contain nCol entries, should contain the names
+** of each column in the target fts3 table, in order from left to right. 
+** Column names must be nul-terminated strings.
+**
+** The iDefaultCol parameter should be passed the index of the table column
+** that appears on the left-hand-side of the MATCH operator (the default
+** column to match against for tokens for which a column name is not explicitly
+** specified as part of the query string), or -1 if tokens may by default
+** match any table column.
+**
+** The tree returned via *ppExpr must eventually be released by the caller
+** using sqlite3Fts3ExprFree().
+*/
+int sqlite3Fts3ExprParse(
+  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
+  char **azCol,                       /* Array of column names for fts3 table */
+  int nCol,                           /* Number of entries in azCol[] */
+  int iDefaultCol,                    /* Default column to query */
+  const char *z, int n,               /* Text of MATCH query */
+  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
+){
+  int nParsed;
+  int rc;
+  ParseContext sParse;
+
+  /* Zero the whole context so that no field (pCtx in particular) is left
+  ** holding an indeterminate value.
+  */
+  memset(&sParse, 0, sizeof(sParse));
+  sParse.pTokenizer = pTokenizer;
+  sParse.azCol = (const char **)azCol;
+  sParse.nCol = nCol;
+  sParse.iDefaultCol = iDefaultCol;
+  sParse.nNest = 0;
+  if( z==0 ){
+    *ppExpr = 0;
+    return SQLITE_OK;
+  }
+  if( n<0 ){
+    n = (int)strlen(z);
+  }
+  rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
+
+  /* Check for mismatched parenthesis. nNest is positive if a '(' was never
+  ** closed and negative if a ')' appeared without a matching '('.
+  */
+  if( rc==SQLITE_OK && sParse.nNest ){
+    rc = SQLITE_ERROR;
+    sqlite3Fts3ExprFree(*ppExpr);
+    *ppExpr = 0;
+  }
+
+  return rc;
+}
+
+/*
+** Release an expression tree obtained from sqlite3Fts3ExprParse(). Both
+** sub-trees are released before the node itself. Passing a NULL pointer
+** is a harmless no-op.
+*/
+void sqlite3Fts3ExprFree(Fts3Expr *p){
+  if( p==0 ){
+    return;
+  }
+  sqlite3Fts3ExprFree(p->pLeft);
+  sqlite3Fts3ExprFree(p->pRight);
+  sqlite3_free(p);
+}
+
+/****************************************************************************
+*****************************************************************************
+** Everything after this point is just test code.
+*/
+
+#ifdef SQLITE_TEST
+
+#include <stdio.h>
+
+/*
+** Look up the tokenizer module registered under name zName by running
+** "SELECT fts3_tokenizer(?)" against database db (see README.tokenizers).
+**
+** If the query returns a row whose first column is a blob, the leading
+** sizeof(pointer) bytes of that blob are copied into *pp. In every other
+** case *pp is left set to 0. The return value is the result of preparing
+** the statement if that fails, otherwise the result of finalizing it.
+*/
+static int queryTokenizer(
+  sqlite3 *db, 
+  const char *zName,  
+  const sqlite3_tokenizer_module **pp
+){
+  const char zSql[] = "SELECT fts3_tokenizer(?)";
+  sqlite3_stmt *pStmt;
+  int rc;
+
+  *pp = 0;
+  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+    if( sqlite3_step(pStmt)==SQLITE_ROW
+     && sqlite3_column_type(pStmt, 0)==SQLITE_BLOB
+    ){
+      /* The blob holds the raw bytes of a module pointer. */
+      memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
+    }
+    rc = sqlite3_finalize(pStmt);
+  }
+  return rc;
+}
+
+/*
+** Test-interface helper for the query parser. Write a text rendering of
+** expression pExpr into buffer zBuf as a nul-terminated string.
+**
+** A phrase is rendered as "PHRASE <iColumn> <isNot> <token>..." with a
+** "+" appended to each prefix token. An operator is rendered as its name
+** ("NEAR/<n>", "NOT", "AND" or "OR") followed by the left and right
+** operands, each wrapped in curly braces.
+**
+** No bounds checking is done: the caller must supply a buffer that is
+** large enough for the whole rendering. pExpr must not be NULL.
+*/
+static void exprToString(Fts3Expr *pExpr, char *zBuf){
+  char *zOut = zBuf;                  /* Current write position */
+  int eType = pExpr->eType;
+
+  if( eType==FTSQUERY_PHRASE ){
+    Fts3Phrase *pPhrase = pExpr->pPhrase;
+    int iTok;
+    zOut += sprintf(zOut, "PHRASE %d %d", pPhrase->iColumn, pPhrase->isNot);
+    for(iTok=0; iTok<pPhrase->nToken; iTok++){
+      struct PhraseToken *pTok = &pPhrase->aToken[iTok];
+      zOut += sprintf(zOut, " %.*s%s", 
+          pTok->n, pTok->z, (pTok->isPrefix ? "+" : "")
+      );
+    }
+    return;
+  }
+
+  /* Operator name. An unrecognized type contributes no name at all. */
+  if( eType==FTSQUERY_NEAR ){
+    zOut += sprintf(zOut, "NEAR/%d ", pExpr->nNear);
+  }else if( eType==FTSQUERY_NOT ){
+    zOut += sprintf(zOut, "NOT ");
+  }else if( eType==FTSQUERY_AND ){
+    zOut += sprintf(zOut, "AND ");
+  }else if( eType==FTSQUERY_OR ){
+    zOut += sprintf(zOut, "OR ");
+  }
+
+  /* Left operand, then right operand, each in its own pair of braces. */
+  zOut += sprintf(zOut, "{");
+  exprToString(pExpr->pLeft, zOut);
+  zOut += strlen(zOut);
+  zOut += sprintf(zOut, "} {");
+  exprToString(pExpr->pRight, zOut);
+  zOut += strlen(zOut);
+  sprintf(zOut, "}");
+}
+
+/*
+** This is the implementation of a scalar SQL function used to test the 
+** expression parser. It should be called as follows:
+**
+**   fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
+**
+** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
+** to parse the query expression (see README.tokenizers). The second argument
+** is the query expression to parse. Each subsequent argument is the name
+** of a column of the fts3 table that the query expression may refer to.
+** For example:
+**
+**   SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
+**
+** The result is the text produced by exprToString() for the parsed
+** expression, or an empty string if the expression contains no tokens.
+** An SQL error is raised if fewer than three arguments are passed, if the
+** tokenizer is unknown, if the expression cannot be parsed, or if a
+** memory allocation fails.
+*/
+static void fts3ExprTest(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  sqlite3_tokenizer_module const *pModule = 0;
+  sqlite3_tokenizer *pTokenizer = 0;  /* Must be 0 until xCreate() succeeds */
+  int rc;
+  char **azCol = 0;
+  const char *zExpr;
+  int nExpr;
+  int nCol;
+  int ii;
+  Fts3Expr *pExpr = 0;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+
+  if( argc<3 ){
+    sqlite3_result_error(context, 
+        "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
+    );
+    return;
+  }
+
+  rc = queryTokenizer(db, (const char *)sqlite3_value_text(argv[0]), &pModule);
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }else if( !pModule ){
+    sqlite3_result_error(context, "No such tokenizer module", -1);
+    goto exprtest_out;
+  }
+
+  rc = pModule->xCreate(0, 0, &pTokenizer);
+  assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }
+  pTokenizer->pModule = pModule;
+
+  zExpr = (const char *)sqlite3_value_text(argv[1]);
+  nExpr = sqlite3_value_bytes(argv[1]);
+  nCol = argc-2;
+  azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
+  if( !azCol ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }
+  for(ii=0; ii<nCol; ii++){
+    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
+  }
+
+  rc = sqlite3Fts3ExprParse(
+      pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
+  );
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }else if( rc==SQLITE_OK ){
+    if( pExpr ){
+      char zBuf[4096];
+      exprToString(pExpr, zBuf);
+      sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+      sqlite3Fts3ExprFree(pExpr);
+    }else{
+      /* A NULL or token-free expression parses to a NULL tree, which
+      ** exprToString() cannot render.
+      */
+      sqlite3_result_text(context, "", -1, SQLITE_STATIC);
+    }
+  }else{
+    sqlite3_result_error(context, "Error parsing expression", -1);
+  }
+
+exprtest_out:
+  /* This label is reached both before and after the tokenizer has been
+  ** created, and also when no module was found at all.
+  */
+  if( pModule && pTokenizer ){
+    rc = pModule->xDestroy(pTokenizer);
+  }
+  sqlite3_free(azCol);
+}
+
+/*
+** Make the parser test function available on database connection db by
+** registering fts3ExprTest() as the SQL function fts3_exprtest(). The
+** function accepts any number of arguments. The result of the
+** registration call is not checked.
+*/
+void sqlite3Fts3ExprInitTestInterface(sqlite3* db){
+  const int nArg = -1;                /* -1: any number of arguments */
+  sqlite3_create_function(
+      db, "fts3_exprtest", nArg, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
+  );
+}
+
+#endif
diff --git a/ext/fts3/fts3_expr.h b/ext/fts3/fts3_expr.h

new file mode 100644 (file)

index 0000000..df60150
--- /dev/null
+++ b/ext/fts3/fts3_expr.h
@@ -0,0 +1,87 @@
+/*
+** 2008 Nov 28
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+#include "fts3_tokenizer.h"
+#include "sqlite3.h"
+
+/*
+** The following describes the syntax supported by the fts3 MATCH
+** operator in a similar format to that used by the lemon parser
+** generator. This module does not actually use lemon; it uses a
+** custom parser.
+**
+**   phrase ::= TOKEN.
+**   phrase ::= TOKEN:COLUMN.
+**   phrase ::= "TOKEN TOKEN TOKEN...".
+**   phrase ::= phrase near phrase.
+**
+**   near ::= NEAR.
+**   near ::= NEAR / INTEGER.
+**
+**   query ::= -TOKEN.
+**   query ::= phrase.
+**   query ::= LP query RP.
+**   query ::= query NOT query.
+**   query ::= query OR query.
+**   query ::= query AND query.
+*/
+
+typedef struct Fts3Expr Fts3Expr;
+typedef struct Fts3Phrase Fts3Phrase;
+
+/*
+** A phrase: nToken tokens that must match within column iColumn.
+** NOTE(review): aToken[] is declared with a single element; presumably
+** the structure is allocated with room for nToken entries - confirm
+** against the allocation code in fts3_expr.c.
+*/
+struct Fts3Phrase {
+  int nToken;          /* Number of entries in aToken[] */
+  int iColumn;         /* Index of column this phrase must match */
+  int isNot;           /* Phrase prefixed by unary not (-) operator */
+  struct PhraseToken {
+    char *z;           /* Buffer holding the token (n bytes) */
+    int n;             /* Number of bytes in buffer pointed to by z */
+    int isPrefix;      /* True if token ends with a "*" character */
+  } aToken[1];
+};
+
+/*
+** A node of a parsed query expression. eType selects which of the
+** remaining fields are meaningful.
+*/
+struct Fts3Expr {
+  int eType;                 /* One of the FTSQUERY_XXX values defined below */
+  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
+  Fts3Expr *pParent;         /* Enclosing node (presumably 0 at root; confirm) */
+  Fts3Expr *pLeft;           /* Left operand (presumably operator nodes only) */
+  Fts3Expr *pRight;          /* Right operand (presumably operator nodes only) */
+  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
+};
+
+int sqlite3Fts3ExprParse(sqlite3_tokenizer *, char **, int, int, 
+                         const char *, int, Fts3Expr **);
+void sqlite3Fts3ExprFree(Fts3Expr *);
+
+/*
+** Candidate values for Fts3Expr.eType. Note that the order of the first
+** four values is in order of precedence when parsing expressions. For 
+** example, the following:
+**
+**   "a OR b AND c NOT d NEAR e"
+**
+** is equivalent to:
+**
+**   "a OR (b AND (c NOT (d NEAR e)))"
+*/
+#define FTSQUERY_NEAR   1
+#define FTSQUERY_NOT    2
+#define FTSQUERY_AND    3
+#define FTSQUERY_OR     4
+#define FTSQUERY_PHRASE 5
+
+#ifdef SQLITE_TEST
+void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
+#endif
+
diff --git a/main.mk b/main.mk

index d3c33517ace3efc6e3f3d7622a6061886337b5fb..b693a6839a8d4a5baa80cfa6d9bdd1cb9ba5e3f4 100644 (file)
--- a/main.mk
+++ b/main.mk
@@ -52,7 +52,7 @@ TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3
  LIBOBJ+= alter.o analyze.o attach.o auth.o \
           bitvec.o btmutex.o btree.o build.o \
           callback.o complete.o date.o delete.o expr.o fault.o \
-         fts3.o fts3_hash.o fts3_icu.o fts3_porter.o \
+         fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
           fts3_tokenizer.o fts3_tokenizer1.o \
           func.o global.o hash.o \
           icu.o insert.o journal.o legacy.o loadext.o \
@@ -178,6 +178,8 @@ SRC += \
  SRC += \
    $(TOP)/ext/fts3/fts3.c \
    $(TOP)/ext/fts3/fts3.h \
+  $(TOP)/ext/fts3/fts3_expr.c \
+  $(TOP)/ext/fts3/fts3_expr.h \
    $(TOP)/ext/fts3/fts3_hash.c \
    $(TOP)/ext/fts3/fts3_hash.h \
    $(TOP)/ext/fts3/fts3_icu.c \
@@ -247,7 +249,8 @@ TESTSRC2 = \
    $(TOP)/src/pcache1.c $(TOP)/src/select.c $(TOP)/src/tokenize.c               \
    $(TOP)/src/utf.c $(TOP)/src/util.c $(TOP)/src/vdbeapi.c $(TOP)/src/vdbeaux.c \
    $(TOP)/src/vdbe.c $(TOP)/src/vdbemem.c $(TOP)/src/where.c parse.c            \
-  $(TOP)/ext/fts3/fts3.c $(TOP)/ext/fts3/fts3_tokenizer.c 
+  $(TOP)/ext/fts3/fts3.c $(TOP)/ext/fts3/fts3_expr.c                           \
+  $(TOP)/ext/fts3/fts3_tokenizer.c 
  
  # Header files used by all library source files.
  #
@@ -283,6 +286,7 @@ EXTHDR += \
    $(TOP)/ext/fts2/fts2_tokenizer.h
  EXTHDR += \
    $(TOP)/ext/fts3/fts3.h \
+  $(TOP)/ext/fts3/fts3_expr.h \
    $(TOP)/ext/fts3/fts3_hash.h \
    $(TOP)/ext/fts3/fts3_tokenizer.h
  EXTHDR += \
@@ -412,6 +416,9 @@ fts2_tokenizer1.o:  $(TOP)/ext/fts2/fts2_tokenizer1.c $(HDR) $(EXTHDR)
  fts3.o:        $(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR)
         $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c
  
+fts3_expr.o:   $(TOP)/ext/fts3/fts3_expr.c $(HDR) $(EXTHDR)
+       $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_expr.c
+
  fts3_hash.o:   $(TOP)/ext/fts3/fts3_hash.c $(HDR) $(EXTHDR)
         $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_hash.c
  
diff --git a/manifest b/manifest

index ab531a9faa0b59227caaa2b6ff71ec57936b5390..770a260a3c466cf664e4ca1e762ed2a9bb1302bb 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Version\s3.6.7\s(CVS\s6033)
-D 2008-12-16T18:15:59
+C Modify\sfts3\sto\ssupport\sa\smore\scomplex\sexpression\ssyntax\sthat\sallows\sparenthesis.\sThe\snew\ssyntax\sis\snot\sentirely\sbackwards\scompatible,\sso\sis\sdisabled\sby\sdefault.\sUse\s-DSQLITE_ENABLE_FTS3_PARENTHESIS\sto\senable\sit.\s(CVS\s6034)
+D 2008-12-17T15:18:18
  F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
  F Makefile.in f7e4c81c347b04f7b0f1c1b081a168645d7b8af7
  F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -52,8 +52,10 @@ F ext/fts2/fts2_tokenizer1.c 8545ce12b41922004da46e91a7b023b92b76f94e
  F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
  F ext/fts3/README.tokenizers 226644a0eab97724e8de83061912e8bb248461b6
  F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c e67453b6ac421b79e600385491ed7f038b3bb271
+F ext/fts3/fts3.c 3aa6aef1eadc44606f6ed3c841062735a5210077
  F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
+F ext/fts3/fts3_expr.c b141145197cc749accb03d2b970813443b723edd
+F ext/fts3/fts3_expr.h 4dad4d87cf5d41ea924a815fe89a6f87dc76f277
  F ext/fts3/fts3_hash.c e15e84d18f8df149ab290029872d4559c4c7c15a
  F ext/fts3/fts3_hash.h 004b759e1602ff16dfa02fea3ca1c77336ad6798
  F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
@@ -80,7 +82,7 @@ F ext/rtree/tkt3363.test 6662237ea75bb431cd5d262dfc9535e1023315fc
  F ext/rtree/viewrtree.tcl 09526398dae87a5a87c5aac2b3854dbaf8376869
  F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
  F ltmain.sh 09fe5815427dc7d0abb188bbcdf0e34896577210
-F main.mk 5923e75b5ac4b265f322597c3953dda7175f4405
+F main.mk f6eb58a66f942bf672ab58e74e30e72cad39b93f
  F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a
  F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f
  F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac
@@ -160,7 +162,7 @@ F src/sqliteLimit.h f435e728c6b620ef7312814d660a81f9356eb5c8
  F src/status.c 237b193efae0cf6ac3f0817a208de6c6c6ef6d76
  F src/table.c 23db1e5f27c03160987c122a078b4bb51ef0b2f8
  F src/tclsqlite.c 23afb60549af943e135ded441a631f4745be6040
-F src/test1.c 9c0502c3627162f969099e57932782057d9139b6
+F src/test1.c b193b8b80617bdb8297b25a87d00ee8d5a125d0d
  F src/test2.c 897528183edf2839c2a3c991d415905db56f1240
  F src/test3.c 88a246b56b824275300e6c899634fbac1dc94b14
  F src/test4.c f79ab52d27ff49b784b631a42e2ccd52cfd5c84c
@@ -364,6 +366,7 @@ F test/fts3b.test b3a25180a633873d37d86e1ccd00ed690d37237a
  F test/fts3c.test 4c7ef29b37aca3e8ebb6a39b57910caa6506034e
  F test/fts3d.test d92a47fe8ed59c9e53d2d8e6d2685bb380aadadc
  F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
+F test/fts3expr.test 18143e61503845b940fd7caacce53bce4307426f
  F test/fts3near.test e8a9b4e16c63a795918b334b74d4aec14815bf8b
  F test/func.test a50f0a4b69ac251debe1dce3ba29da7476dc8c52
  F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a
@@ -675,7 +678,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81
  F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
  F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
  F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
-P 8b8f6a6ab597e06e60557ab56c6ee7f8522ed570
-R a6ff965a91307b50874e830368f8fcd6
-U drh
-Z 76064440713dce47dc110c16fc71382b
+P f4f40370fb83d677df3fbf2c51c4bb4a3e5ccc7a
+R 0170cc023fb1827148ff15e7bc02335a
+U danielk1977
+Z 3401f60bb1566cbc5da4c344a91c4fb9
diff --git a/manifest.uuid b/manifest.uuid

index a4df069db20a55b3c53692b8de9dea3fede03632..d2c9a8b4cff69fa333bbec29af2e19db5fb0099f 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-f4f40370fb83d677df3fbf2c51c4bb4a3e5ccc7a
-\ No newline at end of file
+7389b9ecb80294569845c40a23e0c832d07f7a45
+\ No newline at end of file
diff --git a/src/test1.c b/src/test1.c

index 37f5778932011aff276f9925160fc78706e0beb9..d73dec9367ac011d4cb32b9fc2eaeb131edb63af 100644 (file)
--- a/src/test1.c
+++ b/src/test1.c
@@ -13,7 +13,7 @@
  ** is not included in the SQLite library.  It is used for automated
  ** testing of the SQLite library.
  **
-** $Id: test1.c,v 1.337 2008/12/11 02:56:07 drh Exp $
+** $Id: test1.c,v 1.338 2008/12/17 15:18:18 danielk1977 Exp $
  */
  #include "sqliteInt.h"
  #include "tcl.h"
@@ -4930,6 +4930,9 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
    extern int sqlite3_enable_in_opt;
    extern char sqlite3_query_plan[];
    static char *query_plan = sqlite3_query_plan;
+#ifdef SQLITE_ENABLE_FTS3
+  extern int sqlite3_fts3_enable_parentheses;
+#endif
  #endif
  
    for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
@@ -5012,6 +5015,10 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
  #ifdef SQLITE_TEST
    Tcl_LinkVar(interp, "sqlite_enable_in_opt",
        (char*)&sqlite3_enable_in_opt, TCL_LINK_INT);
+#ifdef SQLITE_ENABLE_FTS3
+  Tcl_LinkVar(interp, "sqlite_fts3_enable_parentheses",
+      (char*)&sqlite3_fts3_enable_parentheses, TCL_LINK_INT);
+#endif
  #endif
    return TCL_OK;
  }
diff --git a/test/fts3expr.test b/test/fts3expr.test

new file mode 100644 (file)

index 0000000..2e6f5e1
--- /dev/null
+++ b/test/fts3expr.test
@@ -0,0 +1,258 @@
+# 2006 September 9
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library.  The
+# focus of this script is testing the FTS3 module.
+#
+# $Id: fts3expr.test,v 1.1 2008/12/17 15:18:18 danielk1977 Exp $
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
+ifcapable !fts3 {
+  finish_test
+  return
+}
+
+set sqlite_fts3_enable_parentheses 1
+
+# Pass $expr to the fts3_exprtest() SQL function using the 'simple'
+# tokenizer and the column names a, b and c, and return what [db one]
+# yields for that query.
+proc test_fts3expr {expr} {
+  db one {SELECT fts3_exprtest('simple', $expr, 'a', 'b', 'c')}
+}
+do_test fts3expr-1.0 {
+  test_fts3expr "abcd"
+} {PHRASE 3 0 abcd}
+do_test fts3expr-1.1 {
+  test_fts3expr " tag "
+} {PHRASE 3 0 tag}
+
+do_test fts3expr-1.2 {
+  test_fts3expr "ab AND cd"
+} {AND {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+do_test fts3expr-1.3 {
+  test_fts3expr "ab OR cd"
+} {OR {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+do_test fts3expr-1.4 {
+  test_fts3expr "ab NOT cd"
+} {NOT {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+do_test fts3expr-1.5 {
+  test_fts3expr "ab NEAR cd"
+} {NEAR/10 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+do_test fts3expr-1.6 {
+  test_fts3expr "ab NEAR/5 cd"
+} {NEAR/5 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+
+do_test fts3expr-1.7 {
+  test_fts3expr {"one two three"}
+} {PHRASE 3 0 one two three}
+do_test fts3expr-1.8 {
+  test_fts3expr {zero "one two three" four}
+} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
+do_test fts3expr-1.9 {
+  test_fts3expr {"one* two three*"}
+} {PHRASE 3 0 one+ two three+}
+
+do_test fts3expr-1.10 {
+  test_fts3expr {one* two}
+} {AND {PHRASE 3 0 one+} {PHRASE 3 0 two}}
+do_test fts3expr-1.11 {
+  test_fts3expr {one two*}
+} {AND {PHRASE 3 0 one} {PHRASE 3 0 two+}}
+
+do_test fts3expr-1.14 {
+  test_fts3expr {a:one two}
+} {AND {PHRASE 0 0 one} {PHRASE 3 0 two}}
+do_test fts3expr-1.15 {
+  test_fts3expr {one b:two}
+} {AND {PHRASE 3 0 one} {PHRASE 1 0 two}}
+
+# Simplify a list $L as returned by fts3_exprtest(). If the first element
+# of $L is "PHRASE", return only the elements from index 3 onwards (the
+# "PHRASE" keyword and the two fields following it are dropped).
+# Otherwise return a three element list: the first element of $L
+# unchanged, followed by the results of applying this proc recursively
+# to elements 1 and 2.
+proc strip_phrase_data {L} {
+  if {[lindex $L 0] eq "PHRASE"} {
+    return [lrange $L 3 end]
+  }
+  return [list \
+    [lindex $L 0] \
+    [strip_phrase_data [lindex $L 1]] \
+    [strip_phrase_data [lindex $L 2]] \
+  ]
+}
+# Run the same fts3_exprtest() query as test_fts3expr, then pass the
+# result through strip_phrase_data before returning it.
+proc test_fts3expr2 {expr} {
+  strip_phrase_data [
+    db one {SELECT fts3_exprtest('simple', $expr, 'a', 'b', 'c')}
+  ]
+}
+do_test fts3expr-2.1 {
+  test_fts3expr2 "ab OR cd AND ef"
+} {OR ab {AND cd ef}}
+do_test fts3expr-2.2 {
+  test_fts3expr2 "cd AND ef OR ab"
+} {OR {AND cd ef} ab}
+do_test fts3expr-2.3 {
+  test_fts3expr2 "ab AND cd AND ef OR gh"
+} {OR {AND {AND ab cd} ef} gh}
+do_test fts3expr-2.4 {
+  test_fts3expr2 "ab AND cd OR ef AND gh"
+} {OR {AND ab cd} {AND ef gh}}
+do_test fts3expr-2.5 {
+  test_fts3expr2 "ab cd"
+} {AND ab cd}
+
+do_test fts3expr-3.1 {
+  test_fts3expr2 "(ab OR cd) AND ef"
+} {AND {OR ab cd} ef}
+do_test fts3expr-3.2 {
+  test_fts3expr2 "ef AND (ab OR cd)"
+} {AND ef {OR ab cd}}
+do_test fts3expr-3.3 {
+  test_fts3expr2 "(ab OR cd)"
+} {OR ab cd}
+do_test fts3expr-3.4 {
+  test_fts3expr2 "(((ab OR cd)))"
+} {OR ab cd}
+
+#------------------------------------------------------------------------
+# The following tests, fts3expr-4.*, test the parser's response to syntax
+# errors in query expressions. This is done using a real fts3 table and
+# MATCH clauses, not the parser test interface.
+# 
+do_test fts3expr-4.1 {
+  execsql { CREATE VIRTUAL TABLE t1 USING fts3(a, b, c) }
+} {}
+
+# Mismatched parenthesis:
+do_test fts3expr-4.2.1 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example AND (hello OR world))' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.2.2 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example AND (hello OR world' }
+} {1 {SQL logic error or missing database}}
+
+# Unterminated quotation marks:
+do_test fts3expr-4.3.1 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example OR "hello world' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.3.2 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example OR hello world"' }
+} {1 {SQL logic error or missing database}}
+
+# Binary operators without the required operands.
+do_test fts3expr-4.4.1 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'OR hello world' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.4.2 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'hello world OR' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.4.3 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one (hello world OR) two' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.4.4 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one (OR hello world) two' }
+} {1 {SQL logic error or missing database}}
+
+# NEAR operators with something other than phrases as arguments.
+do_test fts3expr-4.5.1 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH '(hello OR world) NEAR one' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.5.2 {
+  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one NEAR (hello OR world)' }
+} {1 {SQL logic error or missing database}}
+
+#------------------------------------------------------------------------
+# The following OOM tests are designed to cover cases in fts3_expr.c.
+# 
+source $testdir/malloc_common.tcl
+do_malloc_test fts3expr-malloc-1 -sqlbody {
+  SELECT fts3_exprtest('simple', 'a b c "d e f"', 'a', 'b', 'c')
+}
+do_malloc_test fts3expr-malloc-2 -tclprep {
+  set sqlite_fts3_enable_parentheses 0
+} -sqlbody {
+  SELECT fts3_exprtest('simple', 'a -b', 'a', 'b', 'c')
+} -cleanup {
+  set sqlite_fts3_enable_parentheses 1
+}
+
+#------------------------------------------------------------------------
+# The following tests are not very important. They cover error handling
+# cases in the test code, which makes test coverage easier to measure.
+# 
+do_test fts3expr-5.1 {
+  catchsql { SELECT fts3_exprtest('simple', 'a b') }
+} {1 {Usage: fts3_exprtest(tokenizer, expr, col1, ...}}
+do_test fts3expr-5.2 {
+  catchsql { SELECT fts3_exprtest('doesnotexist', 'a b', 'c') }
+} {1 {No such tokenizer module}}
+do_test fts3expr-5.3 {
+  catchsql { SELECT fts3_exprtest('simple', 'a b OR', 'c') }
+} {1 {Error parsing expression}}
+
+#------------------------------------------------------------------------
+# The next set of tests verifies that things actually work as they are
+# supposed to when using the new syntax.
+# 
+do_test fts3expr-6.1 {
+  execsql {
+    CREATE VIRTUAL TABLE t1 USING fts3(a);
+  }
+  for {set ii 1} {$ii < 32} {incr ii} {
+    set v [list]
+    if {$ii & 1}  { lappend v one }
+    if {$ii & 2}  { lappend v two }
+    if {$ii & 4}  { lappend v three }
+    if {$ii & 8}  { lappend v four }
+    if {$ii & 16} { lappend v five }
+    execsql { INSERT INTO t1 VALUES($v) }
+  }
+
+  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'five four one' ORDER BY rowid}
+} {25 27 29 31}
+
+foreach {id expr res} {
+
+  2 "five four NOT one" {24 26 28 30}
+
+  3 "five AND four OR one" 
+      {1 3 5 7 9 11 13 15 17 19 21 23 24 25 26 27 28 29 30 31}
+
+  4 "five AND (four OR one)" {17 19 21 23 24 25 26 27 28 29 30 31}
+
+  5 "five NOT (four OR one)" {16 18 20 22}
+
+  6 "(five NOT (four OR one)) OR (five AND (four OR one))"
+      {16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}
+
+  7 "(five OR one) AND two AND three" {7 15 22 23 30 31}
+
+  8 "five OR one AND two AND three" 
+    {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}
+
+  9 "five OR one two three" 
+    {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}
+
+  10 "five OR \"one two three\"" 
+    {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}
+
+  11 "one two OR four five NOT three" {3 7 11 15 19 23 24 25 26 27 31}
+
+  12 "(one two OR four five) NOT three" {3 11 19 24 25 26 27}
+
+  13 "((((((one two OR four five)))))) NOT three" {3 11 19 24 25 26 27}
+
+} {
+  do_test fts3expr-6.$id {
+    execsql { SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid }
+  } $res
+}
+
+set sqlite_fts3_enable_parentheses 0
+finish_test
+
author	danielk1977 <danielk1977@noemail.net>
	Wed, 17 Dec 2008 15:18:17 +0000 (15:18 +0000)
committer	danielk1977 <danielk1977@noemail.net>
	Wed, 17 Dec 2008 15:18:17 +0000 (15:18 +0000)
ext/fts3/fts3.c		patch \| blob \| blame \| history
ext/fts3/fts3_expr.c	[new file with mode: 0644]	patch \| blob
ext/fts3/fts3_expr.h	[new file with mode: 0644]	patch \| blob
main.mk		patch \| blob \| blame \| history
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
src/test1.c		patch \| blob \| blame \| history
test/fts3expr.test	[new file with mode: 0644]	patch \| blob