#include <ctype.h>
#include "fts3.h"
+#include "fts3_expr.h"
#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#ifndef SQLITE_CORE
# define FTSTRACE(A)
#endif
-/*
-** Default span for NEAR operators.
-*/
-#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
-
/* It is not safe to call isspace(), tolower(), or isalnum() on
** hi-bit-set characters. This is the same solution used in the
** tokenizer.
/* Forward reference */
typedef struct fulltext_vtab fulltext_vtab;
-/* A single term in a query is represented by an instances of
-** the following structure. Each word which may match against
-** document content is a term. Operators, like NEAR or OR, are
-** not terms. Query terms are organized as a flat list stored
-** in the Query.pTerms array.
-**
-** If the QueryTerm.nPhrase variable is non-zero, then the QueryTerm
-** is the first in a contiguous string of terms that are either part
-** of the same phrase, or connected by the NEAR operator.
-**
-** If the QueryTerm.nNear variable is non-zero, then the token is followed
-** by a NEAR operator with span set to (nNear-1). For example, the
-** following query:
-**
-** The QueryTerm.iPhrase variable stores the index of the token within
-** its phrase, indexed starting at 1, or 1 if the token is not part
-** of any phrase.
-**
-** For example, the data structure used to represent the following query:
-**
-** ... MATCH 'sqlite NEAR/5 google NEAR/2 "search engine"'
-**
-** is:
-**
-** {nPhrase=4, iPhrase=1, nNear=6, pTerm="sqlite"},
-** {nPhrase=0, iPhrase=1, nNear=3, pTerm="google"},
-** {nPhrase=0, iPhrase=1, nNear=0, pTerm="search"},
-** {nPhrase=0, iPhrase=2, nNear=0, pTerm="engine"},
-**
-** compiling the FTS3 syntax to Query structures is done by the parseQuery()
-** function.
-*/
-typedef struct QueryTerm {
- short int nPhrase; /* How many following terms are part of the same phrase */
- short int iPhrase; /* This is the i-th term of a phrase. */
- short int iColumn; /* Column of the index that must match this term */
- short int nNear; /* term followed by a NEAR operator with span=(nNear-1) */
- signed char isOr; /* this term is preceded by "OR" */
- signed char isNot; /* this term is preceded by "-" */
- signed char isPrefix; /* this term is followed by "*" */
- char *pTerm; /* text of the term. '\000' terminated. malloced */
- int nTerm; /* Number of bytes in pTerm[] */
-} QueryTerm;
-
-
-/* A query string is parsed into a Query structure.
- *
- * We could, in theory, allow query strings to be complicated
- * nested expressions with precedence determined by parentheses.
- * But none of the major search engines do this. (Perhaps the
- * feeling is that an parenthesized expression is two complex of
- * an idea for the average user to grasp.) Taking our lead from
- * the major search engines, we will allow queries to be a list
- * of terms (with an implied AND operator) or phrases in double-quotes,
- * with a single optional "-" before each non-phrase term to designate
- * negation and an optional OR connector.
- *
- * OR binds more tightly than the implied AND, which is what the
- * major search engines seem to do. So, for example:
- *
- * [one two OR three] ==> one AND (two OR three)
- * [one OR two three] ==> (one OR two) AND three
- *
- * A "-" before a term matches all entries that lack that term.
- * The "-" must occur immediately before the term with in intervening
- * space. This is how the search engines do it.
- *
- * A NOT term cannot be the right-hand operand of an OR. If this
- * occurs in the query string, the NOT is ignored:
- *
- * [one OR -two] ==> one OR two
- *
- */
-typedef struct Query {
- fulltext_vtab *pFts; /* The full text index */
- int nTerms; /* Number of terms in the query */
- QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */
- int nextIsOr; /* Set the isOr flag on the next inserted term */
- int nextIsNear; /* Set the isOr flag on the next inserted term */
- int nextColumn; /* Next word parsed must be in this column */
- int dfltColumn; /* The default column */
-} Query;
-
-
/*
** An instance of the following structure keeps track of generated
** matching-word offset information and snippets.
QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
int eof; /* True if at End Of Results */
- Query q; /* Parsed query string */
+ Fts3Expr *pExpr; /* Parsed MATCH query string */
Snippet snippet; /* Cached snippet for the current row */
int iColumn; /* Column being searched */
DataBuffer result; /* Doclist results from fulltextQuery */
DLReader reader; /* Result reader if result not empty */
} fulltext_cursor;
-static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
+static fulltext_vtab *cursor_vtab(fulltext_cursor *c){
return (fulltext_vtab *) c->base.pVtab;
}
}
}
-
-/* Free all of the dynamically allocated memory held by *q
-*/
-static void queryClear(Query *q){
- int i;
- for(i = 0; i < q->nTerms; ++i){
- sqlite3_free(q->pTerms[i].pTerm);
- }
- sqlite3_free(q->pTerms);
- CLEAR(q);
-}
-
/* Free all of the dynamically allocated memory held by the
** Snippet
*/
sqlite3_free(p->zSnippet);
CLEAR(p);
}
+
/*
** Append a single entry to the p->aMatch[] log.
*/
#define FTS3_ROTOR_SZ (32)
#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)
+/*
+** Function to iterate through the tokens of a compiled expression.
+**
+** (*ppExpr, *piToken) is the iterator state: an expression node and the
+** index of a token within that node's phrase.
+**
+** To begin an iteration, set *ppExpr to the root of the expression tree
+** and *piToken to a negative value. The root must not be NULL, as it is
+** dereferenced without a check. On each subsequent call, pass back the
+** values written by the previous call; *ppExpr must then be a node of
+** type FTSQUERY_PHRASE (this is asserted).
+**
+** Returns 1 if the iterator now points at a token, in which case *ppExpr
+** and *piToken identify it. Returns 0 once the tree is exhausted, in
+** which case *ppExpr is set to NULL.
+**
+** NOTE(review): this assumes that every node with no pLeft child is a
+** phrase node - confirm against the parser in fts3_expr.c.
+*/
+static int nextExprToken(Fts3Expr **ppExpr, int *piToken){
+ Fts3Expr *p = *ppExpr;
+ int iToken = *piToken;
+ if( iToken<0 ){
+ /* In this case the expression p is the root of an expression tree.
+ ** Move to the first token in the expression tree.
+ ** This is token 0 of the left-most node, found by following the
+ ** pLeft links until a node with no left child is reached.
+ */
+ while( p->pLeft ){
+ p = p->pLeft;
+ }
+ iToken = 0;
+ }else{
+ assert(p && p->eType==FTSQUERY_PHRASE );
+ if( iToken<(p->pPhrase->nToken-1) ){
+ iToken++;              /* More tokens remain in the current phrase */
+ }else{
+ iToken = 0;            /* Phrase finished: move to token 0 of the next one */
+ while( p->pParent && p->pParent->pLeft!=p ){  /* Climb while p is a right child */
+ assert( p->pParent->pRight==p );
+ p = p->pParent;
+ }
+ p = p->pParent;        /* Parent reached from its left child, or NULL at the root */
+ if( p ){
+ p = p->pRight;         /* Step into the right subtree not yet visited */
+ while( p->pLeft ){     /* ...and descend to its left-most node */
+ p = p->pLeft;
+ }
+ }
+ }
+ }
+
+ *ppExpr = p;
+ *piToken = iToken;
+ return p?1:0;            /* 0 means there are no more tokens */
+}
+
/*
** Add entries to pSnippet->aMatch[] for every match that occurs against
** document zDoc[0..nDoc-1] which is stored in column iColumn.
*/
static void snippetOffsetsOfColumn(
- Query *pQuery,
+ fulltext_cursor *pCur,
Snippet *pSnippet,
int iColumn,
const char *zDoc,
sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */
fulltext_vtab *pVtab; /* The full text index */
int nColumn; /* Number of columns in the index */
- const QueryTerm *aTerm; /* Query string terms */
- int nTerm; /* Number of query string terms */
int i, j; /* Loop counters */
int rc; /* Return code */
unsigned int match, prevMatch; /* Phrase search bitmasks */
int iRotorBegin[FTS3_ROTOR_SZ]; /* Beginning offset of token */
int iRotorLen[FTS3_ROTOR_SZ]; /* Length of token */
- pVtab = pQuery->pFts;
+ pVtab = cursor_vtab(pCur);
nColumn = pVtab->nColumn;
pTokenizer = pVtab->pTokenizer;
pTModule = pTokenizer->pModule;
rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
if( rc ) return;
pTCursor->pTokenizer = pTokenizer;
- aTerm = pQuery->pTerms;
- nTerm = pQuery->nTerms;
- if( nTerm>=FTS3_ROTOR_SZ ){
- nTerm = FTS3_ROTOR_SZ - 1;
- }
+
prevMatch = 0;
- while(1){
- rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
- if( rc ) break;
+ while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){
+ Fts3Expr *pIter = pCur->pExpr;
+ int iIter = -1;
iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
match = 0;
- for(i=0; i<nTerm; i++){
- int iCol;
- iCol = aTerm[i].iColumn;
+ for(i=0; i<(FTS3_ROTOR_SZ-1) && nextExprToken(&pIter, &iIter); i++){
+ int nPhrase = pIter->pPhrase->nToken; /* Tokens in current phrase */
+ struct PhraseToken *pToken = &pIter->pPhrase->aToken[iIter];
+ int iCol = pIter->pPhrase->iColumn;
if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
- if( aTerm[i].nTerm>nToken ) continue;
- if( !aTerm[i].isPrefix && aTerm[i].nTerm<nToken ) continue;
- assert( aTerm[i].nTerm<=nToken );
- if( memcmp(aTerm[i].pTerm, zToken, aTerm[i].nTerm) ) continue;
- if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
+ if( pToken->n>nToken ) continue;
+ if( !pToken->isPrefix && pToken->n<nToken ) continue;
+ assert( pToken->n<=nToken );
+ if( memcmp(pToken->z, zToken, pToken->n) ) continue;
+ if( iIter>0 && (prevMatch & (1<<i))==0 ) continue;
match |= 1<<i;
- if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
- for(j=aTerm[i].iPhrase-1; j>=0; j--){
+ if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){
+ for(j=nPhrase-1; j>=0; j--){
int k = (iRotor-j) & FTS3_ROTOR_MASK;
snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
iRotorBegin[k], iRotorLen[k]);
** then when this function is called the Snippet contains token offsets
** 0, 4 and 5. This function removes the "0" entry (because the first A
** is not near enough to an E).
+**
+** When this function is called, the value pointed to by parameter piLeft is
+** the integer id of the left-most token in the expression tree headed by
+** pExpr. This function increments *piLeft by the total number of tokens
+** in the expression tree headed by pExpr.
*/
-static void trimSnippetOffsetsForNear(Query *pQuery, Snippet *pSnippet){
- int ii;
- int iDir = 1;
-
- while(iDir>-2) {
- assert( iDir==1 || iDir==-1 );
- for(ii=0; ii<pSnippet->nMatch; ii++){
- int jj;
- int nNear;
- struct snippetMatch *pMatch = &pSnippet->aMatch[ii];
- QueryTerm *pQueryTerm = &pQuery->pTerms[pMatch->iTerm];
-
- if( (pMatch->iTerm+iDir)<0
- || (pMatch->iTerm+iDir)>=pQuery->nTerms
- ){
- continue;
- }
-
- nNear = pQueryTerm->nNear;
- if( iDir<0 ){
- nNear = pQueryTerm[-1].nNear;
- }
-
- if( pMatch->iTerm>=0 && nNear ){
- int isOk = 0;
- int iNextTerm = pMatch->iTerm+iDir;
- int iPrevTerm = iNextTerm;
-
- int iEndToken;
- int iStartToken;
-
- if( iDir<0 ){
- int nPhrase = 1;
- iStartToken = pMatch->iToken;
- while( (pMatch->iTerm+nPhrase)<pQuery->nTerms
- && pQuery->pTerms[pMatch->iTerm+nPhrase].iPhrase>1
- ){
- nPhrase++;
- }
- iEndToken = iStartToken + nPhrase - 1;
- }else{
- iEndToken = pMatch->iToken;
- iStartToken = pMatch->iToken+1-pQueryTerm->iPhrase;
- }
+static int trimSnippetOffsets(
+ Fts3Expr *pExpr,
+ Snippet *pSnippet,
+ int *piLeft
+){
+ if( pExpr ){
+ if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
+ return 1;
+ }
- while( pQuery->pTerms[iNextTerm].iPhrase>1 ){
- iNextTerm--;
- }
- while( (iPrevTerm+1)<pQuery->nTerms &&
- pQuery->pTerms[iPrevTerm+1].iPhrase>1
- ){
- iPrevTerm++;
+ switch( pExpr->eType ){
+ case FTSQUERY_PHRASE:
+ *piLeft += pExpr->pPhrase->nToken;
+ break;
+ case FTSQUERY_NEAR: {
+ /* The right-hand-side of a NEAR operator is always a phrase. The
+ ** left-hand-side is either a phrase or an expression tree that is
+ ** itself headed by a NEAR operator. The following initializations
+ ** set local variable iLeft to the token number of the left-most
+ ** token in the right-hand phrase, and nToken to the total number of
+ ** tokens in the left-hand and right-hand phrases. For example, if we had:
+ **
+ ** <col> MATCH '"abc def" NEAR/2 "ghi jkl"'
+ **
+ ** then iLeft will be set to 2 (token number of ghi) and nToken will
+ ** be set to 4.
+ */
+ Fts3Expr *pLeft = pExpr->pLeft;
+ Fts3Expr *pRight = pExpr->pRight;
+ int iLeft = *piLeft;
+ int nNear = pExpr->nNear;
+ int nToken = pRight->pPhrase->nToken;
+ int jj, ii;
+ if( pLeft->eType==FTSQUERY_NEAR ){
+ pLeft = pLeft->pRight;
}
-
- for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
- struct snippetMatch *p = &pSnippet->aMatch[jj];
- if( p->iCol==pMatch->iCol && ((
- p->iTerm==iNextTerm &&
- p->iToken>iEndToken &&
- p->iToken<=iEndToken+nNear
- ) || (
- p->iTerm==iPrevTerm &&
- p->iToken<iStartToken &&
- p->iToken>=iStartToken-nNear
- ))){
- isOk = 1;
+ assert( pRight->eType==FTSQUERY_PHRASE );
+ assert( pLeft->eType==FTSQUERY_PHRASE );
+ nToken += pLeft->pPhrase->nToken;
+
+ for(ii=0; ii<pSnippet->nMatch; ii++){
+ struct snippetMatch *p = &pSnippet->aMatch[ii];
+ if( p->iTerm==iLeft ){
+ int isOk = 0;
+ /* Snippet ii is an occurrence of query term iLeft in the document.
+ ** It occurs at position (p->iToken) of the document. We now
+ ** search for an instance of token (iLeft-1) somewhere in the
+ ** range (p->iToken - nNear)...(p->iToken + nNear + nToken) within
+ ** the set of snippetMatch structures. If one is found, proceed.
+ ** If one cannot be found, then remove snippets ii..(ii+N-1)
+ ** from the matching snippets, where N is the number of tokens
+ ** in phrase pRight->pPhrase.
+ */
+ for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
+ struct snippetMatch *p2 = &pSnippet->aMatch[jj];
+ if( p2->iTerm==(iLeft-1) ){
+ if( p2->iToken>=(p->iToken-nNear-1)
+ && p2->iToken<(p->iToken+nNear+nToken)
+ ){
+ isOk = 1;
+ }
+ }
+ }
+ if( !isOk ){
+ int kk;
+ for(kk=0; kk<pRight->pPhrase->nToken; kk++){
+ pSnippet->aMatch[kk+ii].iTerm = -2;
+ }
+ return 1;
+ }
}
- }
- if( !isOk ){
- for(jj=1-pQueryTerm->iPhrase; jj<=0; jj++){
- pMatch[jj].iTerm = -1;
+ if( p->iTerm==(iLeft-1) ){
+ int isOk = 0;
+ for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
+ struct snippetMatch *p2 = &pSnippet->aMatch[jj];
+ if( p2->iTerm==iLeft ){
+ if( p2->iToken<=(p->iToken+nNear+1)
+ && p2->iToken>(p->iToken-nNear-nToken)
+ ){
+ isOk = 1;
+ }
+ }
+ }
+ if( !isOk ){
+ int kk;
+ for(kk=0; kk<pLeft->pPhrase->nToken; kk++){
+ pSnippet->aMatch[ii-kk].iTerm = -2;
+ }
+ return 1;
+ }
}
- ii = -1;
- iDir = 1;
}
+ break;
}
}
- iDir -= 2;
+
+ if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){
+ return 1;
+ }
}
+ return 0;
}
/*
int nColumn;
int iColumn, i;
int iFirst, iLast;
- fulltext_vtab *pFts;
+ int iTerm = 0;
+ fulltext_vtab *pFts = cursor_vtab(p);
- if( p->snippet.nMatch ) return;
- if( p->q.nTerms==0 ) return;
- pFts = p->q.pFts;
+ if( p->snippet.nMatch || p->pExpr==0 ){
+ return;
+ }
nColumn = pFts->nColumn;
iColumn = (p->iCursorType - QUERY_FULLTEXT);
if( iColumn<0 || iColumn>=nColumn ){
int nDoc;
zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
nDoc = sqlite3_column_bytes(p->pStmt, i+1);
- snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
+ snippetOffsetsOfColumn(p, &p->snippet, i, zDoc, nDoc);
}
- trimSnippetOffsetsForNear(&p->q, &p->snippet);
+ while( trimSnippetOffsets(p->pExpr, &p->snippet, &iTerm) ){
+ iTerm = 0;
+ }
}
/*
** Convert the information in the aMatch[] array of the snippet
-** into the string zOffset[0..nOffset-1].
+** into the string zOffset[0..nOffset-1]. This string is used as
+** the return of the SQL offsets() function.
*/
static void snippetOffsetText(Snippet *p){
int i;
aMatch[i].snStatus = SNIPPET_IGNORE;
}
nDesired = 0;
- for(i=0; i<pCursor->q.nTerms; i++){
+ for(i=0; i<FTS3_ROTOR_SZ; i++){
for(j=0; j<nMatch; j++){
if( aMatch[j].iTerm==i ){
aMatch[j].snStatus = SNIPPET_DESIRED;
fulltext_cursor *c = (fulltext_cursor *) pCursor;
FTSTRACE(("FTS3 Close %p\n", c));
sqlite3_finalize(c->pStmt);
- queryClear(&c->q);
+ sqlite3Fts3ExprFree(c->pExpr);
snippetClear(&c->snippet);
- if( c->result.nData!=0 ) dlrDestroy(&c->reader);
+ if( c->result.nData!=0 ){
+ dlrDestroy(&c->reader);
+ }
dataBufferDestroy(&c->result);
sqlite3_free(c);
return SQLITE_OK;
const char *pTerm, int nTerm, int isPrefix,
DocListType iType, DataBuffer *out);
-/* Return a DocList corresponding to the query term *pTerm. If *pTerm
-** is the first term of a phrase query, go ahead and evaluate the phrase
-** query and return the doclist for the entire phrase query.
+/*
+** Return a DocList corresponding to the phrase *pPhrase.
**
** The resulting DL_DOCIDS doclist is stored in pResult, which is
** overwritten.
*/
-static int docListOfTerm(
- fulltext_vtab *v, /* The full text index */
- int iColumn, /* column to restrict to. No restriction if >=nColumn */
- QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */
- DataBuffer *pResult /* Write the result here */
+static int docListOfPhrase(
+ fulltext_vtab *pTab, /* The full text index */
+ Fts3Phrase *pPhrase, /* Phrase to return a doclist corresponding to */
+ DocListType eListType, /* Either DL_DOCIDS or DL_POSITIONS */
+ DataBuffer *pResult /* Write the result here */
){
- DataBuffer left, right, new;
- int i, rc;
-
- /* No phrase search if no position info. */
- assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );
+ int ii;
+ int rc = SQLITE_OK;
+ int iCol = pPhrase->iColumn;
+ DocListType eType = eListType;
+ assert( eType==DL_POSITIONS || eType==DL_DOCIDS );
+ if( pPhrase->nToken>1 ){
+ eType = DL_POSITIONS;
+ }
/* This code should never be called with buffered updates. */
- assert( v->nPendingData<0 );
+ assert( pTab->nPendingData<0 );
- dataBufferInit(&left, 0);
- rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
- (0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS), &left);
- if( rc ) return rc;
- for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
- /* If this token is connected to the next by a NEAR operator, and
- ** the next token is the start of a phrase, then set nPhraseRight
- ** to the number of tokens in the phrase. Otherwise leave it at 1.
- */
- int nPhraseRight = 1;
- while( (i+nPhraseRight)<=pQTerm->nPhrase
- && pQTerm[i+nPhraseRight].nNear==0
- ){
- nPhraseRight++;
- }
-
- dataBufferInit(&right, 0);
- rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
- pQTerm[i].isPrefix, DL_POSITIONS, &right);
- if( rc ){
- dataBufferDestroy(&left);
- return rc;
+ for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){
+ DataBuffer tmp;
+ struct PhraseToken *p = &pPhrase->aToken[ii];
+ rc = termSelect(pTab, iCol, p->z, p->n, p->isPrefix, eType, &tmp);
+ if( rc==SQLITE_OK ){
+ if( ii==0 ){
+ *pResult = tmp;
+ }else{
+ DataBuffer res = *pResult;
+ dataBufferInit(pResult, 0);
+ if( ii==(pPhrase->nToken-1) ){
+ eType = eListType;
+ }
+ docListPhraseMerge(
+ res.pData, res.nData, tmp.pData, tmp.nData, 0, 0, eType, pResult
+ );
+ dataBufferDestroy(&res);
+ dataBufferDestroy(&tmp);
+ }
}
- dataBufferInit(&new, 0);
- docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
- pQTerm[i-1].nNear, pQTerm[i-1].iPhrase + nPhraseRight,
- ((i<pQTerm->nPhrase) ? DL_POSITIONS : DL_DOCIDS),
- &new);
- dataBufferDestroy(&left);
- dataBufferDestroy(&right);
- left = new;
- }
- *pResult = left;
- return SQLITE_OK;
-}
-
-/* Add a new term pTerm[0..nTerm-1] to the query *q.
-*/
-static void queryAdd(Query *q, const char *pTerm, int nTerm){
- QueryTerm *t;
- ++q->nTerms;
- q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
- if( q->pTerms==0 ){
- q->nTerms = 0;
- return;
}
- t = &q->pTerms[q->nTerms - 1];
- CLEAR(t);
- t->pTerm = sqlite3_malloc(nTerm+1);
- memcpy(t->pTerm, pTerm, nTerm);
- t->pTerm[nTerm] = 0;
- t->nTerm = nTerm;
- t->isOr = q->nextIsOr;
- t->isPrefix = 0;
- q->nextIsOr = 0;
- t->iColumn = q->nextColumn;
- q->nextColumn = q->dfltColumn;
-}
-/*
-** Check to see if the string zToken[0...nToken-1] matches any
-** column name in the virtual table. If it does,
-** return the zero-indexed column number. If not, return -1.
-*/
-static int checkColumnSpecifier(
- fulltext_vtab *pVtab, /* The virtual table */
- const char *zToken, /* Text of the token */
- int nToken /* Number of characters in the token */
-){
- int i;
- for(i=0; i<pVtab->nColumn; i++){
- if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
- && pVtab->azColumn[i][nToken]==0 ){
- return i;
- }
- }
- return -1;
+ return rc;
}
/*
-** Parse the text at zSegment[0..nSegment-1]. Add additional terms
-** to the query being assemblied in pQuery.
-**
-** inPhrase is true if zSegment[0..nSegement-1] is contained within
-** double-quotes. If inPhrase is true, then the first term
-** is marked with the number of terms in the phrase less one and
-** OR and "-" syntax is ignored. If inPhrase is false, then every
-** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
-*/
-static int tokenizeSegment(
- sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */
- const char *zSegment, int nSegment, /* Query expression being parsed */
- int inPhrase, /* True if within "..." */
- Query *pQuery /* Append results here */
+** Evaluate the full-text expression pExpr against fts3 table pTab. Write
+** the results into pRes.
+*/
+static int evalFts3Expr(
+ fulltext_vtab *pTab, /* Fts3 Virtual table object */
+ Fts3Expr *pExpr, /* Parsed fts3 expression */
+ DataBuffer *pRes /* OUT: Write results of the expression here */
){
- const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
- sqlite3_tokenizer_cursor *pCursor;
- int firstIndex = pQuery->nTerms;
- int iCol;
- int nTerm = 1;
-
- int rc = pModule->xOpen(pTokenizer, zSegment, nSegment, &pCursor);
- if( rc!=SQLITE_OK ) return rc;
- pCursor->pTokenizer = pTokenizer;
+ int rc = SQLITE_OK;
- while( 1 ){
- const char *zToken;
- int nToken, iBegin, iEnd, iPos;
+ /* Initialize the output buffer. If this is an empty query (pExpr==0),
+ ** this is all that needs to be done. Empty queries produce empty
+ ** result sets.
+ */
+ dataBufferInit(pRes, 0);
- rc = pModule->xNext(pCursor,
- &zToken, &nToken,
- &iBegin, &iEnd, &iPos);
- if( rc!=SQLITE_OK ) break;
- if( !inPhrase &&
- zSegment[iEnd]==':' &&
- (iCol = checkColumnSpecifier(pQuery->pFts, zToken, nToken))>=0 ){
- pQuery->nextColumn = iCol;
- continue;
- }
- if( !inPhrase && pQuery->nTerms>0 && nToken==2
- && zSegment[iBegin+0]=='O'
- && zSegment[iBegin+1]=='R'
- ){
- pQuery->nextIsOr = 1;
- continue;
- }
- if( !inPhrase && pQuery->nTerms>0 && !pQuery->nextIsOr && nToken==4
- && memcmp(&zSegment[iBegin], "NEAR", 4)==0
- ){
- QueryTerm *pTerm = &pQuery->pTerms[pQuery->nTerms-1];
- if( (iBegin+6)<nSegment
- && zSegment[iBegin+4] == '/'
- && isdigit(zSegment[iBegin+5])
- ){
- int k;
- pTerm->nNear = 0;
- for(k=5; (iBegin+k)<=nSegment && isdigit(zSegment[iBegin+k]); k++){
- pTerm->nNear = pTerm->nNear*10 + (zSegment[iBegin+k] - '0');
- }
- pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
- } else {
- pTerm->nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
+ if( pExpr ){
+ if( pExpr->eType==FTSQUERY_PHRASE ){
+ DocListType eType = DL_DOCIDS;
+ if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
+ eType = DL_POSITIONS;
}
- pTerm->nNear++;
- continue;
- }
-
- queryAdd(pQuery, zToken, nToken);
- if( !inPhrase && iBegin>0 && zSegment[iBegin-1]=='-' ){
- pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
- }
- if( iEnd<nSegment && zSegment[iEnd]=='*' ){
- pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1;
- }
- pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
- if( inPhrase ){
- nTerm++;
- }
- }
-
- if( inPhrase && pQuery->nTerms>firstIndex ){
- pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
- }
-
- return pModule->xClose(pCursor);
-}
-
-/* Parse a query string, yielding a Query object pQuery.
-**
-** The calling function will need to queryClear() to clean up
-** the dynamically allocated memory held by pQuery.
-*/
-static int parseQuery(
- fulltext_vtab *v, /* The fulltext index */
- const char *zInput, /* Input text of the query string */
- int nInput, /* Size of the input text */
- int dfltColumn, /* Default column of the index to match against */
- Query *pQuery /* Write the parse results here. */
-){
- int iInput, inPhrase = 0;
- int ii;
- QueryTerm *aTerm;
-
- if( zInput==0 ) nInput = 0;
- if( nInput<0 ) nInput = strlen(zInput);
- pQuery->nTerms = 0;
- pQuery->pTerms = NULL;
- pQuery->nextIsOr = 0;
- pQuery->nextColumn = dfltColumn;
- pQuery->dfltColumn = dfltColumn;
- pQuery->pFts = v;
-
- for(iInput=0; iInput<nInput; ++iInput){
- int i;
- for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
- if( i>iInput ){
- tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
- pQuery);
- }
- iInput = i;
- if( i<nInput ){
- assert( zInput[i]=='"' );
- inPhrase = !inPhrase;
- }
- }
-
- if( inPhrase ){
- /* unmatched quote */
- queryClear(pQuery);
- return SQLITE_ERROR;
- }
+ rc = docListOfPhrase(pTab, pExpr->pPhrase, eType, pRes);
+ }else{
+ DataBuffer lhs;
+ DataBuffer rhs;
- /* Modify the values of the QueryTerm.nPhrase variables to account for
- ** the NEAR operator. For the purposes of QueryTerm.nPhrase, phrases
- ** and tokens connected by the NEAR operator are handled as a single
- ** phrase. See comments above the QueryTerm structure for details.
- */
- aTerm = pQuery->pTerms;
- for(ii=0; ii<pQuery->nTerms; ii++){
- if( aTerm[ii].nNear || aTerm[ii].nPhrase ){
- while (aTerm[ii+aTerm[ii].nPhrase].nNear) {
- aTerm[ii].nPhrase += (1 + aTerm[ii+aTerm[ii].nPhrase+1].nPhrase);
+ dataBufferInit(&rhs, 0);
+ if( SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pLeft, &lhs))
+ && SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pRight, &rhs))
+ ){
+ switch( pExpr->eType ){
+ case FTSQUERY_NEAR: {
+ int nToken;
+ Fts3Expr *pLeft;
+ DocListType eType = DL_DOCIDS;
+ if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
+ eType = DL_POSITIONS;
+ }
+ pLeft = pExpr->pLeft;
+ while( pLeft->eType==FTSQUERY_NEAR ){
+ pLeft=pLeft->pRight;
+ }
+ assert( pExpr->pRight->eType==FTSQUERY_PHRASE );
+ assert( pLeft->eType==FTSQUERY_PHRASE );
+ nToken = pLeft->pPhrase->nToken + pExpr->pRight->pPhrase->nToken;
+ docListPhraseMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData,
+ pExpr->nNear+1, nToken, eType, pRes
+ );
+ break;
+ }
+ case FTSQUERY_NOT: {
+ docListExceptMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData,pRes);
+ break;
+ }
+ case FTSQUERY_AND: {
+ docListAndMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes);
+ break;
+ }
+ case FTSQUERY_OR: {
+ docListOrMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes);
+ break;
+ }
+ }
}
+ dataBufferDestroy(&lhs);
+ dataBufferDestroy(&rhs);
}
}
- return SQLITE_OK;
+ return rc;
}
/* TODO(shess) Refactor the code to remove this forward decl. */
const char *zInput, /* The query string */
int nInput, /* Number of bytes in zInput[] */
DataBuffer *pResult, /* Write the result doclist here */
- Query *pQuery /* Put parsed query string here */
+ Fts3Expr **ppExpr /* Put parsed query string here */
){
- int i, iNext, rc;
- DataBuffer left, right, or, new;
- int nNot = 0;
- QueryTerm *aTerm;
+ int rc;
/* TODO(shess) Instead of flushing pendingTerms, we could query for
** the relevant term and merge the doclist into what we receive from
/* Flush any buffered updates before executing the query. */
rc = flushPendingTerms(v);
- if( rc!=SQLITE_OK ) return rc;
-
- /* TODO(shess) I think that the queryClear() calls below are not
- ** necessary, because fulltextClose() already clears the query.
- */
- rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
- if( rc!=SQLITE_OK ) return rc;
-
- /* Empty or NULL queries return no results. */
- if( pQuery->nTerms==0 ){
- dataBufferInit(pResult, 0);
- return SQLITE_OK;
- }
-
- /* Merge AND terms. */
- /* TODO(shess) I think we can early-exit if( i>nNot && left.nData==0 ). */
- aTerm = pQuery->pTerms;
- for(i = 0; i<pQuery->nTerms; i=iNext){
- if( aTerm[i].isNot ){
- /* Handle all NOT terms in a separate pass */
- nNot++;
- iNext = i + aTerm[i].nPhrase+1;
- continue;
- }
- iNext = i + aTerm[i].nPhrase + 1;
- rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);
- if( rc ){
- if( i!=nNot ) dataBufferDestroy(&left);
- queryClear(pQuery);
- return rc;
- }
- while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
- rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or);
- iNext += aTerm[iNext].nPhrase + 1;
- if( rc ){
- if( i!=nNot ) dataBufferDestroy(&left);
- dataBufferDestroy(&right);
- queryClear(pQuery);
- return rc;
- }
- dataBufferInit(&new, 0);
- docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new);
- dataBufferDestroy(&right);
- dataBufferDestroy(&or);
- right = new;
- }
- if( i==nNot ){ /* first term processed. */
- left = right;
- }else{
- dataBufferInit(&new, 0);
- docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new);
- dataBufferDestroy(&right);
- dataBufferDestroy(&left);
- left = new;
- }
- }
-
- if( nNot==pQuery->nTerms ){
- /* We do not yet know how to handle a query of only NOT terms */
- return SQLITE_ERROR;
+ if( rc!=SQLITE_OK ){
+ return rc;
}
- /* Do the EXCEPT terms */
- for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){
- if( !aTerm[i].isNot ) continue;
- rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);
- if( rc ){
- queryClear(pQuery);
- dataBufferDestroy(&left);
- return rc;
- }
- dataBufferInit(&new, 0);
- docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new);
- dataBufferDestroy(&right);
- dataBufferDestroy(&left);
- left = new;
+ /* Parse the query passed to the MATCH operator. */
+ rc = sqlite3Fts3ExprParse(v->pTokenizer,
+ v->azColumn, v->nColumn, iColumn, zInput, nInput, ppExpr
+ );
+ if( rc!=SQLITE_OK ){
+ assert( 0==(*ppExpr) );
+ return rc;
}
- *pResult = left;
- return rc;
+ return evalFts3Expr(v, *ppExpr, pResult);
}
/*
default: /* full-text search */
{
+ int iCol = idxNum-QUERY_FULLTEXT;
const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
assert( argc==1 );
- queryClear(&c->q);
if( c->result.nData!=0 ){
/* This case happens if the same cursor is used repeatedly. */
dlrDestroy(&c->reader);
}else{
dataBufferInit(&c->result, 0);
}
- rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q);
+ rc = fulltextQuery(v, iCol, zQuery, -1, &c->result, &c->pExpr);
if( rc!=SQLITE_OK ) return rc;
if( c->result.nData!=0 ){
dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
/* Scan the database and merge together the posting lists for the term
** into *out.
*/
-static int termSelect(fulltext_vtab *v, int iColumn,
- const char *pTerm, int nTerm, int isPrefix,
- DocListType iType, DataBuffer *out){
+static int termSelect(
+ fulltext_vtab *v,
+ int iColumn,
+ const char *pTerm, int nTerm, /* Term to query for */
+ int isPrefix, /* True for a prefix search */
+ DocListType iType,
+ DataBuffer *out /* Write results here */
+){
DataBuffer doclist;
sqlite3_stmt *s;
int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
assert( v->nPendingData<0 );
dataBufferInit(&doclist, 0);
+ dataBufferInit(out, 0);
/* Traverse the segments from oldest to newest so that newer doclist
** elements for given docids overwrite older elements.
}
}
+#ifdef SQLITE_TEST
+ sqlite3Fts3ExprInitTestInterface(db);
+#endif
+
/* Create the virtual table wrapper around the hash-table and overload
** the two scalar functions. If this is successful, register the
** module with sqlite.
--- /dev/null
+/*
+** 2008 Nov 28
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This module contains code that implements a parser for fts3 query strings
+** (the right-hand argument to the MATCH operator). Because the supported
+** syntax is relatively simple, the whole tokenizer/parser system is
+** hand-coded. The public interface to this module is declared in source
+** code file "fts3_expr.h".
+*/
+
+/*
+** By default, this module parses the legacy syntax that has been
+** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
+** is defined, then it uses the new syntax. The differences between
+** the new and the old syntaxes are:
+**
+** a) The new syntax supports parenthesis. The old does not.
+**
+** b) The new syntax supports the AND and NOT operators. The old does not.
+**
+** c) The old syntax supports the "-" token qualifier. This is not
+** supported by the new syntax (it is replaced by the NOT operator).
+**
+** d) When using the old syntax, the OR operator has a greater precedence
+** than an implicit AND. When using the new, both implicit and explicit
+** AND operators have a higher precedence than OR.
+**
+** If compiled with SQLITE_TEST defined, then this module exports the
+** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
+** to zero causes the module to use the old syntax. If it is set to
+** non-zero the new syntax is activated. This is so both syntaxes can
+** be tested using a single build of testfixture.
+*/
+#ifdef SQLITE_TEST
+int sqlite3_fts3_enable_parentheses = 0;
+#else
+# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
+# define sqlite3_fts3_enable_parentheses 1
+# else
+# define sqlite3_fts3_enable_parentheses 0
+# endif
+#endif
+
+/*
+** Default span for NEAR operators.
+*/
+#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
+
+#include "fts3_expr.h"
+#include "sqlite3.h"
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+
+/*
+** State shared by the parser routines in this file while one fts3 query
+** expression is being parsed. An instance is set up on the stack by
+** sqlite3Fts3ExprParse() and handed down to each helper.
+*/
+typedef struct ParseContext ParseContext;
+struct ParseContext {
+ sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
+ const char **azCol; /* Array of column names for fts3 table */
+ int nCol; /* Number of entries in azCol[] */
+ int iDefaultCol; /* Default column to query */
+ sqlite3_context *pCtx; /* Write error message here (NOTE: not read or written by the parser code in this file) */
+ int nNest; /* Number of nested brackets: ++ on '(' and -- on ')' */
+};
+
+/*
+** This function is equivalent to the standard isspace() function.
+**
+** The standard isspace() can be awkward to use safely, because although it
+** is defined to accept an argument of type int, its behaviour when passed
+** an integer that falls outside of the range of the unsigned char type
+** is undefined (and sometimes, "undefined" means segfault). This wrapper
+** is defined to accept an argument of type char, and always returns 0 for
+** any values that fall outside of the range of the unsigned char type (i.e.
+** negative values).
+*/
+static int safe_isspace(char c){
+  /* Bytes with the high bit set are never treated as whitespace and are
+  ** never handed to isspace(). */
+  if( c & 0x80 ){
+    return 0;
+  }
+  return isspace(c);
+}
+
+/*
+** Extract the next token from buffer z (length n) using the tokenizer
+** and other information (column names etc.) in pParse. Create an Fts3Expr
+** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
+** single token and set *ppExpr to point to it. The expression, its
+** Fts3Phrase and the copy of the token text share a single allocation, so
+** it is the responsibility of the caller to eventually deallocate all of
+** it by passing the Fts3Expr pointer to sqlite3_free().
+**
+** A '*' immediately following the token marks it as a prefix token and is
+** counted among the consumed bytes. When the legacy syntax is in use, a '-'
+** immediately preceding the token sets Fts3Phrase.isNot.
+**
+** Return SQLITE_OK if a token was extracted, SQLITE_NOMEM if a memory
+** allocation fails, or otherwise the non-OK code returned by the
+** tokenizer's xOpen or xNext method (callers in this file treat SQLITE_DONE
+** as "no token left in the buffer"). Unless SQLITE_OK is returned,
+** *ppExpr is set to 0.
+**
+** *pnConsumed is set to the offset within z just past the token (and its
+** optional '*') whenever xNext produced a token, and to 0 otherwise.
+*/
+static int getNextToken(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  int iCol,                               /* Value for Fts3Phrase.iColumn */
+  const char *z, int n,                   /* Input string */
+  Fts3Expr **ppExpr,                      /* OUT: expression */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  int rc;
+  sqlite3_tokenizer_cursor *pCursor;
+  Fts3Expr *pRet = 0;
+  int nConsumed = 0;
+
+  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
+  if( rc==SQLITE_OK ){
+    const char *zToken;
+    int nToken, iStart, iEnd, iPosition;
+    int nByte;                            /* total space to allocate */
+
+    pCursor->pTokenizer = pTokenizer;
+    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
+
+    if( rc==SQLITE_OK ){
+      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
+      pRet = (Fts3Expr *)sqlite3_malloc(nByte);
+      if( !pRet ){
+        rc = SQLITE_NOMEM;
+      }else{
+        memset(pRet, 0, nByte);
+        pRet->eType = FTSQUERY_PHRASE;
+        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
+        pRet->pPhrase->nToken = 1;
+        pRet->pPhrase->iColumn = iCol;
+        pRet->pPhrase->aToken[0].n = nToken;
+        /* The token text lives directly after the Fts3Phrase. */
+        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
+        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
+
+        if( iEnd<n && z[iEnd]=='*' ){
+          pRet->pPhrase->aToken[0].isPrefix = 1;
+          iEnd++;
+        }
+        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
+          pRet->pPhrase->isNot = 1;
+        }
+      }
+
+      /* iEnd is only defined once xNext has succeeded. Reading it after a
+      ** failed or SQLITE_DONE call would hand an indeterminate byte count
+      ** back to the caller. */
+      nConsumed = iEnd;
+    }
+
+    pModule->xClose(pCursor);
+  }
+
+  *pnConsumed = nConsumed;
+  *ppExpr = pRet;
+  return rc;
+}
+
+/*
+** Resize the allocation at *ppOrig to nNew bytes with sqlite3_realloc().
+** If the resize fails the original allocation is released, so the caller
+** never has to free it separately. In every case *ppOrig is overwritten
+** with the result, which is NULL on failure.
+**
+** When nNew is zero or negative sqlite3_realloc() itself behaves like
+** sqlite3_free(), so the buffer must not be freed a second time here.
+*/
+void realloc_or_free(void **ppOrig, int nNew){
+  void *pOld = *ppOrig;
+  void *pNew = sqlite3_realloc(pOld, nNew);
+  if( pNew==0 && nNew>0 ){
+    sqlite3_free(pOld);
+  }
+  *ppOrig = pNew;
+}
+
+/*
+** Buffer zInput, length nInput, contains the contents of a quoted string
+** that appeared as part of an fts3 query expression. Neither quote character
+** is included in the buffer. This function attempts to tokenize the entire
+** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
+** containing the results.
+**
+** While tokenizing, the token texts are accumulated in a temporary buffer.
+** Once the tokenizer is exhausted they are copied to the end of the single
+** allocation that holds the Fts3Expr, the Fts3Phrase and its aToken[]
+** array, and each aToken[].z is pointed at its text.
+**
+** If successful, SQLITE_OK is returned and *ppExpr set to point at the
+** allocated Fts3Expr structure. Otherwise *ppExpr is set to 0, nothing is
+** left allocated, and one of the following is returned:
+**
+**   SQLITE_NOMEM   a memory allocation failed,
+**   SQLITE_ERROR   the quoted string contains no tokens at all (there is
+**                  nothing for such a phrase to match),
+**   other          the error code reported by the tokenizer.
+*/
+static int getNextString(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *zInput, int nInput,         /* Input string */
+  Fts3Expr **ppExpr                       /* OUT: expression */
+){
+  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  int rc;
+  Fts3Expr *p = 0;
+  sqlite3_tokenizer_cursor *pCursor = 0;
+  char *zTemp = 0;                        /* Concatenated token texts */
+  int nTemp = 0;                          /* Bytes used in zTemp */
+
+  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
+  if( rc==SQLITE_OK ){
+    int ii;
+    pCursor->pTokenizer = pTokenizer;
+    for(ii=0; rc==SQLITE_OK; ii++){
+      const char *zToken;
+      int nToken, iBegin, iEnd, iPos;
+      rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
+      if( rc==SQLITE_OK ){
+        /* sizeof(Fts3Phrase) already includes room for one PhraseToken. */
+        int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
+        realloc_or_free((void **)&p, nByte+ii*sizeof(struct PhraseToken));
+        realloc_or_free((void **)&zTemp, nTemp + nToken);
+        if( !p || !zTemp ){
+          goto no_mem;
+        }
+        if( ii==0 ){
+          memset(p, 0, nByte);
+          p->pPhrase = (Fts3Phrase *)&p[1];
+          p->eType = FTSQUERY_PHRASE;
+          p->pPhrase->iColumn = pParse->iDefaultCol;
+        }
+        /* p may have moved, so refresh the interior pointer every time. */
+        p->pPhrase = (Fts3Phrase *)&p[1];
+        p->pPhrase->nToken = ii+1;
+        p->pPhrase->aToken[ii].n = nToken;
+        memcpy(&zTemp[nTemp], zToken, nToken);
+        nTemp += nToken;
+        if( iEnd<nInput && zInput[iEnd]=='*' ){
+          p->pPhrase->aToken[ii].isPrefix = 1;
+        }else{
+          p->pPhrase->aToken[ii].isPrefix = 0;
+        }
+      }
+    }
+
+    pModule->xClose(pCursor);
+    pCursor = 0;
+  }
+
+  if( rc==SQLITE_DONE ){
+    if( p==0 ){
+      /* The tokenizer produced no tokens (for example the query contained
+      ** ""). There is no phrase to build; dereferencing p here would be a
+      ** NULL pointer access, so report a parse error instead. */
+      rc = SQLITE_ERROR;
+    }else{
+      int jj;
+      char *zNew;
+      int nNew = 0;
+      int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
+      nByte += (p->pPhrase->nToken-1) * sizeof(struct PhraseToken);
+      realloc_or_free((void **)&p, nByte + nTemp);
+      if( !p ){
+        goto no_mem;
+      }
+      p->pPhrase = (Fts3Phrase *)&p[1];
+      zNew = &(((char *)p)[nByte]);
+      memcpy(zNew, zTemp, nTemp);
+      for(jj=0; jj<p->pPhrase->nToken; jj++){
+        p->pPhrase->aToken[jj].z = &zNew[nNew];
+        nNew += p->pPhrase->aToken[jj].n;
+      }
+      rc = SQLITE_OK;
+    }
+  }
+
+  /* The temporary text buffer is never needed past this point. On any
+  ** failure the partially built expression is discarded as well, so that
+  ** *ppExpr is 0 whenever an error code is returned. */
+  sqlite3_free(zTemp);
+  if( rc!=SQLITE_OK ){
+    sqlite3_free(p);
+    p = 0;
+  }
+  *ppExpr = p;
+  return rc;
+
+no_mem:
+  if( pCursor ){
+    pModule->xClose(pCursor);
+  }
+  sqlite3_free(zTemp);
+  sqlite3_free(p);
+  *ppExpr = 0;
+  return SQLITE_NOMEM;
+}
+
+/*
+** Function getNextNode(), which is called by fts3ExprParse(), may itself
+** call fts3ExprParse(). So this forward declaration is required.
+*/
+static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
+
+/*
+** Read the next node of the query expression from buffer z (length n).
+** A node is an operator keyword (OR, NEAR and, with the new syntax, AND
+** and NOT), a complete bracketed sub-expression (new syntax only), a
+** quoted phrase, or a single token optionally prefixed by "column:".
+**
+** The output variable *ppExpr is populated with an allocated Fts3Expr
+** structure when SQLITE_OK is returned. *pnConsumed is set to the number
+** of bytes of z that were consumed.
+**
+** Return values:
+**
+**   SQLITE_OK      a node was read,
+**   SQLITE_DONE    a close bracket was consumed (new syntax only), or the
+**                  tokenizer found no further token in the buffer,
+**   SQLITE_NOMEM   a memory allocation failed,
+**   SQLITE_ERROR   parse error: an unterminated quoted string, an empty
+**                  pair of brackets, or an error from a nested parse.
+**
+** No error message is stored anywhere; callers only receive the code.
+*/
+static int getNextNode(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *z, int n,                   /* Input string */
+  Fts3Expr **ppExpr,                      /* OUT: expression */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  static const struct Fts3Keyword {
+    const char *z;                        /* Keyword text */
+    int n;                                /* Length of keyword in bytes */
+    int eType;                            /* FTSQUERY_XXX value it maps to */
+  } aKeyword[] = {
+    { "OR" ,  2, FTSQUERY_OR   },
+    { "AND",  3, FTSQUERY_AND  },
+    { "NOT",  3, FTSQUERY_NOT  },
+    { "NEAR", 4, FTSQUERY_NEAR }
+  };
+  int ii;
+  int iCol;
+  int iColLen;
+  int rc;
+  Fts3Expr *pRet = 0;
+
+  const char *zInput = z;
+  int nInput = n;
+
+  /* Skip over any whitespace before checking for a keyword, an open or
+  ** close bracket, or a quoted string.
+  */
+  while( nInput>0 && safe_isspace(*zInput) ){
+    nInput--;
+    zInput++;
+  }
+
+  /* See if we are dealing with a keyword. */
+  for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(aKeyword[0])); ii++){
+    const struct Fts3Keyword *pKey = &aKeyword[ii];
+
+    /* AND and NOT are keywords of the new syntax only. */
+    if( (0==sqlite3_fts3_enable_parentheses)
+     && (pKey->eType==FTSQUERY_AND || pKey->eType==FTSQUERY_NOT)
+    ){
+      continue;
+    }
+
+    if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
+      int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
+      int nKey = pKey->n;
+      char cNext;
+
+      /* If this is a "NEAR" keyword, check for an explicit nearness. */
+      if( pKey->eType==FTSQUERY_NEAR ){
+        assert( nKey==4 );
+        if( nInput>5 && zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
+          nNear = 0;
+          for(nKey=5;
+              nKey<nInput && zInput[nKey]>='0' && zInput[nKey]<='9';
+              nKey++
+          ){
+            nNear = nNear * 10 + (zInput[nKey] - '0');
+          }
+        }
+      }
+
+      /* At this point this is probably a keyword. But for that to be true,
+      ** the next byte must contain either whitespace, an open or close
+      ** bracket, a quote character, or EOF. Running off the end of the
+      ** buffer counts as EOF, so the byte past the end is never read.
+      */
+      cNext = (nKey<nInput) ? zInput[nKey] : 0;
+      if( safe_isspace(cNext)
+       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
+      ){
+        pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
+        if( !pRet ){
+          return SQLITE_NOMEM;
+        }
+        memset(pRet, 0, sizeof(Fts3Expr));
+        pRet->eType = pKey->eType;
+        pRet->nNear = nNear;
+        *ppExpr = pRet;
+        *pnConsumed = (int)(zInput - z) + nKey;
+        return SQLITE_OK;
+      }
+
+      /* Turns out that wasn't a keyword after all. This happens if the
+      ** user has supplied a token such as "ORacle". Continue.
+      */
+    }
+  }
+
+  if( sqlite3_fts3_enable_parentheses && nInput>0 ){
+    /* Check for an open bracket. The bracketed text is parsed recursively;
+    ** the nested parse stops after consuming the matching ')'.
+    */
+    if( *zInput=='(' ){
+      int nConsumed = 0;
+      pParse->nNest++;
+      rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
+      if( rc==SQLITE_OK && *ppExpr==0 ){
+        /* "()" yields no expression. Returning SQLITE_OK with a NULL node
+        ** would make the caller dereference NULL, so treat it as a parse
+        ** error. */
+        rc = SQLITE_ERROR;
+      }
+      *pnConsumed = (int)(zInput - z) + 1 + nConsumed;
+      return rc;
+    }
+
+    /* Check for a close bracket. */
+    if( *zInput==')' ){
+      pParse->nNest--;
+      *pnConsumed = (int)(zInput - z) + 1;
+      return SQLITE_DONE;
+    }
+  }
+
+  /* See if we are dealing with a quoted phrase. If this is the case, then
+  ** search for the closing quote and pass the whole string to getNextString()
+  ** for processing. This is easy to do, as fts3 has no syntax for escaping
+  ** a quote character embedded in a string.
+  */
+  if( nInput>0 && *zInput=='"' ){
+    for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
+    *pnConsumed = (int)(zInput - z) + ii + 1;
+    if( ii==nInput ){
+      return SQLITE_ERROR;                /* No closing quote */
+    }
+    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
+  }
+
+
+  /* If control flows to this point, this must be a regular token, or
+  ** the end of the input. Read a regular token using the sqlite3_tokenizer
+  ** interface. Before doing so, figure out if there is an explicit
+  ** column specifier for the token.
+  **
+  ** TODO: Strangely, it is not possible to associate a column specifier
+  ** with a quoted phrase, only with a single token. Not sure if this was
+  ** an implementation artifact or an intentional decision when fts3 was
+  ** first implemented. Whichever it was, this module duplicates the
+  ** limitation.
+  */
+  iCol = pParse->iDefaultCol;
+  iColLen = 0;
+  for(ii=0; ii<pParse->nCol; ii++){
+    const char *zStr = pParse->azCol[ii];
+    int nStr = (int)strlen(zStr);
+    if( nInput>nStr && zInput[nStr]==':' && memcmp(zStr, zInput, nStr)==0 ){
+      iCol = ii;
+      /* Skip the leading whitespace, the column name and the ':' */
+      iColLen = (int)(zInput - z) + nStr + 1;
+      break;
+    }
+  }
+  rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
+  *pnConsumed += iColLen;
+  return rc;
+}
+
+/*
+** The argument is an Fts3Expr structure for a binary operator (any type
+** except an FTSQUERY_PHRASE). Return an integer value representing the
+** precedence of the operator. Lower values have a higher precedence (i.e.
+** group more tightly). For example, in the C language, the == operator
+** groups more tightly than ||, and would therefore have a higher precedence.
+**
+** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
+** is defined), the order of the operators in precedence from highest to
+** lowest is:
+**
+** NEAR
+** NOT
+** AND (including implicit ANDs)
+** OR
+**
+** Note that when using the old query syntax, the OR operator has a higher
+** precedence than the AND operator.
+*/
+static int opPrecedence(Fts3Expr *p){
+  assert( p->eType!=FTSQUERY_PHRASE );
+
+  /* New syntax: the FTSQUERY_XXX constants are already in precedence order. */
+  if( sqlite3_fts3_enable_parentheses ){
+    return p->eType;
+  }
+
+  /* Legacy syntax: OR groups more tightly than the implicit AND. */
+  switch( p->eType ){
+    case FTSQUERY_NEAR:
+      return 1;
+    case FTSQUERY_OR:
+      return 2;
+    default:
+      assert( p->eType==FTSQUERY_AND );
+      return 3;
+  }
+}
+
+/*
+** Argument ppHead contains a pointer to the current head of a query
+** expression tree being parsed. pPrev is the expression node most recently
+** inserted into the tree. This function adds pNew, which is always a binary
+** operator node, into the expression tree based on the relative precedence
+** of pNew and the existing nodes of the tree. This may result in the head
+** of the tree changing, in which case *ppHead is set to the new root node.
+*/
+static void insertBinaryOperator(
+  Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
+  Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
+  Fts3Expr *pNew           /* New binary node to insert into expression tree */
+){
+  Fts3Expr *pNode = pPrev;         /* Sub-tree that becomes pNew's left child */
+  Fts3Expr *pUp;                   /* Parent of pNode, or 0 at the root */
+
+  /* Climb from pPrev towards the root for as long as the parent operator
+  ** binds at least as tightly as pNew. */
+  for(;;){
+    pUp = pNode->pParent;
+    if( pUp==0 ) break;
+    if( opPrecedence(pUp)>opPrecedence(pNew) ) break;
+    pNode = pUp;
+  }
+
+  /* Splice pNew in where pNode used to hang. */
+  if( pUp==0 ){
+    *ppHead = pNew;
+  }else{
+    assert( pUp->pRight==pNode );
+    pUp->pRight = pNew;
+    pNew->pParent = pUp;
+  }
+  pNew->pLeft = pNode;
+  pNode->pParent = pNew;
+}
+
+/*
+** Parse the fts3 query expression found in buffer z, length n. This function
+** returns either when the end of the buffer is reached or an unmatched
+** closing bracket - ')' - is encountered.
+**
+** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
+** parsed form of the expression and *pnConsumed is set to the number of
+** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
+** (out of memory error) or SQLITE_ERROR (parse error) is returned.
+** *ppExpr is also 0 with SQLITE_OK if the buffer contained no nodes.
+**
+** With the legacy syntax, phrases marked with a leading '-' are kept out
+** of the main tree. Each one becomes the right-hand child of a NOT node,
+** and the NOT nodes are chained through their left-hand children. Once
+** the rest of the query has been parsed, the main tree is attached at the
+** bottom of the chain, so "a -b -c" becomes NOT(NOT(a, b), c). A legacy
+** query consisting only of '-' terms is a parse error.
+*/
+static int fts3ExprParse(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *z, int n,                   /* Text of MATCH query */
+  Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  Fts3Expr *pRet = 0;                     /* Root of the main tree */
+  Fts3Expr *pPrev = 0;                    /* Last node added to the main tree */
+  Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
+  int nIn = n;
+  const char *zIn = z;
+  int rc = SQLITE_OK;
+  int isRequirePhrase = 1;                /* True if an operand must come next */
+
+  while( rc==SQLITE_OK ){
+    Fts3Expr *p = 0;
+    int nByte = 0;
+    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
+    if( rc==SQLITE_OK ){
+      int isPhrase;
+
+      if( !sqlite3_fts3_enable_parentheses
+       && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot
+      ){
+        /* Create an implicit NOT operator. */
+        Fts3Expr *pNot = sqlite3_malloc(sizeof(Fts3Expr));
+        if( !pNot ){
+          sqlite3Fts3ExprFree(p);
+          rc = SQLITE_NOMEM;
+          goto exprparse_out;
+        }
+        memset(pNot, 0, sizeof(Fts3Expr));
+        pNot->eType = FTSQUERY_NOT;
+        pNot->pRight = p;
+        p->pParent = pNot;
+        if( pNotBranch ){
+          /* Chain earlier NOT nodes below this one on the left-hand side. */
+          pNot->pLeft = pNotBranch;
+          pNotBranch->pParent = pNot;
+        }
+        pNotBranch = pNot;
+
+        /* The excluded phrase is not part of the main tree, so it must not
+        ** become pPrev. Otherwise the next operator would be spliced in
+        ** around a node that belongs to the NOT branch. */
+        p = pPrev;
+      }else{
+        assert( p->eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
+        /* A bracketed sub-expression (root has pLeft set) is an operand. */
+        isPhrase = (p->eType==FTSQUERY_PHRASE || p->pLeft);
+        if( !isPhrase && isRequirePhrase ){
+          sqlite3Fts3ExprFree(p);
+          rc = SQLITE_ERROR;
+          goto exprparse_out;
+        }
+
+        if( isPhrase && !isRequirePhrase ){
+          /* Insert an implicit AND operator. */
+          Fts3Expr *pAnd;
+          assert( pRet && pPrev );
+          pAnd = sqlite3_malloc(sizeof(Fts3Expr));
+          if( !pAnd ){
+            sqlite3Fts3ExprFree(p);
+            rc = SQLITE_NOMEM;
+            goto exprparse_out;
+          }
+          memset(pAnd, 0, sizeof(Fts3Expr));
+          pAnd->eType = FTSQUERY_AND;
+          insertBinaryOperator(&pRet, pPrev, pAnd);
+          pPrev = pAnd;
+        }
+
+        if( pPrev && (
+            (pPrev->eType==FTSQUERY_NEAR && p->eType!=FTSQUERY_PHRASE)
+         || (p->eType==FTSQUERY_NEAR && pPrev->eType!=FTSQUERY_PHRASE)
+        )){
+          /* This is an attempt to do "phrase NEAR (bracketed expression)"
+          ** or "(bracketed expression) NEAR phrase", both of which are
+          ** illegal. Return an error.
+          */
+          sqlite3Fts3ExprFree(p);
+          rc = SQLITE_ERROR;
+          goto exprparse_out;
+        }
+
+        if( isPhrase ){
+          if( pRet ){
+            assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
+            pPrev->pRight = p;
+            p->pParent = pPrev;
+          }else{
+            pRet = p;
+          }
+        }else{
+          insertBinaryOperator(&pRet, pPrev, p);
+        }
+        isRequirePhrase = !isPhrase;
+      }
+      assert( nByte>0 );
+    }
+    /* Advance even when rc!=SQLITE_OK: a closing bracket is reported as
+    ** SQLITE_DONE and its byte still has to be counted as consumed. */
+    nIn -= nByte;
+    zIn += nByte;
+    pPrev = p;
+  }
+
+  /* The expression may not end with a dangling binary operator. */
+  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
+    rc = SQLITE_ERROR;
+  }
+
+  if( rc==SQLITE_DONE ){
+    rc = SQLITE_OK;
+    if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
+      if( !pRet ){
+        /* A query made up of NOT terms only cannot be evaluated. */
+        rc = SQLITE_ERROR;
+      }else{
+        /* Hang the main tree off the bottom of the NOT chain. */
+        Fts3Expr *pIter = pNotBranch;
+        while( pIter->pLeft ){
+          pIter = pIter->pLeft;
+        }
+        pIter->pLeft = pRet;
+        pRet->pParent = pIter;
+        pRet = pNotBranch;
+        pNotBranch = 0;
+      }
+    }
+  }
+
+exprparse_out:
+  *pnConsumed = n - nIn;
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts3ExprFree(pRet);
+    sqlite3Fts3ExprFree(pNotBranch);
+    pRet = 0;
+  }
+  *ppExpr = pRet;
+  return rc;
+}
+
+/*
+** Parameters z and n contain a pointer to and length of a buffer containing
+** an fts3 query expression, respectively. This function attempts to parse the
+** query expression and create a tree of Fts3Expr structures representing the
+** parsed expression. If successful, *ppExpr is set to point to the head
+** of the parsed expression tree and SQLITE_OK is returned. If an error
+** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
+** error) is returned and *ppExpr is set to 0.
+**
+** If parameter n is a negative number, then z is assumed to point to a
+** nul-terminated string and the length is determined using strlen().
+**
+** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
+** use to normalize query tokens while parsing the expression. The azCol[]
+** array, which is assumed to contain nCol entries, should contain the names
+** of each column in the target fts3 table, in order from left to right.
+** Column names must be nul-terminated strings.
+**
+** The iDefaultCol parameter should be passed the index of the table column
+** that appears on the left-hand-side of the MATCH operator (the default
+** column to match against for tokens for which a column name is not explicitly
+** specified as part of the query string), or -1 if tokens may by default
+** match any table column.
+*/
+int sqlite3Fts3ExprParse(
+  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
+  char **azCol,                       /* Array of column names for fts3 table */
+  int nCol,                           /* Number of entries in azCol[] */
+  int iDefaultCol,                    /* Default column to query */
+  const char *z, int n,               /* Text of MATCH query */
+  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
+){
+  int nParsed;
+  int rc;
+  ParseContext sParse;
+
+  /* Clear the whole context first so that no field (pCtx in particular,
+  ** which nothing below assigns) is left holding an indeterminate value. */
+  memset(&sParse, 0, sizeof(sParse));
+  sParse.pTokenizer = pTokenizer;
+  sParse.azCol = (const char **)azCol;
+  sParse.nCol = nCol;
+  sParse.iDefaultCol = iDefaultCol;
+  sParse.nNest = 0;
+
+  /* A NULL query string parses to "no expression" without error. */
+  if( z==0 ){
+    *ppExpr = 0;
+    return SQLITE_OK;
+  }
+  if( n<0 ){
+    n = (int)strlen(z);
+  }
+  rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
+
+  /* Check for mismatched parenthesis. nNest is incremented for each '('
+  ** and decremented for each ')', so it is zero only if they balance. */
+  if( rc==SQLITE_OK && sParse.nNest ){
+    rc = SQLITE_ERROR;
+    sqlite3Fts3ExprFree(*ppExpr);
+    *ppExpr = 0;
+  }
+
+  return rc;
+}
+
+/*
+** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
+*/
+void sqlite3Fts3ExprFree(Fts3Expr *p){
+  /* Nothing to do for an empty (sub-)tree. */
+  if( p==0 ){
+    return;
+  }
+
+  /* Release both children before the node itself. */
+  sqlite3Fts3ExprFree(p->pLeft);
+  sqlite3Fts3ExprFree(p->pRight);
+  sqlite3_free(p);
+}
+
+/****************************************************************************
+*****************************************************************************
+** Everything after this point is just test code.
+*/
+
+#ifdef SQLITE_TEST
+
+#include <stdio.h>
+
+/*
+** Function to query the hash-table of tokenizers (see README.tokenizers).
+*/
+static int queryTokenizer(
+  sqlite3 *db,                            /* Database connection to query */
+  const char *zName,                      /* Name of tokenizer to look up */
+  const sqlite3_tokenizer_module **pp     /* OUT: module pointer, or 0 */
+){
+  int rc;
+  sqlite3_stmt *pStmt;
+  const char zSql[] = "SELECT fts3_tokenizer(?)";
+
+  /* *pp stays 0 unless the lookup returns a usable module pointer. */
+  *pp = 0;
+  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+  if( SQLITE_ROW==sqlite3_step(pStmt) ){
+    /* The module pointer comes back as the raw bytes of a blob. Only copy
+    ** it when the blob is exactly pointer-sized, so that memcpy() can never
+    ** read past the end of a shorter blob. */
+    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB
+     && sqlite3_column_bytes(pStmt, 0)==(int)sizeof(*pp)
+    ){
+      memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
+    }
+  }
+
+  /* sqlite3_finalize() reports any error hit while stepping. */
+  return sqlite3_finalize(pStmt);
+}
+
+/*
+** This function is part of the test interface for the query parser. It
+** writes a text representation of the query expression pExpr into the
+** buffer pointed to by argument zBuf. It is assumed that zBuf is large
+** enough to store the required text representation.
+*/
+static void exprToString(Fts3Expr *pExpr, char *zBuf){
+  char *zOut = zBuf;               /* Next byte of zBuf to write to */
+  int iChild;
+
+  /* A phrase is a leaf: "PHRASE <column> <isNot> tok1[+] tok2[+] ..." */
+  if( pExpr->eType==FTSQUERY_PHRASE ){
+    Fts3Phrase *pPhrase = pExpr->pPhrase;
+    int iTok;
+    zOut += sprintf(zOut, "PHRASE %d %d", pPhrase->iColumn, pPhrase->isNot);
+    for(iTok=0; iTok<pPhrase->nToken; iTok++){
+      struct PhraseToken *pTok = &pPhrase->aToken[iTok];
+      zOut += sprintf(zOut, " %.*s", pTok->n, pTok->z);
+      zOut += sprintf(zOut, "%s", (pTok->isPrefix ? "+" : ""));
+    }
+    return;
+  }
+
+  /* Operator label. Only NEAR carries a parameter. */
+  if( pExpr->eType==FTSQUERY_NEAR ){
+    zOut += sprintf(zOut, "NEAR/%d ", pExpr->nNear);
+  }else if( pExpr->eType==FTSQUERY_NOT ){
+    zOut += sprintf(zOut, "NOT ");
+  }else if( pExpr->eType==FTSQUERY_AND ){
+    zOut += sprintf(zOut, "AND ");
+  }else if( pExpr->eType==FTSQUERY_OR ){
+    zOut += sprintf(zOut, "OR ");
+  }
+
+  /* Both operands follow, each wrapped in braces: "{left} {right}" */
+  for(iChild=0; iChild<2; iChild++){
+    Fts3Expr *pChild = (iChild==0) ? pExpr->pLeft : pExpr->pRight;
+    zOut += sprintf(zOut, "{");
+    exprToString(pChild, zOut);
+    zOut += strlen(zOut);
+    zOut += sprintf(zOut, "%s", (iChild==0) ? "} " : "}");
+  }
+}
+
+/*
+** This is the implementation of a scalar SQL function used to test the
+** expression parser. It should be called as follows:
+**
+** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
+**
+** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
+** to parse the query expression (see README.tokenizers). The second argument
+** is the query expression to parse. Each subsequent argument is the name
+** of a column of the fts3 table that the query expression may refer to.
+** For example:
+**
+** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
+*/
+static void fts3ExprTest(
+  sqlite3_context *context,               /* SQL function call context */
+  int argc,                               /* Number of SQL arguments */
+  sqlite3_value **argv                    /* tokenizer, expr, col1, ... */
+){
+  sqlite3_tokenizer_module const *pModule = 0;
+  sqlite3_tokenizer *pTokenizer = 0;      /* Must be 0 until xCreate succeeds */
+  int rc;
+  char **azCol = 0;
+  const char *zExpr;
+  int nExpr;
+  int nCol;
+  int ii;
+  Fts3Expr *pExpr = 0;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+
+  if( argc<3 ){
+    sqlite3_result_error(context,
+        "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
+    );
+    return;
+  }
+
+  rc = queryTokenizer(db, (const char *)sqlite3_value_text(argv[0]), &pModule);
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }else if( !pModule ){
+    sqlite3_result_error(context, "No such tokenizer module", -1);
+    goto exprtest_out;
+  }
+
+  rc = pModule->xCreate(0, 0, &pTokenizer);
+  assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }
+  pTokenizer->pModule = pModule;
+
+  zExpr = (const char *)sqlite3_value_text(argv[1]);
+  nExpr = sqlite3_value_bytes(argv[1]);
+  nCol = argc-2;
+  azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
+  if( !azCol ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }
+  for(ii=0; ii<nCol; ii++){
+    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
+  }
+
+  /* nCol is passed as the default column, so tokens without an explicit
+  ** column specifier are reported with column number nCol. */
+  rc = sqlite3Fts3ExprParse(
+      pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
+  );
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }else if( rc==SQLITE_OK ){
+    /* NOTE(review): exprToString() does no bounds checking, so a very long
+    ** expression can overflow this fixed-size buffer. */
+    char zBuf[4096];
+    zBuf[0] = '\0';
+    if( pExpr ){
+      /* The parser returns SQLITE_OK with no expression for an empty or
+      ** NULL query; in that case the result is an empty string. */
+      exprToString(pExpr, zBuf);
+    }
+    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+    sqlite3Fts3ExprFree(pExpr);
+  }else{
+    sqlite3_result_error(context, "Error parsing expression", -1);
+  }
+
+exprtest_out:
+  /* Control can arrive here before the tokenizer (or even its module) has
+  ** been obtained, so check both before destroying it. */
+  if( pModule && pTokenizer ){
+    rc = pModule->xDestroy(pTokenizer);
+  }
+  sqlite3_free(azCol);
+}
+
+/*
+** Register the query expression parser test function fts3_exprtest()
+** with database connection db.
+*/
+void sqlite3Fts3ExprInitTestInterface(sqlite3* db){
+  /* -1 arguments: the function accepts any number of SQL arguments. */
+  static const char zFunc[] = "fts3_exprtest";
+  sqlite3_create_function(db, zFunc, -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0);
+}
+
+#endif
--- /dev/null
+/*
+** 2008 Nov 28
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+#include "fts3_tokenizer.h"
+#include "sqlite3.h"
+
+/*
+** The following describes the syntax supported by the fts3 MATCH
+** operator in a similar format to that used by the lemon parser
+** generator. This module does not actually use lemon, it uses a
+** custom parser.
+**
+** phrase ::= TOKEN.
+** phrase ::= COLUMN:TOKEN.
+** phrase ::= "TOKEN TOKEN TOKEN...".
+** phrase ::= phrase near phrase.
+**
+** near ::= NEAR.
+** near ::= NEAR / INTEGER.
+**
+** query ::= -TOKEN.
+** query ::= phrase.
+** query ::= LP query RP.
+** query ::= query NOT query.
+** query ::= query OR query.
+** query ::= query AND query.
+*/
+
+typedef struct Fts3Expr Fts3Expr;
+typedef struct Fts3Phrase Fts3Phrase;
+
+/*
+** A phrase: one or more tokens that must be matched together. The
+** aToken[] array is declared with a single entry, but the parser allocates
+** additional PhraseToken slots directly after the structure when a phrase
+** has more than one token.
+*/
+struct Fts3Phrase {
+ int nToken; /* Number of entries in aToken[] */
+ int iColumn; /* Index of column this phrase must match */
+ int isNot; /* Phrase prefixed by unary not (-) operator */
+ struct PhraseToken {
+ char *z; /* Text of the token */
+ int n; /* Number of bytes in buffer pointed to by z */
+ int isPrefix; /* True if token ends with a "*" character */
+ } aToken[1];
+};
+
+/*
+** One node of a parsed query expression tree. Operator nodes (NEAR, NOT,
+** AND, OR) use pLeft and pRight; leaf nodes have eType==FTSQUERY_PHRASE
+** and use pPhrase. Trees are released with sqlite3Fts3ExprFree().
+*/
+struct Fts3Expr {
+ int eType; /* One of the FTSQUERY_XXX values defined below */
+ int nNear; /* Valid if eType==FTSQUERY_NEAR */
+ Fts3Expr *pParent; /* Parent node; used by the parser when inserting operators */
+ Fts3Expr *pLeft; /* Left operand of an operator node */
+ Fts3Expr *pRight; /* Right operand of an operator node */
+ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
+};
+
+int sqlite3Fts3ExprParse(sqlite3_tokenizer *, char **, int, int,
+ const char *, int, Fts3Expr **);
+void sqlite3Fts3ExprFree(Fts3Expr *);
+
+/*
+** Candidate values for Fts3Expr.eType. Note that the order of the first
+** four values is in order of precedence when parsing expressions. For
+** example, the following:
+**
+** "a OR b AND c NOT d NEAR e"
+**
+** is equivalent to:
+**
+** "a OR (b AND (c NOT (d NEAR e)))"
+*/
+#define FTSQUERY_NEAR 1
+#define FTSQUERY_NOT 2
+#define FTSQUERY_AND 3
+#define FTSQUERY_OR 4
+#define FTSQUERY_PHRASE 5
+
+#ifdef SQLITE_TEST
+void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
+#endif
+
LIBOBJ+= alter.o analyze.o attach.o auth.o \
bitvec.o btmutex.o btree.o build.o \
callback.o complete.o date.o delete.o expr.o fault.o \
- fts3.o fts3_hash.o fts3_icu.o fts3_porter.o \
+ fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
fts3_tokenizer.o fts3_tokenizer1.o \
func.o global.o hash.o \
icu.o insert.o journal.o legacy.o loadext.o \
SRC += \
$(TOP)/ext/fts3/fts3.c \
$(TOP)/ext/fts3/fts3.h \
+ $(TOP)/ext/fts3/fts3_expr.c \
+ $(TOP)/ext/fts3/fts3_expr.h \
$(TOP)/ext/fts3/fts3_hash.c \
$(TOP)/ext/fts3/fts3_hash.h \
$(TOP)/ext/fts3/fts3_icu.c \
$(TOP)/src/pcache1.c $(TOP)/src/select.c $(TOP)/src/tokenize.c \
$(TOP)/src/utf.c $(TOP)/src/util.c $(TOP)/src/vdbeapi.c $(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbe.c $(TOP)/src/vdbemem.c $(TOP)/src/where.c parse.c \
- $(TOP)/ext/fts3/fts3.c $(TOP)/ext/fts3/fts3_tokenizer.c
+ $(TOP)/ext/fts3/fts3.c $(TOP)/ext/fts3/fts3_expr.c \
+ $(TOP)/ext/fts3/fts3_tokenizer.c
# Header files used by all library source files.
#
$(TOP)/ext/fts2/fts2_tokenizer.h
EXTHDR += \
$(TOP)/ext/fts3/fts3.h \
+ $(TOP)/ext/fts3/fts3_expr.h \
$(TOP)/ext/fts3/fts3_hash.h \
$(TOP)/ext/fts3/fts3_tokenizer.h
EXTHDR += \
fts3.o: $(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c
+fts3_expr.o: $(TOP)/ext/fts3/fts3_expr.c $(HDR) $(EXTHDR)
+ $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_expr.c
+
fts3_hash.o: $(TOP)/ext/fts3/fts3_hash.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_hash.c
-C Version\s3.6.7\s(CVS\s6033)
-D 2008-12-16T18:15:59
+C Modify\sfts3\sto\ssupport\sa\smore\scomplex\sexpression\ssyntax\sthat\sallows\sparenthesis.\sThe\snew\ssyntax\sis\snot\sentirely\sbackwards\scompatible,\sso\sis\sdisabled\sby\sdefault.\sUse\s-DSQLITE_ENABLE_FTS3_PARENTHESIS\sto\senable\sit.\s(CVS\s6034)
+D 2008-12-17T15:18:18
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in f7e4c81c347b04f7b0f1c1b081a168645d7b8af7
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.tokenizers 226644a0eab97724e8de83061912e8bb248461b6
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c e67453b6ac421b79e600385491ed7f038b3bb271
+F ext/fts3/fts3.c 3aa6aef1eadc44606f6ed3c841062735a5210077
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
+F ext/fts3/fts3_expr.c b141145197cc749accb03d2b970813443b723edd
+F ext/fts3/fts3_expr.h 4dad4d87cf5d41ea924a815fe89a6f87dc76f277
F ext/fts3/fts3_hash.c e15e84d18f8df149ab290029872d4559c4c7c15a
F ext/fts3/fts3_hash.h 004b759e1602ff16dfa02fea3ca1c77336ad6798
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/rtree/viewrtree.tcl 09526398dae87a5a87c5aac2b3854dbaf8376869
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 09fe5815427dc7d0abb188bbcdf0e34896577210
-F main.mk 5923e75b5ac4b265f322597c3953dda7175f4405
+F main.mk f6eb58a66f942bf672ab58e74e30e72cad39b93f
F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a
F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f
F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac
F src/status.c 237b193efae0cf6ac3f0817a208de6c6c6ef6d76
F src/table.c 23db1e5f27c03160987c122a078b4bb51ef0b2f8
F src/tclsqlite.c 23afb60549af943e135ded441a631f4745be6040
-F src/test1.c 9c0502c3627162f969099e57932782057d9139b6
+F src/test1.c b193b8b80617bdb8297b25a87d00ee8d5a125d0d
F src/test2.c 897528183edf2839c2a3c991d415905db56f1240
F src/test3.c 88a246b56b824275300e6c899634fbac1dc94b14
F src/test4.c f79ab52d27ff49b784b631a42e2ccd52cfd5c84c
F test/fts3c.test 4c7ef29b37aca3e8ebb6a39b57910caa6506034e
F test/fts3d.test d92a47fe8ed59c9e53d2d8e6d2685bb380aadadc
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
+F test/fts3expr.test 18143e61503845b940fd7caacce53bce4307426f
F test/fts3near.test e8a9b4e16c63a795918b334b74d4aec14815bf8b
F test/func.test a50f0a4b69ac251debe1dce3ba29da7476dc8c52
F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
-P 8b8f6a6ab597e06e60557ab56c6ee7f8522ed570
-R a6ff965a91307b50874e830368f8fcd6
-U drh
-Z 76064440713dce47dc110c16fc71382b
+P f4f40370fb83d677df3fbf2c51c4bb4a3e5ccc7a
+R 0170cc023fb1827148ff15e7bc02335a
+U danielk1977
+Z 3401f60bb1566cbc5da4c344a91c4fb9
-f4f40370fb83d677df3fbf2c51c4bb4a3e5ccc7a
\ No newline at end of file
+7389b9ecb80294569845c40a23e0c832d07f7a45
\ No newline at end of file
** is not included in the SQLite library. It is used for automated
** testing of the SQLite library.
**
-** $Id: test1.c,v 1.337 2008/12/11 02:56:07 drh Exp $
+** $Id: test1.c,v 1.338 2008/12/17 15:18:18 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
extern int sqlite3_enable_in_opt;
extern char sqlite3_query_plan[];
static char *query_plan = sqlite3_query_plan;
+#ifdef SQLITE_ENABLE_FTS3
+ extern int sqlite3_fts3_enable_parentheses;
+#endif
#endif
for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
#ifdef SQLITE_TEST
Tcl_LinkVar(interp, "sqlite_enable_in_opt",
(char*)&sqlite3_enable_in_opt, TCL_LINK_INT);
+#ifdef SQLITE_ENABLE_FTS3
+ Tcl_LinkVar(interp, "sqlite_fts3_enable_parentheses",
+ (char*)&sqlite3_fts3_enable_parentheses, TCL_LINK_INT);
+#endif
#endif
return TCL_OK;
}
--- /dev/null
+# 2006 September 9
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library. The
+# focus of this script is testing the FTS3 module.
+#
+# $Id: fts3expr.test,v 1.1 2008/12/17 15:18:18 danielk1977 Exp $
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
+ifcapable !fts3 {
+ finish_test
+ return
+}
+
+set sqlite_fts3_enable_parentheses 1
+
+# Run the fts3_exprtest() SQL function on $expr, using the 'simple'
+# tokenizer and the column names a, b and c, and return its result.
+proc test_fts3expr {expr} {
+  set parsed [db one {SELECT fts3_exprtest('simple', $expr, 'a', 'b', 'c')}]
+  return $parsed
+}
+do_test fts3expr-1.0 {
+ test_fts3expr "abcd"
+} {PHRASE 3 0 abcd}
+do_test fts3expr-1.1 {
+ test_fts3expr " tag "
+} {PHRASE 3 0 tag}
+
+do_test fts3expr-1.2 {
+ test_fts3expr "ab AND cd"
+} {AND {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+do_test fts3expr-1.3 {
+ test_fts3expr "ab OR cd"
+} {OR {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+do_test fts3expr-1.4 {
+ test_fts3expr "ab NOT cd"
+} {NOT {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+do_test fts3expr-1.5 {
+ test_fts3expr "ab NEAR cd"
+} {NEAR/10 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+do_test fts3expr-1.6 {
+ test_fts3expr "ab NEAR/5 cd"
+} {NEAR/5 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
+
+do_test fts3expr-1.7 {
+ test_fts3expr {"one two three"}
+} {PHRASE 3 0 one two three}
+do_test fts3expr-1.8 {
+ test_fts3expr {zero "one two three" four}
+} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
+do_test fts3expr-1.9 {
+ test_fts3expr {"one* two three*"}
+} {PHRASE 3 0 one+ two three+}
+
+do_test fts3expr-1.10 {
+ test_fts3expr {one* two}
+} {AND {PHRASE 3 0 one+} {PHRASE 3 0 two}}
+do_test fts3expr-1.11 {
+ test_fts3expr {one two*}
+} {AND {PHRASE 3 0 one} {PHRASE 3 0 two+}}
+
+do_test fts3expr-1.14 {
+ test_fts3expr {a:one two}
+} {AND {PHRASE 0 0 one} {PHRASE 3 0 two}}
+do_test fts3expr-1.15 {
+ test_fts3expr {one b:two}
+} {AND {PHRASE 3 0 one} {PHRASE 1 0 two}}
+
+# Reduce a parse tree, as returned by fts3_exprtest(), to its bare shape.
+# A leaf of the form {PHRASE x y token ...} is replaced by the list of its
+# tokens (everything from index 3 onwards). Any other node is treated as
+# {operator left right} and rebuilt with both operands stripped recursively.
+proc strip_phrase_data {L} {
+  set op [lindex $L 0]
+  if {$op ne "PHRASE"} {
+    set lhs [strip_phrase_data [lindex $L 1]]
+    set rhs [strip_phrase_data [lindex $L 2]]
+    return [list $op $lhs $rhs]
+  }
+  return [lrange $L 3 end]
+}
+# Parse $expr with fts3_exprtest() ('simple' tokenizer, columns a, b, c)
+# and return only the operator/token skeleton of the resulting tree, as
+# produced by strip_phrase_data.
+proc test_fts3expr2 {expr} {
+  set tree [db one {SELECT fts3_exprtest('simple', $expr, 'a', 'b', 'c')}]
+  return [strip_phrase_data $tree]
+}
+do_test fts3expr-2.1 {
+ test_fts3expr2 "ab OR cd AND ef"
+} {OR ab {AND cd ef}}
+do_test fts3expr-2.2 {
+ test_fts3expr2 "cd AND ef OR ab"
+} {OR {AND cd ef} ab}
+do_test fts3expr-2.3 {
+ test_fts3expr2 "ab AND cd AND ef OR gh"
+} {OR {AND {AND ab cd} ef} gh}
+do_test fts3expr-2.4 {
+ test_fts3expr2 "ab AND cd OR ef AND gh"
+} {OR {AND ab cd} {AND ef gh}}
+do_test fts3expr-2.5 {
+ test_fts3expr2 "ab cd"
+} {AND ab cd}
+
+do_test fts3expr-3.1 {
+ test_fts3expr2 "(ab OR cd) AND ef"
+} {AND {OR ab cd} ef}
+do_test fts3expr-3.2 {
+ test_fts3expr2 "ef AND (ab OR cd)"
+} {AND ef {OR ab cd}}
+do_test fts3expr-3.3 {
+ test_fts3expr2 "(ab OR cd)"
+} {OR ab cd}
+do_test fts3expr-3.4 {
+ test_fts3expr2 "(((ab OR cd)))"
+} {OR ab cd}
+
+#------------------------------------------------------------------------
+# The following tests, fts3expr-4.*, test the parser's response to syntax
+# errors in query expressions. This is done using a real fts3 table and
+# MATCH clauses, not the parser test interface.
+#
+do_test fts3expr-4.1 {
+ execsql { CREATE VIRTUAL TABLE t1 USING fts3(a, b, c) }
+} {}
+
+# Mismatched parenthesis:
+do_test fts3expr-4.2.1 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example AND (hello OR world))' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.2.2 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example AND (hello OR world' }
+} {1 {SQL logic error or missing database}}
+
+# Unterminated quotation marks:
+do_test fts3expr-4.3.1 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example OR "hello world' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.3.2 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example OR hello world"' }
+} {1 {SQL logic error or missing database}}
+
+# Binary operators without the required operands.
+do_test fts3expr-4.4.1 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'OR hello world' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.4.2 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'hello world OR' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.4.3 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one (hello world OR) two' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.4.4 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one (OR hello world) two' }
+} {1 {SQL logic error or missing database}}
+
+# NEAR operators with something other than phrases as arguments.
+do_test fts3expr-4.5.1 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH '(hello OR world) NEAR one' }
+} {1 {SQL logic error or missing database}}
+do_test fts3expr-4.5.2 {
+ catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one NEAR (hello OR world)' }
+} {1 {SQL logic error or missing database}}
+
+#------------------------------------------------------------------------
+# The following OOM tests are designed to cover cases in fts3_expr.c.
+#
+source $testdir/malloc_common.tcl
+do_malloc_test fts3expr-malloc-1 -sqlbody {
+ SELECT fts3_exprtest('simple', 'a b c "d e f"', 'a', 'b', 'c')
+}
+do_malloc_test fts3expr-malloc-2 -tclprep {
+ set sqlite_fts3_enable_parentheses 0
+} -sqlbody {
+ SELECT fts3_exprtest('simple', 'a -b', 'a', 'b', 'c')
+} -cleanup {
+ set sqlite_fts3_enable_parentheses 1
+}
+
+#------------------------------------------------------------------------
+# The following tests are not very important. They cover error handling
+# cases in the test code, which makes test coverage easier to measure.
+#
+do_test fts3expr-5.1 {
+ catchsql { SELECT fts3_exprtest('simple', 'a b') }
+} {1 {Usage: fts3_exprtest(tokenizer, expr, col1, ...}}
+do_test fts3expr-5.2 {
+ catchsql { SELECT fts3_exprtest('doesnotexist', 'a b', 'c') }
+} {1 {No such tokenizer module}}
+do_test fts3expr-5.3 {
+ catchsql { SELECT fts3_exprtest('simple', 'a b OR', 'c') }
+} {1 {Error parsing expression}}
+
+#------------------------------------------------------------------------
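+# The next set of tests verifies that things actually work as they are
+# supposed to when using the new syntax. Test 6.1 fills t1 with 31 rows
+# such that row N contains "one" if bit 0 of N is set, "two" if bit 1 is
+# set, and so on up to "five" for bit 4.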
+#
+do_test fts3expr-6.1 {
+ execsql {
+ CREATE VIRTUAL TABLE t1 USING fts3(a);
+ }
+ for {set ii 1} {$ii < 32} {incr ii} {
+ set v [list]
+ if {$ii & 1} { lappend v one }
+ if {$ii & 2} { lappend v two }
+ if {$ii & 4} { lappend v three }
+ if {$ii & 8} { lappend v four }
+ if {$ii & 16} { lappend v five }
+ execsql { INSERT INTO t1 VALUES($v) }
+ }
+
+ execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'five four one' ORDER BY rowid}
+} {25 27 29 31}
+
+foreach {id expr res} {
+
+ 2 "five four NOT one" {24 26 28 30}
+
+ 3 "five AND four OR one"
+ {1 3 5 7 9 11 13 15 17 19 21 23 24 25 26 27 28 29 30 31}
+
+ 4 "five AND (four OR one)" {17 19 21 23 24 25 26 27 28 29 30 31}
+
+ 5 "five NOT (four OR one)" {16 18 20 22}
+
+ 6 "(five NOT (four OR one)) OR (five AND (four OR one))"
+ {16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}
+
+ 7 "(five OR one) AND two AND three" {7 15 22 23 30 31}
+
+ 8 "five OR one AND two AND three"
+ {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}
+
+ 9 "five OR one two three"
+ {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}
+
+ 10 "five OR \"one two three\""
+ {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}
+
+ 11 "one two OR four five NOT three" {3 7 11 15 19 23 24 25 26 27 31}
+
+ 12 "(one two OR four five) NOT three" {3 11 19 24 25 26 27}
+
+ 13 "((((((one two OR four five)))))) NOT three" {3 11 19 24 25 26 27}
+
+} {
+ do_test fts3expr-6.$id {
+ execsql { SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid }
+ } $res
+}
+
+set sqlite_fts3_enable_parentheses 0
+finish_test
+