}
pNot->eType = FTSQUERY_NOT;
pNot->pRight = p;
+ p->pParent = pNot;
if( pNotBranch ){
pNot->pLeft = pNotBranch;
+ pNotBranch->pParent = pNot;
}
pNotBranch = pNot;
p = pPrev;
pIter = pIter->pLeft;
}
pIter->pLeft = pRet;
+ pRet->pParent = pIter;
pRet = pNotBranch;
}
}
}
/*
-** Parameters z and n contain a pointer to and length of a buffer containing
-** an fts3 query expression, respectively. This function attempts to parse the
-** query expression and create a tree of Fts3Expr structures representing the
-** parsed expression. If successful, *ppExpr is set to point to the head
-** of the parsed expression tree and SQLITE_OK is returned. If an error
-** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
-** error) is returned and *ppExpr is set to 0.
+** Return SQLITE_ERROR if the depth of the expression tree passed as the
+** first argument is more than nMaxDepth. Otherwise, return SQLITE_OK.
+*/
+static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
+  int rcLeft;                     /* Result of checking the left sub-tree */
+
+  /* An empty sub-tree always fits, whatever budget remains. */
+  if( p==0 ) return SQLITE_OK;
+
+  /* A node is present but there are no levels left for it. */
+  if( nMaxDepth==0 ) return SQLITE_ERROR;
+
+  /* Each child is allowed one level less than its parent. The right
+  ** child is only examined when the left child is within the limit. */
+  rcLeft = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
+  if( rcLeft!=SQLITE_OK ) return rcLeft;
+  return fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
+}
+
+/*
+** This function attempts to transform the expression tree at (*pp) to
+** an equivalent but more balanced form. The tree is modified in place.
+** If successful, SQLITE_OK is returned and (*pp) set to point to the
+** new root expression node.
**
-** If parameter n is a negative number, then z is assumed to point to a
-** nul-terminated string and the length is determined using strlen().
+** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
**
-** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
-** use to normalize query tokens while parsing the expression. The azCol[]
-** array, which is assumed to contain nCol entries, should contain the names
-** of each column in the target fts3 table, in order from left to right.
-** Column names must be nul-terminated strings.
+** Otherwise, if an error occurs, an SQLite error code is returned and
+** expression (*pp) freed.
+*/
+static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
+  int rc = SQLITE_OK;             /* Return code */
+  Fts3Expr *pRoot = *pp;          /* Initial root node */
+  Fts3Expr *pFree = 0;            /* List of free nodes. Linked by pParent. */
+  int eType = pRoot->eType;       /* Type of node in this tree */
+
+  /* The depth budget is used up. Fall through to the cleanup code at the
+  ** end of this function, which frees the tree and returns the error. */
+  if( nMaxDepth==0 ){
+    rc = SQLITE_ERROR;
+  }
+
+  /* Only a tree rooted at an AND or OR node is restructured. A root of
+  ** any other type is handed back to the caller unchanged. */
+  if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
+    /* apLeaf[i] is either NULL or a sub-tree assembled from 2^i leaves.
+    ** Adding a leaf works like incrementing a binary counter: an occupied
+    ** slot is merged with the incoming sub-tree and the result is carried
+    ** to the next slot. */
+    Fts3Expr **apLeaf;
+    apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth);
+    if( 0==apLeaf ){
+      rc = SQLITE_NOMEM;
+    }else{
+      memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
+    }
+
+    if( rc==SQLITE_OK ){
+      int i;
+      Fts3Expr *p;
+
+      /* Set $p to point to the left-most leaf in the tree of eType nodes. */
+      for(p=pRoot; p->eType==eType; p=p->pLeft){
+        assert( p->pParent==0 || p->pParent->pLeft==p );
+        assert( p->pLeft && p->pRight );
+      }
+
+      /* This loop runs once for each leaf in the tree of eType nodes. */
+      while( 1 ){
+        int iLvl;
+        Fts3Expr *pParent = p->pParent;     /* Current parent of p */
+
+        /* Detach leaf p from the original tree. */
+        assert( pParent==0 || pParent->pLeft==p );
+        p->pParent = 0;
+        if( pParent ){
+          pParent->pLeft = 0;
+        }else{
+          pRoot = 0;
+        }
+
+        /* The leaf may itself be a tree of nodes of a different type.
+        ** Balance it first, with one level less to work with. On error
+        ** the recursive call has already freed p. */
+        rc = fts3ExprBalance(&p, nMaxDepth-1);
+        if( rc!=SQLITE_OK ) break;
+
+        /* Insert p into apLeaf[], merging with occupied slots on the way
+        ** up. Each merge takes an internal node from the pFree list. */
+        for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
+          if( apLeaf[iLvl]==0 ){
+            apLeaf[iLvl] = p;
+            p = 0;
+          }else{
+            assert( pFree );
+            pFree->pLeft = apLeaf[iLvl];
+            pFree->pRight = p;
+            pFree->pLeft->pParent = pFree;
+            pFree->pRight->pParent = pFree;
+
+            p = pFree;
+            pFree = pFree->pParent;
+            p->pParent = 0;
+            apLeaf[iLvl] = 0;
+          }
+        }
+
+        /* If p is still set, every slot was occupied: the tree has too
+        ** many leaves to fit within nMaxDepth levels. */
+        if( p ){
+          sqlite3Fts3ExprFree(p);
+          rc = SQLITE_ERROR;
+          break;
+        }
+
+        /* If that was the last leaf node, break out of the loop */
+        if( pParent==0 ) break;
+
+        /* Set $p to point to the next leaf in the tree of eType nodes */
+        for(p=pParent->pRight; p->eType==eType; p=p->pLeft);
+
+        /* Remove pParent from the original tree. */
+        assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
+        pParent->pRight->pParent = pParent->pParent;
+        if( pParent->pParent ){
+          pParent->pParent->pLeft = pParent->pRight;
+        }else{
+          assert( pParent==pRoot );
+          pRoot = pParent->pRight;
+        }
+
+        /* Link pParent into the free node list. It will be used as an
+        ** internal node of the new tree. */
+        pParent->pParent = pFree;
+        pFree = pParent;
+      }
+
+      if( rc==SQLITE_OK ){
+        /* Join whatever is left in apLeaf[], lowest slot first, into a
+        ** single tree. The remaining pFree nodes serve as the joins. */
+        p = 0;
+        for(i=0; i<nMaxDepth; i++){
+          if( apLeaf[i] ){
+            if( p==0 ){
+              p = apLeaf[i];
+              p->pParent = 0;
+            }else{
+              pFree->pRight = p;
+              pFree->pLeft = apLeaf[i];
+              pFree->pLeft->pParent = pFree;
+              pFree->pRight->pParent = pFree;
+
+              p = pFree;
+              pFree = pFree->pParent;
+              p->pParent = 0;
+            }
+          }
+        }
+        pRoot = p;
+      }else{
+        /* An error occurred. Delete the contents of the apLeaf[] array
+        ** and pFree list. Everything else is cleaned up by the call to
+        ** sqlite3Fts3ExprFree(pRoot) below. */
+        Fts3Expr *pDel;
+        for(i=0; i<nMaxDepth; i++){
+          sqlite3Fts3ExprFree(apLeaf[i]);
+        }
+        /* The assignment in the condition is intentional: pop the head
+        ** of the free list until the list is empty. */
+        while( (pDel=pFree)!=0 ){
+          pFree = pDel->pParent;
+          sqlite3_free(pDel);
+        }
+      }
+
+      assert( pFree==0 );
+      sqlite3_free( apLeaf );
+    }
+  }
+
+  /* On failure nothing is returned to the caller: free what remains of
+  ** the original tree and set (*pp) to NULL. */
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts3ExprFree(pRoot);
+    pRoot = 0;
+  }
+  *pp = pRoot;
+  return rc;
+}
+
+/*
+** This function is similar to sqlite3Fts3ExprParse(), with the following
+** differences:
**
-** The iDefaultCol parameter should be passed the index of the table column
-** that appears on the left-hand-side of the MATCH operator (the default
-** column to match against for tokens for which a column name is not explicitly
-** specified as part of the query string), or -1 if tokens may by default
-** match any table column.
+** 1. It does not do expression rebalancing.
+** 2. It does not check that the expression does not exceed the
+** maximum allowable depth.
+** 3. Even if it fails, *ppExpr may still be set to point to an
+** expression tree. It should be deleted using sqlite3Fts3ExprFree()
+** in this case.
*/
-int sqlite3Fts3ExprParse(
+static int fts3ExprParseUnbalanced(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
const char *z, int n, /* Text of MATCH query */
Fts3Expr **ppExpr /* OUT: Parsed query structure */
){
+ static const int MAX_EXPR_DEPTH = 12;
int nParsed;
int rc;
ParseContext sParse;
n = (int)strlen(z);
}
rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
+ assert( rc==SQLITE_OK || *ppExpr==0 );
/* Check for mismatched parenthesis */
if( rc==SQLITE_OK && sParse.nNest ){
rc = SQLITE_ERROR;
+ }
+
+ return rc;
+}
+
+/*
+** Parameters z and n contain a pointer to and length of a buffer containing
+** an fts3 query expression, respectively. This function attempts to parse the
+** query expression and create a tree of Fts3Expr structures representing the
+** parsed expression. If successful, *ppExpr is set to point to the head
+** of the parsed expression tree and SQLITE_OK is returned. If an error
+** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
+** error) is returned and *ppExpr is set to 0.
+**
+** If parameter n is a negative number, then z is assumed to point to a
+** nul-terminated string and the length is determined using strlen().
+**
+** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
+** use to normalize query tokens while parsing the expression. The azCol[]
+** array, which is assumed to contain nCol entries, should contain the names
+** of each column in the target fts3 table, in order from left to right.
+** Column names must be nul-terminated strings.
+**
+** The iDefaultCol parameter should be passed the index of the table column
+** that appears on the left-hand-side of the MATCH operator (the default
+** column to match against for tokens for which a column name is not explicitly
+** specified as part of the query string), or -1 if tokens may by default
+** match any table column.
+*/
+int sqlite3Fts3ExprParse(
+ sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
+ int iLangid, /* Language id for tokenizer */
+ char **azCol, /* Array of column names for fts3 table */
+ int bFts4, /* True to allow FTS4-only syntax */
+ int nCol, /* Number of entries in azCol[] */
+ int iDefaultCol, /* Default column to query */
+ const char *z, int n, /* Text of MATCH query */
+ Fts3Expr **ppExpr /* OUT: Parsed query structure */
+){
+ /* Depth limit handed to both the rebalancer and the depth check below. */
+ static const int MAX_EXPR_DEPTH = 12;
+ int rc = fts3ExprParseUnbalanced(
+ pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
+ );
+
+ /* Rebalance the expression. And check that its depth does not exceed
+ ** MAX_EXPR_DEPTH. */
+ if( rc==SQLITE_OK && *ppExpr ){
+ rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH);
+ if( rc==SQLITE_OK ){
+ rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH);
+ }
+ }
+ /* On any failure, free whatever tree *ppExpr still refers to and clear
+ ** it, so that the caller only sees a tree when SQLITE_OK is returned. */
+ if( rc!=SQLITE_OK ){
sqlite3Fts3ExprFree(*ppExpr);
*ppExpr = 0;
}
return rc;
}
+/*
+** Release a single expression node. The node's phrase object is handed
+** to sqlite3Fts3EvalPhraseCleanup(), then the aMI buffer and the node
+** itself are freed. The children of the node are not visited.
+*/
+static void fts3FreeExprNode(Fts3Expr *p){
+  Fts3Phrase *pPhrase = p->pPhrase;
+
+  /* Only a node of type FTSQUERY_PHRASE may carry a phrase object. */
+  assert( pPhrase==0 || p->eType==FTSQUERY_PHRASE );
+  sqlite3Fts3EvalPhraseCleanup(pPhrase);
+  sqlite3_free(p->aMI);
+  sqlite3_free(p);
+}
+
/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
+**
+** This function would be simpler if it recursively called itself. But
+** that would mean passing a sufficiently large expression to ExprParse()
+** could cause a stack overflow.
*/
-void sqlite3Fts3ExprFree(Fts3Expr *p){
- if( p ){
- assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
- sqlite3Fts3ExprFree(p->pLeft);
- sqlite3Fts3ExprFree(p->pRight);
- sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
- sqlite3_free(p->aMI);
- sqlite3_free(p);
+void sqlite3Fts3ExprFree(Fts3Expr *pDel){
+ Fts3Expr *p;
+ /* The argument must be NULL or a node that has no parent. */
+ assert( pDel==0 || pDel->pParent==0 );
+ /* Walk down to a node with no children, preferring the left child. */
+ for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
+ assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
+ }
+ /* Free nodes children-first. After freeing a left child that has a right
+ ** sibling, continue with the first childless node beneath that sibling.
+ ** Otherwise move up to the parent, which is freed on the next pass. */
+ while( p ){
+ Fts3Expr *pParent = p->pParent;
+ fts3FreeExprNode(p);
+ if( pParent && p==pParent->pLeft && pParent->pRight ){
+ p = pParent->pRight;
+ while( p && (p->pLeft || p->pRight) ){
+ assert( p==p->pParent->pRight || p==p->pParent->pLeft );
+ p = (p->pLeft ? p->pLeft : p->pRight);
+ }
+ }else{
+ p = pParent;
+ }
}
}
** the returned expression text and then freed using sqlite3_free().
*/
static char *exprToString(Fts3Expr *pExpr, char *zBuf){
+ if( pExpr==0 ){
+ return sqlite3_mprintf("");
+ }
switch( pExpr->eType ){
case FTSQUERY_PHRASE: {
Fts3Phrase *pPhrase = pExpr->pPhrase;
azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
}
- rc = sqlite3Fts3ExprParse(
- pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
- );
+ if( sqlite3_user_data(context) ){
+ rc = sqlite3Fts3ExprParse(
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
+ );
+ assert( rc==SQLITE_OK || pExpr==0 );
+ }else{
+ rc = fts3ExprParseUnbalanced(
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
+ );
+ }
+
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
+ sqlite3Fts3ExprFree(pExpr);
sqlite3_result_error(context, "Error parsing expression", -1);
}else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
sqlite3_result_error_nomem(context);
** with database connection db.
*/
int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
- return sqlite3_create_function(
+ int rc = sqlite3_create_function(
db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
);
+ /* Register the same implementation a second time with non-NULL user
+ ** data. fts3ExprTest() checks sqlite3_user_data() and, when it is set,
+ ** parses with sqlite3Fts3ExprParse() instead of
+ ** fts3ExprParseUnbalanced(). */
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",
+ -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0
+ );
+ }
+ return rc;
}
#endif
--- /dev/null
+# 2009 January 1
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library. The
+# focus of this script is testing the part of the FTS3 expression
+# parser that rebalances large expressions.
+#
+# $Id: fts3expr2.test,v 1.2 2009/06/05 17:09:12 drh Exp $
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+source $testdir/malloc_common.tcl
+set ::testprefix fts3expr3
+
+# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
+ifcapable !fts3 {
+ finish_test
+ return
+}
+
+set sqlite_fts3_enable_parentheses 1
+
+# Reduce an expression-tree list to its structure. A node whose first
+# element is PHRASE becomes {P <elements 3..end>}; any other node keeps
+# its first element and has its second and third elements reduced
+# recursively.
+proc strip_phrase_data {L} {
+  set op [lindex $L 0]
+  if {$op ne "PHRASE"} {
+    set lhs [strip_phrase_data [lindex $L 1]]
+    set rhs [strip_phrase_data [lindex $L 2]]
+    return [list $op $lhs $rhs]
+  }
+  return [list P [lrange $L 3 end]]
+}
+# Parse $expr with the fts3_exprtest_rebalance() SQL function and return
+# the resulting tree with the phrase details stripped.
+proc test_fts3expr2 {expr} {
+  set sql {SELECT fts3_exprtest_rebalance('simple', $expr, 'a', 'b', 'c')}
+  set tree [db one $sql]
+  return [strip_phrase_data $tree]
+}
+
+# Return the shape of a tree of AND nodes over nEntry placeholder leaves
+# ("xxx"). The tree is built bottom-up: adjacent entries are paired into
+# {AND a b} nodes, an unpaired last entry is carried up as-is, and the
+# process repeats until one entry remains.
+proc balanced_exprtree_structure {nEntry} {
+  set level [list]
+  set n 1
+  while {$n <= $nEntry} {
+    lappend level xxx
+    incr n
+  }
+  while {[llength $level] > 1} {
+    set odd [expr {[llength $level] % 2}]
+    if {$odd} {
+      set pairs [lrange $level 0 end-1]
+    } else {
+      set pairs $level
+    }
+    set next [list]
+    foreach {a b} $pairs { lappend next [list AND $a $b] }
+    if {$odd} { lappend next [lindex $level end] }
+    set level $next
+  }
+  return [lindex $level 0]
+}
+
+# Return the balanced AND tree over nEntry leaves, with the placeholders
+# replaced from left to right by {P 1}, {P 2}, ... A tree that is a
+# single placeholder is returned as "P 1".
+proc balanced_and_tree {nEntry} {
+  set tree [balanced_exprtree_structure $nEntry]
+  if {$tree == "xxx"} {
+    return "P 1"
+  }
+  set i 0
+  while {[incr i] <= $nEntry} {
+    regsub xxx $tree "{P $i}" tree
+  }
+  return $tree
+}
+
+# Build a query string of nEntry "xxx" placeholders joined by $op. At
+# each step the new placeholder is added at a randomly chosen end and,
+# when bParen is true, the query built so far is wrapped in parentheses
+# with probability 1/4.
+proc random_tree_structure {nEntry bParen op} {
+  set tree xxx
+  for {set n 1} {$n < $nEntry} {incr n} {
+    set wrapRoll [expr {int(rand()*4.0)}]
+    set sideRoll [expr {int(rand()*2.0)}]
+    if {$wrapRoll==0 && $bParen} {
+      set tree "($tree)"
+    }
+    if {$sideRoll} {
+      set tree "xxx $op $tree"
+    } else {
+      set tree "$tree $op xxx"
+    }
+  }
+  return $tree
+}
+
+# Return a random AND query over the terms 1..nEntry. The terms appear
+# in ascending order from left to right; bParen enables random
+# parentheses.
+proc random_and_query {nEntry {bParen 0}} {
+  set q [random_tree_structure $nEntry $bParen AND]
+  set term 1
+  while {$term <= $nEntry} {
+    regsub xxx $q $term q
+    incr term
+  }
+  return $q
+}
+
+# Return a random OR query, with random parentheses, over the terms
+# 1..nEntry in ascending order from left to right.
+proc random_or_query {nEntry} {
+  set q [random_tree_structure $nEntry 1 OR]
+  set term 1
+  while {$term <= $nEntry} {
+    regsub xxx $q $term q
+    incr term
+  }
+  return $q
+}
+
+# Return a random AND query with nEntry operands, each of which is a
+# parenthesized random OR query over the terms 1..nEntry.
+proc random_andor_query {nEntry} {
+  set q [random_tree_structure $nEntry 1 AND]
+  set slot 1
+  while {$slot <= $nEntry} {
+    regsub xxx $q "([random_or_query $nEntry])" q
+    incr slot
+  }
+  return $q
+}
+
+# Return a balanced AND tree over nEntry leaves in which every leaf is
+# itself the balanced tree over {P 1}..{P nEntry} with AND replaced by
+# OR.
+proc balanced_andor_tree {nEntry} {
+  set orLeaf [string map {AND OR} "{[balanced_and_tree $nEntry]}"]
+  set shape [balanced_exprtree_structure $nEntry]
+  return [string map [list xxx $orLeaf] $shape]
+}
+
+# Each group of tests below uses its own test-name prefix, so that a
+# failure report identifies exactly one test case.
+#
+# Test that queries like "1 AND 2 AND 3 AND 4..." are transformed to
+# balanced trees by FTS.
+#
+for {set i 1} {$i < 100} {incr i} {
+  do_test 1.$i {
+    test_fts3expr2 [random_and_query $i]
+  } [balanced_and_tree $i]
+}
+
+# Same again, except with parenthesis inserted at arbitrary points.
+#
+for {set i 1} {$i < 100} {incr i} {
+  do_test 2.$i {
+    test_fts3expr2 [random_and_query $i 1]
+  } [balanced_and_tree $i]
+}
+
+# Now attempt to balance two AND trees joined by an OR.
+#
+for {set i 1} {$i < 100} {incr i} {
+  do_test 3.1.$i {
+    test_fts3expr2 "[random_and_query $i 1] OR [random_and_query $i 1]"
+  } [list OR [balanced_and_tree $i] [balanced_and_tree $i]]
+}
+
+# Try trees of AND nodes with leaves that are themselves trees of OR nodes.
+#
+for {set i 2} {$i < 32} {incr i} {
+  do_test 3.2.$i {
+    test_fts3expr2 [random_andor_query $i]
+  } [balanced_andor_tree $i]
+}
+
+# These exceed the depth limit.
+#
+for {set i 33} {$i < 40} {incr i} {
+  do_test 3.3.$i {
+    list [catch {test_fts3expr2 [random_andor_query $i]} msg] $msg
+  } {1 {Error parsing expression}}
+}
+
+# This also exceeds the depth limit.
+#
+do_test 4.1 {
+  set q "1"
+  for {set i 2} {$i < 5000} {incr i} {
+    append q " AND $i"
+  }
+  list [catch {test_fts3expr2 $q} msg] $msg
+} {1 {Error parsing expression}}
+
+# Return a query string for a complete binary tree with nDepth levels
+# of operators over "xxx" placeholders. The operator is OR at odd
+# values of nDepth and AND at even values, so it alternates from one
+# level to the next.
+proc create_toggle_tree {nDepth} {
+  if {$nDepth == 0} { return xxx }
+  set child [create_toggle_tree [expr {$nDepth-1}]]
+  if {$nDepth % 2} {
+    set op OR
+  } else {
+    set op AND
+  }
+  return "($child) $op ($child)"
+}
+
+# A 17-level tree whose operator alternates between AND and OR at every
+# level is expected to be rejected with a parse error.
+do_test 4.2 {
+ list [catch {test_fts3expr2 [create_toggle_tree 17]} msg] $msg
+} {1 {Error parsing expression}}
+
+# Parse one fixed random AND-of-ORs query under the "oom-*" fault
+# settings. The result passed to faultsim_test_result is the balanced
+# tree computed up front.
+set query [random_andor_query 12]
+set result [balanced_andor_tree 12]
+do_faultsim_test fts3expr3-fault-1 -faults oom-* -body {
+ test_fts3expr2 $::query
+} -test {
+ faultsim_test_result [list 0 $::result]
+}
+
+# Switch sqlite_fts3_enable_parentheses back to 0 (it is set to 1 earlier
+# in this file) before finishing.
+set sqlite_fts3_enable_parentheses 0
+finish_test
+
+
+
+