From: dan Date: Wed, 7 May 2014 19:59:36 +0000 (+0000) Subject: Fix the way parenthesis in MATCH expressions are handled by FTS if the tokenizer... X-Git-Tag: version-3.8.5~49 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d2d8ca600e5bb14050799f9246d29335dc87a476;p=thirdparty%2Fsqlite.git Fix the way parenthesis in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters. FossilOrigin-Name: e21bf7a2ade6373e94ea403c665f78e1ad22143f --- diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index 9c71f26ba1..95a9b1aada 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -185,40 +185,23 @@ static int getNextToken( int rc; sqlite3_tokenizer_cursor *pCursor; Fts3Expr *pRet = 0; - int nConsumed = 0; + int i = 0; - rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor); + /* Set variable i to the maximum number of bytes of input to tokenize. */ + for(i=0; i<n; i++){ + if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; + if( z[i]=='*' || z[i]=='"' ) break; + } + + *pnConsumed = i; + rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; int nByte; /* total space to allocate */ rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); - - if( (rc==SQLITE_OK || rc==SQLITE_DONE) && sqlite3_fts3_enable_parentheses ){ - int i; - if( rc==SQLITE_DONE ) iStart = n; - for(i=0; i<iStart; i++){ - if( z[i]=='(' ){ - pParse->nNest++; - rc = fts3ExprParse(pParse, &z[i+1], n-i-1, &pRet, &nConsumed); - if( rc==SQLITE_OK && !pRet ){ - rc = SQLITE_DONE; - } - nConsumed = (int)(i + 1 + nConsumed); - break; - } - - if( z[i]==')' ){ - rc = SQLITE_DONE; - pParse->nNest--; - nConsumed = i+1; - break; - } - } - } - - if( nConsumed==0 && rc==SQLITE_OK ){ + if( rc==SQLITE_OK ){ nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; pRet = (Fts3Expr *)fts3MallocZero(nByte); if( !pRet ){ @@ -252,13 +235,14 @@ static int getNextToken( } } - nConsumed = iEnd; + *pnConsumed = iEnd; + }else if( i && rc==SQLITE_DONE ){ + rc = SQLITE_OK; } pModule->xClose(pCursor); } - *pnConsumed = nConsumed; *ppExpr = pRet; return 
rc; } @@ -508,6 +492,21 @@ static int getNextNode( return getNextString(pParse, &zInput[1], ii-1, ppExpr); } + if( sqlite3_fts3_enable_parentheses ){ + if( *zInput=='(' ){ + int nConsumed = 0; + pParse->nNest++; + rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); + if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } + *pnConsumed = (int)(zInput - z) + 1 + nConsumed; + return rc; + }else if( *zInput==')' ){ + pParse->nNest--; + *pnConsumed = (zInput - z) + 1; + *ppExpr = 0; + return SQLITE_DONE; + } + } /* If control flows to this point, this must be a regular token, or ** the end of the input. Read a regular token using the sqlite3_tokenizer @@ -626,96 +625,100 @@ static int fts3ExprParse( while( rc==SQLITE_OK ){ Fts3Expr *p = 0; int nByte = 0; + rc = getNextNode(pParse, zIn, nIn, &p, &nByte); + assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); if( rc==SQLITE_OK ){ - int isPhrase; - - if( !sqlite3_fts3_enable_parentheses - && p->eType==FTSQUERY_PHRASE && pParse->isNot - ){ - /* Create an implicit NOT operator. */ - Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); - if( !pNot ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_NOMEM; - goto exprparse_out; - } - pNot->eType = FTSQUERY_NOT; - pNot->pRight = p; - p->pParent = pNot; - if( pNotBranch ){ - pNot->pLeft = pNotBranch; - pNotBranch->pParent = pNot; - } - pNotBranch = pNot; - p = pPrev; - }else{ - int eType = p->eType; - isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); - - /* The isRequirePhrase variable is set to true if a phrase or - ** an expression contained in parenthesis is required. If a - ** binary operator (AND, OR, NOT or NEAR) is encounted when - ** isRequirePhrase is set, this is a syntax error. - */ - if( !isPhrase && isRequirePhrase ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_ERROR; - goto exprparse_out; - } - - if( isPhrase && !isRequirePhrase ){ - /* Insert an implicit AND operator. 
*/ - Fts3Expr *pAnd; - assert( pRet && pPrev ); - pAnd = fts3MallocZero(sizeof(Fts3Expr)); - if( !pAnd ){ + if( p ){ + int isPhrase; + + if( !sqlite3_fts3_enable_parentheses + && p->eType==FTSQUERY_PHRASE && pParse->isNot + ){ + /* Create an implicit NOT operator. */ + Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); + if( !pNot ){ sqlite3Fts3ExprFree(p); rc = SQLITE_NOMEM; goto exprparse_out; } - pAnd->eType = FTSQUERY_AND; - insertBinaryOperator(&pRet, pPrev, pAnd); - pPrev = pAnd; - } + pNot->eType = FTSQUERY_NOT; + pNot->pRight = p; + p->pParent = pNot; + if( pNotBranch ){ + pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; + } + pNotBranch = pNot; + p = pPrev; + }else{ + int eType = p->eType; + isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); + + /* The isRequirePhrase variable is set to true if a phrase or + ** an expression contained in parenthesis is required. If a + ** binary operator (AND, OR, NOT or NEAR) is encounted when + ** isRequirePhrase is set, this is a syntax error. + */ + if( !isPhrase && isRequirePhrase ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } - /* This test catches attempts to make either operand of a NEAR - ** operator something other than a phrase. For example, either of - ** the following: - ** - ** (bracketed expression) NEAR phrase - ** phrase NEAR (bracketed expression) - ** - ** Return an error in either case. - */ - if( pPrev && ( + if( isPhrase && !isRequirePhrase ){ + /* Insert an implicit AND operator. */ + Fts3Expr *pAnd; + assert( pRet && pPrev ); + pAnd = fts3MallocZero(sizeof(Fts3Expr)); + if( !pAnd ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_NOMEM; + goto exprparse_out; + } + pAnd->eType = FTSQUERY_AND; + insertBinaryOperator(&pRet, pPrev, pAnd); + pPrev = pAnd; + } + + /* This test catches attempts to make either operand of a NEAR + ** operator something other than a phrase. 
For example, either of + ** the following: + ** + ** (bracketed expression) NEAR phrase + ** phrase NEAR (bracketed expression) + ** + ** Return an error in either case. + */ + if( pPrev && ( (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) - )){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_ERROR; - goto exprparse_out; - } - - if( isPhrase ){ - if( pRet ){ - assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); - pPrev->pRight = p; - p->pParent = pPrev; + )){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } + + if( isPhrase ){ + if( pRet ){ + assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); + pPrev->pRight = p; + p->pParent = pPrev; + }else{ + pRet = p; + } }else{ - pRet = p; + insertBinaryOperator(&pRet, pPrev, p); } - }else{ - insertBinaryOperator(&pRet, pPrev, p); + isRequirePhrase = !isPhrase; } - isRequirePhrase = !isPhrase; + pPrev = p; } assert( nByte>0 ); } assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); nIn -= nByte; zIn += nByte; - pPrev = p; } if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ diff --git a/manifest b/manifest index 77f24441e5..98fa92f28f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Make\ssure\sthe\sgroup_concat()\sfunction\sreturns\san\sempty\sstring,\snot\sa\sNULL,\s\nif\sit\shas\sat\sleast\sone\sinput\srow.\sFix\sfor\sticket\s[55746f9e65f8587]. -D 2014-05-07T18:23:04.692 +C Fix\sthe\sway\sparenthesis\sin\sMATCH\sexpressions\sare\shandled\sby\sFTS\sif\sthe\stokenizer\sconsiders\sthem\sto\sbe\stoken\scharacters. 
+D 2014-05-07T19:59:36.446 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in dd2b1aba364ff9b05de41086f74407f285c57670 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -82,7 +82,7 @@ F ext/fts3/fts3.c 41b1920b9a8657963f09cb93b208c2671c5568db F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3Int.h bdeb9015405e8facffb8fc7e09174521a2a780f4 F ext/fts3/fts3_aux.c 5c211e17a64885faeb16b9ba7772f9d5445c2365 -F ext/fts3/fts3_expr.c 5165c365cb5a035f5be8bb296f7aa3211d43e4ac +F ext/fts3/fts3_expr.c 2ac35bda474f00c14c19608e49a02c8c7ceb9970 F ext/fts3/fts3_hash.c 29b986e43f4e9dd40110eafa377dc0d63c422c60 F ext/fts3/fts3_hash.h 39cf6874dc239d6b4e30479b1975fe5b22a3caaf F ext/fts3/fts3_icu.c e319e108661147bcca8dd511cd562f33a1ba81b5 @@ -541,13 +541,14 @@ F test/fts3corrupt2.test 6d96efae2f8a6af3eeaf283aba437e6d0e5447ba F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7 F test/fts3d.test 597b0b76e41f0d672e2731c4d7b631d628efd13f F test/fts3defer.test 0be4440b73a2e651fc1e472066686d6ada4b9963 -F test/fts3defer2.test a3b6cbeabaf28c9398652a4d101ea224d9358479 +F test/fts3defer2.test e880e3b65bdf999f4746cdaefa65f14a98b9b724 F test/fts3defer3.test dd53fc13223c6d8264a98244e9b19abd35ed71cd F test/fts3drop.test 1b906e293d6773812587b3dc458cb9e8f3f0c297 F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851 -F test/fts3expr.test 06f1a96facc8f3e4b1ad5cc001dc5c8e83e68b9f +F test/fts3expr.test 3401d47b229c4504424caf362cc4ff704cad4162 F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a F test/fts3expr3.test 9e91b8edbcb197bf2e92161aa7696446d96dce5f +F test/fts3expr4.test 0713d94ab951ed88a8c3629a4889a48c55c4067c F test/fts3fault.test cb72dccb0a3b9f730f16c5240f3fcb9303eb1660 F test/fts3fault2.test 3198eef2804deea7cac8403e771d9cbcb752d887 F test/fts3first.test dbdedd20914c8d539aa3206c9b34a23775644641 @@ -1169,8 +1170,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F 
tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01 F tool/win/sqlite.vsix a94fb9b1b1ef06efc2898975cdfcfa9643731f5e -P 1a0d7d3d9dd54b783e3a805961287dd01f94770c f03fbf3700d9d4a654e3aa2e5caa810a8416bed9 -R aaf6cc2f3f5ec9bee45f0a384deaafae -T +closed f03fbf3700d9d4a654e3aa2e5caa810a8416bed9 -U drh -Z fad451fce86c7077ae7b04e526f1422c +P d01cedaa73d8f9e5502502a1068a9509d1de295c +R 28138eb84e0bfcefa0d0799bd622d421 +U dan +Z 3413f41d3a51a750e0d3b3c404322221 diff --git a/manifest.uuid b/manifest.uuid index ff91e39b9f..d0d7c962bf 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d01cedaa73d8f9e5502502a1068a9509d1de295c \ No newline at end of file +e21bf7a2ade6373e94ea403c665f78e1ad22143f \ No newline at end of file diff --git a/test/fts3defer2.test b/test/fts3defer2.test index 608371f15d..87af52461b 100644 --- a/test/fts3defer2.test +++ b/test/fts3defer2.test @@ -59,6 +59,7 @@ do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} + do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; diff --git a/test/fts3expr.test b/test/fts3expr.test index 192219f142..6e23faf633 100644 --- a/test/fts3expr.test +++ b/test/fts3expr.test @@ -509,4 +509,9 @@ do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah} do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah} set sqlite_fts3_enable_parentheses 0 + +do_test fts3expr-9.1 { + test_fts3expr "f (e NEAR/2 a)" +} {AND {PHRASE 3 0 f} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} + finish_test diff --git a/test/fts3expr4.test b/test/fts3expr4.test new file mode 100644 index 0000000000..94737971b4 --- /dev/null +++ b/test/fts3expr4.test @@ -0,0 +1,57 @@ +# 2014 May 7 +# +# The author disclaims copyright to this source code. 
In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS3 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts3expr4 + +# If the build lacks FTS3 or ICU support, omit this file. +ifcapable !fts3||!icu { + finish_test + return +} + +set sqlite_fts3_enable_parentheses 1 + +proc test_icu_fts3expr {expr} { + db one {SELECT fts3_exprtest('icu', $expr, 'a', 'b', 'c')} +} + +proc do_icu_expr_test {tn expr res} { + uplevel [list do_test $tn [list test_icu_fts3expr $expr] $res] +} + +#------------------------------------------------------------------------- +# +do_icu_expr_test 1.1 "abcd" {PHRASE 3 0 abcd} +do_icu_expr_test 1.2 " tag " {PHRASE 3 0 tag} +do_icu_expr_test 1.3 {"x y z"} {PHRASE 3 0 x y z} +do_icu_expr_test 1.4 {x OR y} {OR {PHRASE 3 0 x} {PHRASE 3 0 y}} +do_icu_expr_test 1.5 {(x OR y)} {OR {PHRASE 3 0 x} {PHRASE 3 0 y}} +do_icu_expr_test 1.6 { "(x OR y)" } {PHRASE 3 0 ( x or y )} + +# In "col:word", if "col" is not the name of a column, the entire thing +# is passed to the tokenizer. +# +do_icu_expr_test 1.7 {a:word} {PHRASE 0 0 word} +do_icu_expr_test 1.8 {d:word} {PHRASE 3 0 d:word} + +set sqlite_fts3_enable_parentheses 0 + +do_icu_expr_test 2.1 { + f (e NEAR/2 a) +} {AND {AND {AND {PHRASE 3 0 f} {PHRASE 3 0 (}} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} {PHRASE 3 0 )}} + +finish_test +