From: dan Date: Sat, 12 Mar 2016 16:32:16 +0000 (+0000) Subject: Fix handling of strings that contain zero tokens in fts5. And other problems found... X-Git-Tag: version-3.12.0~63 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e8c20120ce388253bcf5671b2aaf77348305272d;p=thirdparty%2Fsqlite.git Fix handling of strings that contain zero tokens in fts5. And other problems found by fuzzing. FossilOrigin-Name: 72b3ff0f0df83e62adda6584b4281cf086d45e45 --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index f9e1567fb6..35f15abbaf 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -701,6 +701,12 @@ Fts5ExprNode *sqlite3Fts5ParseNode( Fts5ExprNearset *pNear ); +Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( + Fts5Parse *pParse, + Fts5ExprNode *pLeft, + Fts5ExprNode *pRight +); + Fts5ExprPhrase *sqlite3Fts5ParseTerm( Fts5Parse *pParse, Fts5ExprPhrase *pPhrase, diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index ccc154c3b4..17fc43e011 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -442,7 +442,9 @@ static const char *fts5ConfigGobbleWord( *pbQuoted = 1; }else{ zRet = fts5ConfigSkipBareword(zIn); - zOut[zRet-zIn] = '\0'; + if( zRet ){ + zOut[zRet-zIn] = '\0'; + } } } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index d3f801b022..153d1c6472 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1268,6 +1268,8 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ if( Fts5NodeIsString(pNode) ){ /* Initialize all term iterators in the NEAR object. */ rc = fts5ExprNearInitAll(pExpr, pNode); + }else if( pNode->xNext==0 ){ + pNode->bEof = 1; }else{ int i; int nEof = 0; @@ -1319,23 +1321,22 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ */ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ Fts5ExprNode *pRoot = p->pRoot; - int rc = SQLITE_OK; - if( pRoot->xNext ){ - p->pIndex = pIdx; - p->bDesc = bDesc; - rc = fts5ExprNodeFirst(p, pRoot); + int rc; /* Return code */ - /* If not at EOF but the current rowid occurs earlier than iFirst in - ** the iteration order, move to document iFirst or later. */ - if( pRoot->bEof==0 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 ){ - rc = fts5ExprNodeNext(p, pRoot, 1, iFirst); - } + p->pIndex = pIdx; + p->bDesc = bDesc; + rc = fts5ExprNodeFirst(p, pRoot); - /* If the iterator is not at a real match, skip forward until it is. */ - while( pRoot->bNomatch ){ - assert( pRoot->bEof==0 && rc==SQLITE_OK ); - rc = fts5ExprNodeNext(p, pRoot, 0, 0); - } + /* If not at EOF but the current rowid occurs earlier than iFirst in + ** the iteration order, move to document iFirst or later. */ + if( pRoot->bEof==0 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 ){ + rc = fts5ExprNodeNext(p, pRoot, 1, iFirst); + } + + /* If the iterator is not at a real match, skip forward until it is. */ + while( pRoot->bNomatch ){ + assert( pRoot->bEof==0 && rc==SQLITE_OK ); + rc = fts5ExprNodeNext(p, pRoot, 0, 0); } return rc; } @@ -1476,8 +1477,7 @@ static int fts5ParseTokenize( /* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; - assert( pPhrase==0 || pPhrase->nTerm>0 ); - if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ + if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); @@ -1578,7 +1578,7 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( pParse->rc = rc; fts5ExprPhraseFree(sCtx.pPhrase); sCtx.pPhrase = 0; - }else if( sCtx.pPhrase ){ + }else{ if( pAppend==0 ){ if( (pParse->nPhrase % 8)==0 ){ @@ -1595,9 +1595,14 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( pParse->nPhrase++; } + if( sCtx.pPhrase==0 ){ + /* This happens when parsing a token or quoted phrase that contains + ** no token characters at all. (e.g ... MATCH '""'). */ + sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase)); + }else if( sCtx.pPhrase->nTerm ){ + sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; + } pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; - assert( sCtx.pPhrase->nTerm>0 ); - sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; } return sCtx.pPhrase; @@ -1693,23 +1698,25 @@ void sqlite3Fts5ParseSetDistance( Fts5ExprNearset *pNear, Fts5Token *p ){ - int nNear = 0; - int i; - if( p->n ){ - for(i=0; in; i++){ - char c = (char)p->p[i]; - if( c<'0' || c>'9' ){ - sqlite3Fts5ParseError( - pParse, "expected integer, got \"%.*s\"", p->n, p->p - ); - return; + if( pNear ){ + int nNear = 0; + int i; + if( p->n ){ + for(i=0; in; i++){ + char c = (char)p->p[i]; + if( c<'0' || c>'9' ){ + sqlite3Fts5ParseError( + pParse, "expected integer, got \"%.*s\"", p->n, p->p + ); + return; + } + nNear = nNear * 10 + (p->p[i] - '0'); } - nNear = nNear * 10 + (p->p[i] - '0'); + }else{ + nNear = FTS5_DEFAULT_NEARDIST; } - }else{ - nNear = FTS5_DEFAULT_NEARDIST; + pNear->nNear = nNear; } - pNear->nNear = nNear; } /* @@ -1896,10 +1903,14 @@ Fts5ExprNode *sqlite3Fts5ParseNode( int iPhrase; for(iPhrase=0; iPhrasenPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; + if( pNear->apPhrase[iPhrase]->nTerm==0 ){ + pRet->xNext = 0; + pRet->eType = FTS5_EOF; + } } if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL - && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm!=1) + && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm>1) ){ assert( pParse->rc==SQLITE_OK ); pParse->rc = SQLITE_ERROR; @@ -1928,6 +1939,70 @@ Fts5ExprNode *sqlite3Fts5ParseNode( return pRet; } +Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( + Fts5Parse *pParse, /* Parse context */ + Fts5ExprNode *pLeft, /* Left hand child expression */ + Fts5ExprNode *pRight /* Right hand child expression */ +){ + Fts5ExprNode *pRet = 0; + Fts5ExprNode *pPrev; + + if( pParse->rc ){ + sqlite3Fts5ParseNodeFree(pLeft); + sqlite3Fts5ParseNodeFree(pRight); + }else{ + + assert( pLeft->eType==FTS5_STRING + || pLeft->eType==FTS5_TERM + || pLeft->eType==FTS5_EOF + || pLeft->eType==FTS5_AND + ); + assert( pRight->eType==FTS5_STRING + || pRight->eType==FTS5_TERM + || pRight->eType==FTS5_EOF + ); + + if( pLeft->eType==FTS5_AND ){ + pPrev = pLeft->apChild[pLeft->nChild-1]; + }else{ + pPrev = pLeft; + } + assert( pPrev->eType==FTS5_STRING + || pPrev->eType==FTS5_TERM + || pPrev->eType==FTS5_EOF + ); + + if( pRight->eType==FTS5_EOF ){ + assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] ); + sqlite3Fts5ParseNodeFree(pRight); + pRet = pLeft; + pParse->nPhrase--; + } + else if( pPrev->eType==FTS5_EOF ){ + Fts5ExprPhrase **ap; + + if( pPrev==pLeft ){ + pRet = pRight; + }else{ + pLeft->apChild[pLeft->nChild-1] = pRight; + pRet = pLeft; + } + + ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase]; + assert( ap[0]==pPrev->pNear->apPhrase[0] ); + memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase); + pParse->nPhrase--; + + sqlite3Fts5ParseNodeFree(pPrev); + } + else{ + pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0); + } + } + + return pRet; +} + static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ int nByte = 0; Fts5ExprTerm *p; @@ -2062,6 +2137,9 @@ static char *fts5ExprPrintTcl( static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ char *zRet = 0; + if( pExpr->eType==0 ){ + return sqlite3_mprintf("\"\""); + }else if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; @@ -2122,7 +2200,7 @@ static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ zRet = 0; }else{ int e = pExpr->apChild[i]->eType; - int b = (e!=FTS5_STRING && e!=FTS5_TERM); + int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF); zRet = fts5PrintfAppend(zRet, "%s%s%z%s", (i==0 ? "" : zOp), (b?"(":""), z, (b?")":"") diff --git a/ext/fts5/fts5parse.y b/ext/fts5/fts5parse.y index 2bdf4b09b2..1607d3846a 100644 --- a/ext/fts5/fts5parse.y +++ b/ext/fts5/fts5parse.y @@ -104,7 +104,7 @@ expr(A) ::= exprlist(X). {A = X;} exprlist(A) ::= cnearset(X). {A = X;} exprlist(A) ::= exprlist(X) cnearset(Y). { - A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0); + A = sqlite3Fts5ParseImplicitAnd(pParse, X, Y); } cnearset(A) ::= nearset(X). { diff --git a/ext/fts5/test/fts5eb.test b/ext/fts5/test/fts5eb.test index 8205396047..69418aae63 100644 --- a/ext/fts5/test/fts5eb.test +++ b/ext/fts5/test/fts5eb.test @@ -33,12 +33,12 @@ foreach {tn expr res} { 1 {abc} {"abc"} 2 {abc ""} {"abc"} 3 {""} {} - 4 {abc OR ""} {"abc"} - 5 {abc NOT ""} {"abc"} - 6 {abc AND ""} {"abc"} - 7 {"" OR abc} {"abc"} - 8 {"" NOT abc} {"abc"} - 9 {"" AND abc} {"abc"} + 4 {abc OR ""} {"abc" OR ""} + 5 {abc NOT ""} {"abc" NOT ""} + 6 {abc AND ""} {"abc" AND ""} + 7 {"" OR abc} {"" OR "abc"} + 8 {"" NOT abc} {"" NOT "abc"} + 9 {"" AND abc} {"" AND "abc"} 10 {abc + "" + def} {"abc" + "def"} 11 {abc "" def} {"abc" AND "def"} 12 {r+e OR w} {"r" + "e" OR "w"} diff --git a/ext/fts5/test/fts5fuzz1.test b/ext/fts5/test/fts5fuzz1.test new file mode 100644 index 0000000000..638620d025 --- /dev/null +++ b/ext/fts5/test/fts5fuzz1.test @@ -0,0 +1,89 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +return_if_no_fts5 +set testprefix fts5fuzz1 + + +#------------------------------------------------------------------------- +reset_db +do_catchsql_test 1.1 { + CREATE VIRTUAL TABLE f1 USING fts5(a b); +} {/1 {parse error in.*}/} + + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE f1 USING fts5(a, b); + INSERT INTO f1 VALUES('a b', 'c d'); + INSERT INTO f1 VALUES('e f', 'a b'); +} + +do_execsql_test 2.2.1 { + SELECT rowid FROM f1('""'); +} {} + +do_execsql_test 2.2.2 { + SELECT rowid FROM f1('"" AND a'); +} {} + + +do_execsql_test 2.2.3 { + SELECT rowid FROM f1('"" a'); +} {1 2} + +do_execsql_test 2.2.4 { + SELECT rowid FROM f1('"" OR a'); +} {1 2} + +do_execsql_test 2.3 { + SELECT a, b FROM f1('NEAR("")'); +} {} + +do_execsql_test 2.4 { + SELECT a, b FROM f1('NEAR("", 5)'); +} {} + +do_execsql_test 2.5 { + SELECT a, b FROM f1('NEAR("" c, 5)'); +} {} + +do_execsql_test 2.6 { + SELECT a, b FROM f1('NEAR("" c d, 5)'); +} {} + +do_execsql_test 2.7 { + SELECT a, b FROM f1('NEAR(c d, 5)'); +} {{a b} {c d}} + +do_execsql_test 2.8 { + SELECT rowid FROM f1('NEAR("a" "b", 5)'); +} {1 2} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 3.2 { + CREATE VIRTUAL TABLE f2 USING fts5(o, t, tokenize="ascii separators abc"); + SELECT * FROM f2('a+4'); +} {} + + + + + +finish_test + + diff --git a/manifest b/manifest index eb0ce7dd6e..b9bfda53ca 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Do\snot\sallow\sthe\suse\sof\sWAL\smode\swith\snolock=1\sbecause\sit\sdoes\snot\swork. -D 2016-03-11T23:07:30.911 +C Fix\shandling\sof\sstrings\sthat\scontain\szero\stokens\sin\sfts5.\sAnd\sother\sproblems\sfound\sby\sfuzzing. +D 2016-03-12T16:32:16.002 F Makefile.in f53429fb2f313c099283659d0df6f20f932c861f F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc df0bf9ff7f8b3f4dd9fb4cc43f92fe58f6ec5c66 @@ -98,11 +98,11 @@ F ext/fts3/unicode/mkunicode.tcl 2debed3f582d77b3fdd0b8830880250021571fd8 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h ff9c2782e8ed890b0de2f697a8d63971939e70c7 -F ext/fts5/fts5Int.h 59e13423371512df1992532a08fe80518244f96b +F ext/fts5/fts5Int.h 4060504b7979601d99e1385c2b5713036854979a F ext/fts5/fts5_aux.c daa57fb45216491814520bbb587e97bf81ced458 F ext/fts5/fts5_buffer.c 4c1502d4c956cd092c89ce4480867f9d8bf325cd -F ext/fts5/fts5_config.c abd3ae1107ca8b8def7445a0a3b70c2e46d05986 -F ext/fts5/fts5_expr.c 8e8e4635f655133eb39018072fc0f0942a2c4337 +F ext/fts5/fts5_config.c 5af9c360e99669d29f06492c370892394aba0857 +F ext/fts5/fts5_expr.c c4166ba0de2e87c444a7eedc8623e32653138ca0 F ext/fts5/fts5_hash.c f3a7217c86eb8f272871be5f6aa1b6798960a337 F ext/fts5/fts5_index.c d4f0c12e4f04bbc3a06b6da052039f2ce3e45438 F ext/fts5/fts5_main.c b8501e1a6a11591c53b18ce7aea7e5386cfb0421 @@ -114,7 +114,7 @@ F ext/fts5/fts5_tokenize.c 2ce7b44183538ec46b7907726262ee43ffdd39a8 F ext/fts5/fts5_unicode2.c b450b209b157d598f7b9df9f837afb75a14c24bf F ext/fts5/fts5_varint.c a5aceacda04dafcbae725413d7a16818ecd65738 F ext/fts5/fts5_vocab.c dba72ca393d71c2588548b51380387f6b44c77a8 -F ext/fts5/fts5parse.y 86fe6ba094a47e02fe8be2571539e6833d197764 +F ext/fts5/fts5parse.y fcc5e92e570d38cab38488b2109cbf67468923b2 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl b01c584144b5064f30e6c648145a2dd6bc440841 F ext/fts5/test/fts5aa.test 7e814df4a0e6c22a6fe2d84f210fdc0b5068a084 @@ -146,7 +146,7 @@ F ext/fts5/test/fts5detail.test ef5c690535a797413acaf5ad9b8ab5d49972df69 F ext/fts5/test/fts5dlidx.test 13871a14641017ae42f6f1055a8067bafd44cb3d F ext/fts5/test/fts5doclist.test 8edb5b57e5f144030ed74ec00ef6fa4294fed79b F ext/fts5/test/fts5ea.test b01e3a18cdfabbff8104a96a5242a06a68a998a0 -F ext/fts5/test/fts5eb.test 021aa80b7ac09b964249aa32ced9ee908703e4aa +F ext/fts5/test/fts5eb.test c516ae0c934be6fd29ec95ea8b5f11f461311535 F ext/fts5/test/fts5fault1.test e09040d3e17b8c0837101e8c79c8a874c4376fb7 F ext/fts5/test/fts5fault2.test d8c6c7f916ccbdfc10b2c69530e9dd3bc8313232 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 @@ -159,6 +159,7 @@ F ext/fts5/test/fts5fault9.test e10e395428a9ea0596ebe752ff7123d16ab78e08 F ext/fts5/test/fts5faultA.test fa5d59c0ff62b7125cd14eee38ded1c46e15a7ea F ext/fts5/test/fts5faultB.test 92ae906284062bf081b6c854afa54dcb1aa9ef88 F ext/fts5/test/fts5full.test 6f6143af0c6700501d9fd597189dfab1555bb741 +F ext/fts5/test/fts5fuzz1.test 7a6411c39959d8f4cb8f11e840a787e74a3bfbef F ext/fts5/test/fts5hash.test 06f9309ccb4d5050a131594e9e47d0b21456837d F ext/fts5/test/fts5integrity.test f5e4f8d284385875068ad0f3e894ce43e9de835d F ext/fts5/test/fts5matchinfo.test f7dde99697bcb310ea8faa8eb2714d9f4dfc0e1b @@ -1455,7 +1456,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 83efcdebfa255cd0663ecc6b74eb99183e2f3a10 -R 95ba30529f9bfc1f4da2d6ab93334cdc -U drh -Z 5b477c118f77a96b5b851b408af856c1 +P 74f5d3b07f6e5e977858c73957c6f9337ae3ca3e +R 561f4a7c15c16c270cfb03e11b79c482 +U dan +Z 78cc445fb6679cc3e05486871c5dafe8 diff --git a/manifest.uuid b/manifest.uuid index 3b0b773e3e..00db2d08b1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -74f5d3b07f6e5e977858c73957c6f9337ae3ca3e \ No newline at end of file +72b3ff0f0df83e62adda6584b4281cf086d45e45 \ No newline at end of file