From: dan Date: Tue, 18 Oct 2011 19:39:41 +0000 (+0000) Subject: If a token within an FTS query is prefixed with a '^' character, it must be the first... X-Git-Tag: version-3.7.9~18^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3f1ea8d114e20a377a12d5a7a88e5b4f6cc76e60;p=thirdparty%2Fsqlite.git If a token within an FTS query is prefixed with a '^' character, it must be the first token in a column of data to match. FossilOrigin-Name: 63ac33c860eb32ce96699f06bf83121cec2ffaca --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 951d131667..289adeb4df 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -2347,6 +2347,67 @@ static void fts3DoclistPhraseMerge( *pnRight = p - aOut; } +/* +** When this function is called, pList points to a doclist containing position +** data, length *pnList bytes. This removes all entries from the doclist that +** do not correspond to the first token in a column and overwrites pList +** with the result. *pnList is set to the length of the new doclist before +** returning. +** +** If bDescDoclist is true, then both the input and output are in descending +** order. Otherwise, ascending. 
+*/ +static void fts3DoclistFirstFilter( + int bDescDoclist, /* True if pList is a descending doclist */ + char *pList, /* Buffer containing doclist */ + int *pnList /* IN/OUT: Size of doclist */ +){ + char *p = pList; + char *pOut = pList; + char *pEnd = &pList[*pnList]; + + sqlite3_int64 iDoc; + sqlite3_int64 iPrev; + int bFirstOut = 0; + + fts3GetDeltaVarint3(&p, pEnd, 0, &iDoc); + while( p ){ + int bWritten = 0; + if( *p!=0x01 ){ + if( *p==0x02 ){ + fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); + *pOut++ = 0x02; + bWritten = 1; + } + fts3ColumnlistCopy(0, &p); + } + + while( *p==0x01 ){ + sqlite3_int64 iCol; + p++; + p += sqlite3Fts3GetVarint(p, &iCol); + if( *p==0x02 ){ + if( bWritten==0 ){ + fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); + bWritten = 1; + } + pOut += sqlite3Fts3PutVarint(pOut, iCol); + *pOut++ = 0x02; + } + fts3ColumnlistCopy(0, &p); + } + if( bWritten ){ + *pOut++ = 0x00; + } + + assert( *p==0x00 ); + p++; + fts3GetDeltaVarint3(&p, pEnd, bDescDoclist, &iDoc); + } + + *pnList = (pOut - pList); +} + /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single @@ -3518,6 +3579,10 @@ static void fts3EvalPhraseMergeToken( ){ assert( iToken!=p->iDoclistToken ); + if( p->aToken[iToken].bFirst ){ + fts3DoclistFirstFilter(pTab->bDescIdx, pList, &nList); + } + if( pList==0 ){ sqlite3_free(p->doclist.aAll); p->doclist.aAll = 0; @@ -3721,6 +3786,7 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ && p->nToken==1 && pFirst->pSegcsr && pFirst->pSegcsr->bLookup + && pFirst->bFirst==0 ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? 
-1 : p->iColumn); diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 552d73d764..c9b291c6cc 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -310,6 +310,7 @@ struct Fts3PhraseToken { char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ + int bFirst; /* True if token must appear at position 0 */ /* Variables above this point are populated when the expression is ** parsed (by code in fts3_expr.c). Below this point the variables are diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index 7eb2962d44..e6193a1392 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -180,9 +180,21 @@ static int getNextToken( pRet->pPhrase->aToken[0].isPrefix = 1; iEnd++; } - if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ - pParse->isNot = 1; + + while( 1 ){ + if( !sqlite3_fts3_enable_parentheses + && iStart>0 && z[iStart-1]=='-' + ){ + pParse->isNot = 1; + iStart--; + }else if( iStart>0 && z[iStart-1]=='^' ){ + pRet->pPhrase->aToken[0].bFirst = 1; + iStart--; + }else{ + break; + } } + } nConsumed = iEnd; } @@ -281,6 +293,7 @@ static int getNextString( pToken->n = nByte; pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); + pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); nToken = ii+1; } } diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 47bed0dd5e..40c8e2f9ad 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -3117,6 +3117,7 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ Fts3PhraseToken *pPT = pDef->pToken; if( (pDef->iCol>=p->nColumn || pDef->iCol==i) + && (pPT->bFirst==0 || iPos==0) && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) && (0==memcmp(zToken, pPT->z, pPT->n)) ){ diff --git a/manifest b/manifest index 78cf5c490d..4ab3fba4d2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Cherrypick\spatch\s[3126754c72]\sfrom\sthe\strunk\sinto\sthe\scontent=\sbranch. 
-D 2011-10-18T12:49:59.086 +C If\sa\stoken\swithin\san\sFTS\squery\sis\sprefixed\swith\sa\s'^'\scharacter,\sit\smust\sbe\sthe\sfirst\stoken\sin\sa\scolumn\sof\sdata\sto\smatch. +D 2011-10-18T19:39:41.203 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in a162fe39e249b8ed4a65ee947c30152786cfe897 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -62,11 +62,11 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 15e1725f3dc7c0028676831d82b376e93b87527e +F ext/fts3/fts3.c e12a151b5f0f8d444744554f91dbb89dbf0654df F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h 06f442ce096e6254432a6b16a56b6fe7b24bd372 +F ext/fts3/fts3Int.h bc27eebe2c5919115aa1858fdd308a230af6a359 F ext/fts3/fts3_aux.c 0ebfa7b86cf8ff6a0861605fcc63b83ec1b70691 -F ext/fts3/fts3_expr.c 23791de01b3a5d313d76e02befd2601d4096bc2b +F ext/fts3/fts3_expr.c dd0facbede8fd7d1376670cc6154f1fef3a4c5bc F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914 F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c 6c8f395cdf9e1e3afa7fadb7e523dbbf381c6dfa @@ -77,7 +77,7 @@ F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894 F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68 -F ext/fts3/fts3_write.c 06520aa8a0a32a7bed08b29a9004fde1cb7f0318 +F ext/fts3/fts3_write.c 567380f2d6671df16cfbb56324b321c71d5ab0d3 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -469,13 
+469,14 @@ F test/fts3corrupt.test 7b0f91780ca36118d73324ec803187208ad33b32 F test/fts3corrupt2.test 6d96efae2f8a6af3eeaf283aba437e6d0e5447ba F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7 F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52 -F test/fts3defer.test ffd4e07f79a09660d4b3e2613b041ab9b6100d91 +F test/fts3defer.test b7bdf79da91365b00e7c21d70e9d0c617b9306b9 F test/fts3defer2.test 35867d33ba6db03f6c73bd6f5fc333ae14f68c81 F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851 F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a F test/fts3fault.test f83e556465bb69dc8bc676339eca408dce4ca246 F test/fts3fault2.test b62a2bc843c20414405f80e5eeb78e39bc68fe53 +F test/fts3first.test 10f42914701d559c9fabfd7725b56c9f1b542fe8 F test/fts3malloc.test b86ea33db9e8c58c0c2f8027a9fcadaf6a1568be F test/fts3matchinfo.test 6507fe1c342e542300d65ea637d4110eccf894e6 F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844 @@ -621,7 +622,7 @@ F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806 F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16 F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 -F test/permutations.test d850b5000a13baf042d5a20eb747079477dad45e +F test/permutations.test 522823b47238cb1754198f80817fe9f9158ede55 F test/pragma.test c8108e01da04f16e67e5754e610bc62c1b993f6c F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947 F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea @@ -966,7 +967,7 @@ F tool/symbols.sh caaf6ccc7300fd43353318b44524853e222557d5 F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh b7fdb2cc525f5ef4fa43c80e771636dd3690f9d2 -P 289ee43179369fce2fde50870d72c445e184e896 -R 50f1458a2cdbee488c53894b18c71839 +P f9b5b217088a6aeb25eba184ab92d1a842a680a6 +R 
4545768a323f698040377ce29e450e89 U dan -Z e075e042ac91a648318521977fb348f1 +Z a7972271e048cbba7c3ba6dcd813d2e6 diff --git a/manifest.uuid b/manifest.uuid index f94940003d..1ccbaef9b8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f9b5b217088a6aeb25eba184ab92d1a842a680a6 \ No newline at end of file +63ac33c860eb32ce96699f06bf83121cec2ffaca \ No newline at end of file diff --git a/test/fts3defer.test b/test/fts3defer.test index ab30bbe2c3..7fbe6b14ea 100644 --- a/test/fts3defer.test +++ b/test/fts3defer.test @@ -426,6 +426,13 @@ foreach {tn setup} { SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"' } {8 15 26 92 96} } + + do_select_test 7.1 { + SELECT rowid FROM t1 WHERE t1 MATCH '^zm mjpavjuhw' + } {56 62} + do_select_test 7.2 { + SELECT rowid FROM t1 WHERE t1 MATCH '^azavwm zm' + } {43} } set testprefix fts3defer diff --git a/test/fts3first.test b/test/fts3first.test new file mode 100644 index 0000000000..6e309d0d9d --- /dev/null +++ b/test/fts3first.test @@ -0,0 +1,79 @@ +# 2011 October 18 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/malloc_common.tcl + +ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE x1 USING FTS4(a, b, c); + INSERT INTO x1(docid,a,b,c) VALUES(0, 'K H D S T', 'V M N Y K', 'S Z N Q S'); + INSERT INTO x1(docid,a,b,c) VALUES(1, 'K N J L W', 'S Z W J Q', 'D U W S E'); + INSERT INTO x1(docid,a,b,c) VALUES(2, 'B P M O I', 'R P H W S', 'R J L L E'); + INSERT INTO x1(docid,a,b,c) VALUES(3, 'U R Q M L', 'M J K A V', 'Q W J T J'); + INSERT INTO x1(docid,a,b,c) VALUES(4, 'N J C Y N', 'R U D X V', 'B O U A Q'); + INSERT INTO x1(docid,a,b,c) VALUES(5, 'Q L X L U', 'I F N X S', 'U Q A N Y'); + INSERT INTO x1(docid,a,b,c) VALUES(6, 'M R G U T', 'U V I Q P', 'X Y D L S'); + INSERT INTO x1(docid,a,b,c) VALUES(7, 'D Y P O I', 'X J P K R', 'V O T H V'); + INSERT INTO x1(docid,a,b,c) VALUES(8, 'R Y D L R', 'U U E S J', 'N W L M R'); + INSERT INTO x1(docid,a,b,c) VALUES(9, 'Z P F N P', 'W A X D U', 'V A E Q A'); + INSERT INTO x1(docid,a,b,c) VALUES(10, 'Q I A Q M', 'N D K H C', 'A H T Q Z'); + INSERT INTO x1(docid,a,b,c) VALUES(11, 'T E R Q B', 'C I B C B', 'F Z U W R'); + INSERT INTO x1(docid,a,b,c) VALUES(12, 'E S V U W', 'T P F W H', 'A M D J Q'); + INSERT INTO x1(docid,a,b,c) VALUES(13, 'X S B T Y', 'U D N D P', 'X Z Y G F'); + INSERT INTO x1(docid,a,b,c) VALUES(14, 'K H A B L', 'S R C C Z', 'D W E H J'); + INSERT INTO x1(docid,a,b,c) VALUES(15, 'C E U C C', 'W F M N M', 'T Z U X T'); + INSERT INTO x1(docid,a,b,c) VALUES(16, 'Q G C G H', 'H N N B H', 'B Q I H Y'); + INSERT INTO x1(docid,a,b,c) VALUES(17, 'Q T S K B', 'W B D Y N', 'V J P E C'); + INSERT INTO x1(docid,a,b,c) VALUES(18, 'A J M O Q', 'L G Y Y A', 'G N M R N'); + INSERT INTO x1(docid,a,b,c) VALUES(19, 'T R Y P Y', 'N V Y B X', 'L Z T N T'); + + CREATE VIRTUAL TABLE x2 USING FTS4(a, b, c, order=DESC); + INSERT INTO x2(docid, a, b, c) 
SELECT docid, a, b, c FROM x1; +} + +foreach x {1 2} { + foreach {tn match res} { + 1 "^K" {0 1 14} + 2 "^S" {0 1 14} + 3 "^W" {9 15 17} + 4 "^J" {} + 5 "^E" {12} + 6 "V ^-E" {0 3 4 6 7 9 17 19} + 7 "V -^E" {0 3 4 6 7 9 17 19} + 8 "^-E V" {0 3 4 6 7 9 17 19} + 9 "-^E V" {0 3 4 6 7 9 17 19} + 10 "V" {0 3 4 6 7 9 12 17 19} + + 11 {"^K H"} {0 14} + 12 {"K H"} {0 10 14} + 13 {"K ^H"} {} + } { + set rev [list] + for {set ii [expr [llength $res]-1]} {$ii>=0} {incr ii -1} { + lappend rev [lindex $res $ii] + } + do_execsql_test 1.$x.$tn.1 {SELECT docid FROM x1 WHERE x1 MATCH $match} $res + do_execsql_test 1.$x.$tn.2 {SELECT docid FROM x2 WHERE x2 MATCH $match} $rev + } + + do_execsql_test 1.$x.[expr $tn+1] { + INSERT INTO x1(x1) VALUES('optimize'); + INSERT INTO x2(x2) VALUES('optimize'); + } {} +} + +finish_test diff --git a/test/permutations.test b/test/permutations.test index 14330d0cc1..7c3b026c67 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -185,6 +185,7 @@ test_suite "fts3" -prefix "" -description { fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test + fts3first.test }