From: dan Date: Fri, 21 Jun 2013 17:30:47 +0000 (+0000) Subject: Add the "notindexed" option to fts4. X-Git-Tag: version-3.8.0~136^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8def92bac38f32eb0876fb769dfdc52c7779e929;p=thirdparty%2Fsqlite.git Add the "notindexed" option to fts4. FossilOrigin-Name: 8ff2b8f5948ccddce70102e6d68d464c66e4f7ca --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 374d690688..ccd2345bf1 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -1081,6 +1081,8 @@ static int fts3InitVtab( char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ char *zContent = 0; /* content=? parameter (or NULL) */ char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ + char **azNotindexed = 0; /* The set of notindexed= columns */ + int nNotindexed = 0; /* Size of azNotindexed[] array */ assert( strlen(argv[0])==4 ); assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) @@ -1090,9 +1092,19 @@ static int fts3InitVtab( nDb = (int)strlen(argv[1]) + 1; nName = (int)strlen(argv[2]) + 1; - aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) ); - if( !aCol ) return SQLITE_NOMEM; - memset((void *)aCol, 0, sizeof(const char *) * (argc-2)); + nByte = sizeof(const char *) * (argc-2); + aCol = (const char **)sqlite3_malloc(nByte); + if( aCol ){ + memset(aCol, 0, nByte); + azNotindexed = (char **)sqlite3_malloc(nByte); + } + if( azNotindexed ){ + memset(azNotindexed, 0, nByte); + } + if( !aCol || !azNotindexed ){ + rc = SQLITE_NOMEM; + goto fts3_init_out; + } /* Loop through all of the arguments passed by the user to the FTS3/4 ** module (i.e. all the column names and special arguments). 
This loop @@ -1131,7 +1143,8 @@ static int fts3InitVtab( { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ { "order", 5 }, /* 4 -> ORDER */ { "content", 7 }, /* 5 -> CONTENT */ - { "languageid", 10 } /* 6 -> LANGUAGEID */ + { "languageid", 10 }, /* 6 -> LANGUAGEID */ + { "notindexed", 10 } /* 7 -> NOTINDEXED */ }; int iOpt; @@ -1197,6 +1210,11 @@ static int fts3InitVtab( zLanguageid = zVal; zVal = 0; break; + + case 7: /* NOTINDEXED */ + azNotindexed[nNotindexed++] = zVal; + zVal = 0; + break; } } sqlite3_free(zVal); @@ -1268,6 +1286,7 @@ static int fts3InitVtab( nByte = sizeof(Fts3Table) + /* Fts3Table */ nCol * sizeof(char *) + /* azColumn */ nIndex * sizeof(struct Fts3Index) + /* aIndex */ + nCol * sizeof(u8) + /* abNotindexed */ nName + /* zName */ nDb + /* zDb */ nString; /* Space for azColumn strings */ @@ -1301,9 +1320,10 @@ static int fts3InitVtab( for(i=0; iaIndex[i].hPending, FTS3_HASH_STRING, 1); } + p->abNotindexed = (u8 *)&p->aIndex[nIndex]; /* Fill in the zName and zDb fields of the vtab structure. 
*/ - zCsr = (char *)&p->aIndex[nIndex]; + zCsr = (char *)&p->abNotindexed[nCol]; p->zName = zCsr; memcpy(zCsr, argv[2], nName); zCsr += nName; @@ -1324,7 +1344,26 @@ static int fts3InitVtab( assert( zCsr <= &((char *)p)[nByte] ); } - if( (zCompress==0)!=(zUncompress==0) ){ + /* Fill in the abNotindexed array */ + for(iCol=0; iColazColumn[iCol]); + for(i=0; iazColumn[iCol], zNot, n) ){ + p->abNotindexed[iCol] = 1; + sqlite3_free(zNot); + azNotindexed[i] = 0; + } + } + } + for(i=0; inColumn+2; i++){ - const char *zText = (const char *)sqlite3_value_text(apVal[i]); - int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]); - if( rc!=SQLITE_OK ){ - return rc; + int iCol = i-2; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_value_text(apVal[i]); + int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); + if( rc!=SQLITE_OK ){ + return rc; + } + aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); } - aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); } return SQLITE_OK; } @@ -1052,9 +1055,12 @@ static void fts3DeleteTerms( int iLangid = langidFromSelect(p, pSelect); rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0)); for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ - const char *zText = (const char *)sqlite3_column_text(pSelect, i); - rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]); - aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); + int iCol = i-1; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pSelect, i); + rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); + } } if( rc!=SQLITE_OK ){ sqlite3_reset(pSelect); @@ -3296,9 +3302,11 @@ static int fts3DoRebuild(Fts3Table *p){ rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ - const char *z = (const char *) 
sqlite3_column_text(pStmt, iCol+1); - rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); - aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); + if( p->abNotindexed[iCol]==0 ){ + const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); + rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); + } } if( p->bHasDocsize ){ fts3InsertDocsize(&rc, p, aSz); @@ -5101,35 +5109,37 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ iDocid = sqlite3_column_int64(pCsr->pStmt, 0); for(i=0; inColumn && rc==SQLITE_OK; i++){ - const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); - sqlite3_tokenizer_cursor *pTC = 0; - - rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); - while( rc==SQLITE_OK ){ - char const *zToken; /* Buffer containing token */ - int nToken = 0; /* Number of bytes in token */ - int iDum1 = 0, iDum2 = 0; /* Dummy variables */ - int iPos = 0; /* Position of token in zText */ - - rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); - for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ - Fts3PhraseToken *pPT = pDef->pToken; - if( (pDef->iCol>=p->nColumn || pDef->iCol==i) - && (pPT->bFirst==0 || iPos==0) - && (pPT->n==nToken || (pPT->isPrefix && pPT->nz, pPT->n)) - ){ - fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); + if( p->abNotindexed[i]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); + sqlite3_tokenizer_cursor *pTC = 0; + + rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ + + rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + 
Fts3PhraseToken *pPT = pDef->pToken; + if( (pDef->iCol>=p->nColumn || pDef->iCol==i) + && (pPT->bFirst==0 || iPos==0) + && (pPT->n==nToken || (pPT->isPrefix && pPT->nz, pPT->n)) + ){ + fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); + } } } + if( pTC ) pModule->xClose(pTC); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; } - if( pTC ) pModule->xClose(pTC); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } - - for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ - if( pDef->pList ){ - rc = fts3PendingListAppendVarint(&pDef->pList, 0); + + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + if( pDef->pList ){ + rc = fts3PendingListAppendVarint(&pDef->pList, 0); + } } } } diff --git a/manifest b/manifest index 4b1070228c..949e9c0e1f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Disable\sposix_fallocate()\sfor\sall\ssystems,\sall\sthe\stime,\sunless\sthe\nHAVE_POSIX_FALLOCATE\scompile-time\smacro\sis\ssupplied. -D 2013-06-20T14:07:37.671 +C Add\sthe\s"notindexed"\soption\sto\sfts4. 
+D 2013-06-21T17:30:47.476 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -78,9 +78,9 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 931b3c83abdd1ab3bb389b2130431c2a9ff73b91 +F ext/fts3/fts3.c 3cb4afd0accf903fbe66e2cc48d6f44e5ccb8a76 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h cb4df04cf886d9920a71df9e8faaa5aae2fa48c6 +F ext/fts3/fts3Int.h 2659cdfc8cde3f981ea99b2f60383e6239f9f4d5 F ext/fts3/fts3_aux.c b02632f6dd0e375ce97870206d914ea6d8df5ccd F ext/fts3/fts3_expr.c f8eb1046063ba342c7114eba175cabb31c4a64e7 F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914 @@ -96,7 +96,7 @@ F ext/fts3/fts3_tokenizer.h 64c6ef6c5272c51ebe60fc607a896e84288fcbc3 F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 F ext/fts3/fts3_unicode.c 92391b4b4fb043564c6539ea9b8661e3bcba47b9 F ext/fts3/fts3_unicode2.c 0113d3acf13429e6dc38e0647d1bc71211c31a4d -F ext/fts3/fts3_write.c 6a1fc0e922e76b68e594bf7bc33bac72af9dc47b +F ext/fts3/fts3_write.c cd264daa4f92b8eb6d61245333d0e3b147e8fd80 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 @@ -550,6 +550,7 @@ F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891 F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test c19c85ca1faa7b6d536832b49c12e1867235f584 +F test/fts4noti.test 7710af8ad41b23571d62c0cee2a062c30ecb8012 F test/fts4unicode.test 
c8ac44217bf6c17812b03eaafa6c06995ad304c2 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test b0fc34fdc36897769651975a2b0a606312753643 @@ -706,7 +707,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16 F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test 4614301e38398df7fdd5f28f4ed8f272b328251b -F test/permutations.test d997a947ab8aabb15f763d50a030b3c11e8ef1b6 +F test/permutations.test 7161be2569550924a8a437272acabfe9e6f48b86 F test/pragma.test 5e7de6c32a5d764f09437d2025f07e4917b9e178 F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1093,7 +1094,10 @@ F tool/vdbe-compress.tcl f12c884766bd14277f4fcedcae07078011717381 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac -P 2b2ade92788be623af6f57e37d98994be2cec142 -R 4c8df645fee7190a620dee659859568b -U drh -Z 4cfe85260540c90b4ee0898bd7491d71 +P b674462243138fcee192ef05d434665e30c681c4 +R 977878ca999ebdf95aeef1dd83187bfc +T *branch * fts4-notindexed +T *sym-fts4-notindexed * +T -sym-trunk * +U dan +Z 6bb2b422fa6b5870298aeb31440d85b9 diff --git a/manifest.uuid b/manifest.uuid index 225d72de72..ddcb0bcc50 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b674462243138fcee192ef05d434665e30c681c4 \ No newline at end of file +8ff2b8f5948ccddce70102e6d68d464c66e4f7ca \ No newline at end of file diff --git a/test/fts4noti.test b/test/fts4noti.test new file mode 100644 index 0000000000..4a49933d0a --- /dev/null +++ b/test/fts4noti.test @@ -0,0 +1,111 @@ +# 2013 June 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. 
+# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the notindexed=xxx FTS4 option. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set ::testprefix fts4noti + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + + +#------------------------------------------------------------------------- +# Test that typos in "notindexed=" column names are detected. +# +do_execsql_test 1.0 { + CREATE TABLE cc(a, b, c); +} +foreach {tn arg res} { + 1 "(b, c, notindexed=a)" {1 {no such column: a}} + 2 "(a, b, notindexed=a)" {0 {}} + 3 "(a, b, notindexed=a, notindexed=a)" {0 {}} + 4 "(notindexed=a, a, b)" {0 {}} + 5 "(notindexed=a, notindexed=b, notindexed=c, a, b, c, d)" {0 {}} + 6 "(notindexed=a, notindexed=B, notindexed=c, a, b, c, d)" {0 {}} + 7 "(notindexed=a, notindexed=b, notindexed=c, a, B, c, d)" {0 {}} + 8 "(notindexed=d, content=cc)" {1 {no such column: d}} + 9 "(notindexed=a, content=cc)" {0 {}} +} { + do_catchsql_test 1.$tn "CREATE VIRTUAL TABLE t1 USING fts4 $arg" $res + if {[lindex $res 0]==0} { execsql "DROP TABLE t1" } +} + +do_execsql_test 1.x { SELECT name FROM sqlite_master } {cc} + + +#------------------------------------------------------------------------- +# Test that notindexed columns are not indexed. 
+# +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, notindexed=b); + INSERT INTO t1 VALUES('one two', 'three four', 'five six'); + INSERT INTO t1 VALUES('three four', 'five six', 'one two'); +} +do_execsql_test 2.2 { SELECT docid FROM t1 WHERE t1 MATCH 'one' } {1 2} +do_execsql_test 2.3 { SELECT docid FROM t1 WHERE t1 MATCH 'three' } {2} +do_execsql_test 2.4 { SELECT docid FROM t1 WHERE t1 MATCH 'five' } {1} + +do_execsql_test 2.5 { INSERT INTO t1(t1) VALUES('optimize') } + +do_execsql_test 2.6 { SELECT docid FROM t1 WHERE t1 MATCH 'one' } {1 2} +do_execsql_test 2.7 { SELECT docid FROM t1 WHERE t1 MATCH 'three' } {2} +do_execsql_test 2.8 { SELECT docid FROM t1 WHERE t1 MATCH 'five' } {1} + +do_execsql_test 2.9 { INSERT INTO t1(t1) VALUES('rebuild') } + +do_execsql_test 2.10 { SELECT docid FROM t1 WHERE t1 MATCH 'one' } {1 2} +do_execsql_test 2.11 { SELECT docid FROM t1 WHERE t1 MATCH 'three' } {2} +do_execsql_test 2.12 { SELECT docid FROM t1 WHERE t1 MATCH 'five' } {1} + +do_execsql_test 2.13 { + SELECT * FROM t1 WHERE docid=1 +} {{one two} {three four} {five six}} +do_execsql_test 2.14 { + SELECT * FROM t1 WHERE docid=2 +} {{three four} {five six} {one two}} + +do_execsql_test 2.x { DROP TABLE t1 } + +#------------------------------------------------------------------------- +# Test that notindexed columns are not scanned for deferred tokens. 
+# + +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t2 USING fts4(x, y, notindexed=x); +} +do_test 3.2 { + set v [string repeat " 1" 50000] + set v1 "x $v" + set v2 "y $v" + execsql { + INSERT INTO t2 VALUES(1, 'x y z'); + INSERT INTO t2 VALUES(2, $v1); + INSERT INTO t2 VALUES(3, $v2); + INSERT INTO t2 VALUES(4, $v2); + INSERT INTO t2 VALUES(5, $v2); + INSERT INTO t2 VALUES(6, $v2); + } +} {} + +do_execsql_test 3.3 { SELECT x FROM t2 WHERE t2 MATCH '2' } {} +do_execsql_test 3.4 { SELECT x FROM t2 WHERE t2 MATCH '1' } {2 3 4 5 6} +do_execsql_test 3.5 { SELECT x FROM t2 WHERE t2 MATCH 'x' } {1 2} +do_execsql_test 3.6 { SELECT x FROM t2 WHERE t2 MATCH 'x 1' } {2} + + + +finish_test diff --git a/test/permutations.test b/test/permutations.test index bc3ceb8085..d035a1afa5 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -194,7 +194,7 @@ test_suite "fts3" -prefix "" -description { fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test - fts4check.test fts4unicode.test + fts4check.test fts4unicode.test fts4noti.test } test_suite "nofaultsim" -prefix "" -description {