From: dan
Date: Wed, 24 Nov 2010 19:26:18 +0000 (+0000)
Subject: Add code for the matchinfo 'longest common substring' feature.
X-Git-Tag: version-3.7.4~36^2~4
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f9376540fa8e77b2924bba7605d705c627c1d9b4;p=thirdparty%2Fsqlite.git

Add code for the matchinfo 'longest common substring' feature.

FossilOrigin-Name: 71011a4f9baf09ec6935ad591145252bf3c286ed
---

diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c
index 4d0134c652..ec468408bf 100644
--- a/ext/fts3/fts3_snippet.c
+++ b/ext/fts3/fts3_snippet.c
@@ -970,7 +970,172 @@ static int fts3MatchinfoSelectDoctotal(
   if( paLen ) *paLen = a;
   return SQLITE_OK;
 }
+
+/*
+** State used to iterate through the position list of a single phrase
+** while computing the matchinfo 'longest common substring' values. The
+** array of these objects used by fts3MatchinfoLcs() has one entry for
+** each phrase in the query.
+*/
+typedef struct LcsIterator LcsIterator;
+struct LcsIterator {
+  Fts3Expr *pExpr;                /* Pointer to phrase expression */
+  char *pRead;                    /* Cursor used to iterate through aDoclist */
+  int iPosOffset;                 /* Minus the token count up to end of phrase */
+  int iCol;                       /* Current column number */
+  int iPos;                       /* Current position */
+};
+
+/*
+** Value assigned to LcsIterator.iCol once the end of the position list
+** has been reached. There is no trailing semi-colon, so that the macro
+** may be used inside any expression.
+*/
+#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
+
+/*
+** fts3ExprIterate() callback used by fts3MatchinfoLcs(). pCtx points to
+** an array of LcsIterator objects. Store pExpr in entry iPhrase of that
+** array. Always returns SQLITE_OK.
+*/
+static int fts3MatchinfoLcsCb(
+  Fts3Expr *pExpr,                /* Phrase expression node */
+  int iPhrase,                    /* Phrase number (numbered from zero) */
+  void *pCtx                      /* Pointer to array of LcsIterator objects */
+){
+  LcsIterator *aIter = (LcsIterator *)pCtx;
+  aIter[iPhrase].pExpr = pExpr;
+  return SQLITE_OK;
+}
+
+/*
+** Advance iterator pIter to the next entry in its position list. The
+** next varint read from the list is handled as follows:
+**
+**   0:     End of list. iCol is set to LCS_ITERATOR_FINISHED.
+**   1:     Start of a new column. The next varint is the column number.
+**          iPos is reset to iPosOffset and the varint after that is
+**          handled as an "other" value.
+**   other: (value-2) is added to iPos.
+**
+** Return 1 if the iterator has moved to a new column or has reached the
+** end of its list, or 0 if it is still within the same column.
+*/
+static int fts3LcsIteratorAdvance(LcsIterator *pIter){
+  char *pRead = pIter->pRead;
+  sqlite3_int64 iRead;
+  int rc = 0;
+
+  pRead += sqlite3Fts3GetVarint(pRead, &iRead);
+  if( iRead==0 ){
+    pIter->iCol = LCS_ITERATOR_FINISHED;
+    rc = 1;
+  }else{
+    if( iRead==1 ){
+      pRead += sqlite3Fts3GetVarint(pRead, &iRead);
+      pIter->iCol = (int)iRead;
+      pIter->iPos = pIter->iPosOffset;
+      pRead += sqlite3Fts3GetVarint(pRead, &iRead);
+      rc = 1;
+    }
+    pIter->iPos += (int)(iRead-2);
+  }
+
+  pIter->pRead = pRead;
+  return rc;
+}
 
+/*
+** Write the matchinfo 'longest common substring' values for the current
+** row of cursor pCsr to pInfo->aMatchinfo[]. The value stored for column
+** iCol is the length, counted in phrases, of the longest run of adjacent
+** query phrases whose iterators are all in column iCol with equal
+** (offset adjusted) positions.
+**
+** Return SQLITE_OK if successful, or SQLITE_NOMEM if the array of
+** iterators cannot be allocated.
+**
+** NOTE(review): in the copy of this patch that was reviewed, the upper
+** bounds of the loops over phrases and columns and the right-hand side of
+** the comparison that selects pAdv were unreadable. The expressions used
+** below are reconstructions - confirm them against the original commit.
+*/
+static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
+  LcsIterator *aIter;
+  int i;
+  int iCol;
+  int nToken = 0;
+
+  /* Allocate and populate the array of LcsIterator objects. The array
+  ** contains one element for each matchable phrase in the query.
+  **/
+  aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
+  if( !aIter ) return SQLITE_NOMEM;
+  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
+  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
+
+  /* Position each iterator on its first entry. iPosOffset is minus the
+  ** number of tokens in this and all earlier phrases. It is the base
+  ** value of iPos in each column, so that the iPos values of adjacent
+  ** phrases can be compared directly below.
+  */
+  for(i=0; i<pCsr->nPhrase; i++){
+    LcsIterator *pIter = &aIter[i];
+    nToken -= pIter->pExpr->pPhrase->nToken;
+    pIter->iPosOffset = nToken;
+    pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
+    if( pIter->pRead ){
+      pIter->iPos = pIter->iPosOffset;
+      fts3LcsIteratorAdvance(&aIter[i]);
+    }else{
+      pIter->iCol = LCS_ITERATOR_FINISHED;
+    }
+  }
+
+  for(iCol=0; iCol<pInfo->nCol; iCol++){
+    int nLcs = 0;                 /* Longest run found in column iCol */
+    int nLive = 0;                /* Iterators currently in column iCol */
+
+    for(i=0; i<pCsr->nPhrase; i++){
+      assert( aIter[i].iCol>=iCol );
+      if( aIter[i].iCol==iCol ) nLive++;
+    }
+
+    while( nLive>0 ){
+      LcsIterator *pAdv = 0;      /* Iterator in iCol with smallest iPos */
+      int nThisLcs = 0;           /* Length of the current run */
+
+      for(i=0; i<pCsr->nPhrase; i++){
+        LcsIterator *pIter = &aIter[i];
+
+        if( iCol!=pIter->iCol ){
+          nThisLcs = 0;
+          continue;
+        }
+
+        if( pAdv==0 || pIter->iPos<pAdv->iPos ){
+          pAdv = pIter;
+        }
+
+        /* pIter[-1] is only read when nThisLcs>0, in which case the
+        ** previous iterator exists and is also in column iCol. */
+        if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
+          nThisLcs++;
+        }else{
+          nThisLcs = 1;
+        }
+
+        if( nThisLcs>nLcs ) nLcs = nThisLcs;
+      }
+      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
+    }
+
+    pInfo->aMatchinfo[iCol] = nLcs;
+  }
+
+  sqlite3_free(aIter);
+  return SQLITE_OK;
+}
 
 static int fts3MatchinfoValues(
   Fts3Cursor *pCsr,               /* FTS3 cursor object */
@@ -1048,9 +1213,12 @@ static int fts3MatchinfoValues(
         break;
       }
 
+      case FTS3_MATCHINFO_LCS:
+        fts3MatchinfoLcs(pCsr, pInfo);
+        break;
 
-      default:
-        assert( zArg[i]==FTS3_MATCHINFO_LCS );
+      default:
+        assert( !"this cannot happen" );
     }
 
     pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c
index b18c429637..945c22dfd2 100644
--- a/ext/fts3/fts3_write.c
+++ b/ext/fts3/fts3_write.c
@@ -328,6 
+328,9 @@ int sqlite3Fts3SelectDocsize( return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt); } +void sqlite3Fts3MatchinfoLcs(Fts3Expr *pExpr, u32 *aOut){ +} + /* ** Similar to fts3SqlStmt(). Except, after binding the parameters in ** array apVal[] to the SQL statement identified by eStmt, the statement diff --git a/manifest b/manifest index 6722a26b90..711e3331d5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\scrashes\sthat\scan\soccur\swhen\squeries\sare\srun\son\san\sFTS4\stable\scontaining\szero\srows. -D 2010-11-24T15:02:23 +C Add\scode\sfor\sthe\smatchinfo\s'longest\scommon\ssubstring'\sfeature. +D 2010-11-24T19:26:19 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in e7a59672eaeb04408d1fa8501618d7501a3c5e39 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -69,11 +69,11 @@ F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c 8df6f6efcc4e9e31f8bf73a4007c2e9abca1dfba -F ext/fts3/fts3_snippet.c 967ca2d3201fd6555062c7e929bcc2b89ef8dcb8 +F ext/fts3/fts3_snippet.c be2648ff61a18af2d4a33eadbb26c0a6f06a6e26 F ext/fts3/fts3_tokenizer.c 1301b0ee3ef414caae3257a702215925cc48cd9c F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d -F ext/fts3/fts3_write.c 9b2db92b815fdd50b5531eb6db912c71feca6a70 +F ext/fts3/fts3_write.c b4e5b4c74f755a6f494dab9c131ad9bb04bab50c F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -440,13 +440,13 @@ F test/fts3corrupt2.test 6d96efae2f8a6af3eeaf283aba437e6d0e5447ba F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7 F test/fts3d.test 
95fb3c862cbc4297c93fceb9a635543744e9ef52 F test/fts3defer.test d6cb0db9b5997ecf863d96ff419f83f8f2c87f4f -F test/fts3defer2.test 548eb2ca7e6a1515a7bc151721e223be14c51f9d +F test/fts3defer2.test da840efaedebfdd54293d04b36098e2d9872caa6 F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851 F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a F test/fts3fault.test 81fd40ceb12f33f9d16c5637d0f8d95d4556c456 F test/fts3malloc.test 9c8cc3f885bb4dfc66d0460c52f68f45e4710d1b -F test/fts3matchinfo.test 2dfdf80a927e3dc02f2e42337e5aa0b835994f6e +F test/fts3matchinfo.test 41991bd810c6896a07c19a236ba3b489b16ba970 F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844 F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2 F test/fts3rnd.test 707533ce943f490443ce5e696236bb1675a37635 @@ -889,7 +889,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P ae40b34cf7c24c9601bdfb5cbe5b20f05a376ea8 -R bf9b07886b7dc4c93620c54d1a7703f7 +P ed61fd20adef44d0f6b2345e0205b25f3641a15e +R c6aa2ef350d272455977bd9187b17fdc U dan -Z c507e6c3579e8a599c5f904476d64b95 +Z a65c681ffac166b61fab0e3fae2a5af0 diff --git a/manifest.uuid b/manifest.uuid index 753f2ab88a..716989eb82 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ed61fd20adef44d0f6b2345e0205b25f3641a15e \ No newline at end of file +71011a4f9baf09ec6935ad591145252bf3c286ed \ No newline at end of file diff --git a/test/fts3defer2.test b/test/fts3defer2.test index 32f044b1c7..142c92a9dd 100644 --- a/test/fts3defer2.test +++ b/test/fts3defer2.test @@ -91,6 +91,7 @@ foreach {tn sql} { } } { execsql $sql + do_execsql_test 2.2.$tn { SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b'; } [list \ diff --git a/test/fts3matchinfo.test 
b/test/fts3matchinfo.test index 03c4f22c8e..85b33206a0 100644 --- a/test/fts3matchinfo.test +++ b/test/fts3matchinfo.test @@ -181,6 +181,7 @@ do_execsql_test 4.1.0 { INSERT INTO t4 VALUES('a b c d e', 'f g h i j'); INSERT INTO t4 VALUES('f g h i j', 'a b c d e'); } + do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} { p {3 3} c {2 2} @@ -192,8 +193,12 @@ do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} { l {{5 5} {5 5}} a {{5 5} {5 5}} + s {{3 0} {0 3}} + xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - + xpxsscplax - } + do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} { p {1 1} c {2 2} @@ -205,8 +210,53 @@ do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} { l {{5 5} {5 5}} a {{5 5} {5 5}} + s {{0 1} {1 0}} + xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - + sxsxs - +} + +do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'} { s {{2 0} {0 2}} } +do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} } +do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} } +do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'} { s {{1 0} {0 1}} } + +do_execsql_test 4.2.0 { + CREATE VIRTUAL TABLE t5 USING fts4; + INSERT INTO t5 VALUES('a a a a a'); + INSERT INTO t5 VALUES('a b a b a'); + INSERT INTO t5 VALUES('c b c b c'); } +do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'} { s {2 1} } +do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'} { s {2} } +do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} } +do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} } +do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } +do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} } + +do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"; +do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {x } 50000]')"; + +#do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { s {2 1} } +#do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} } +#do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} } +#do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} } 
+#do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } +#do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} } +# +#do_execsql_test 4.4.0 { +# UPDATE t5_segments +# SET block = zeroblob(length(block)) +# WHERE length(block)>10000; +#} +# +#do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'} { s {2 1} } +#do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} } +#do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'} { s {3} } +#do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'} { s {3 1} } +#do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } +#do_matchinfo_test 4.4.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} } finish_test +