From: dan Date: Sat, 5 Jul 2014 07:54:01 +0000 (+0000) Subject: Add support for the "colname : " syntax to fts5. X-Git-Tag: version-3.8.11~114^2~170 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c2642d7c1e8ea94289344c4d92cd5b7013b94d4b;p=thirdparty%2Fsqlite.git Add support for the "colname : " syntax to fts5. FossilOrigin-Name: 004667106e552e832a564b77e242b86f183d4441 --- diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 5a29f04709..8a84f958ba 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -99,6 +99,7 @@ struct Fts5Parse { */ typedef struct Fts5PoslistIter Fts5PoslistIter; struct Fts5PoslistIter { + int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ @@ -116,19 +117,31 @@ static int fts5PoslistIterNext(Fts5PoslistIter *pIter){ pIter->i += getVarint32(&pIter->a[pIter->i], iVal); if( iVal==1 ){ pIter->i += getVarint32(&pIter->a[pIter->i], iVal); - pIter->iPos = ((u64)iVal << 32); - pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + if( pIter->iCol>=0 && iVal>pIter->iCol ){ + pIter->bEof = 1; + }else{ + pIter->iPos = ((u64)iVal << 32); + pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + } } pIter->iPos += (iVal-2); } return pIter->bEof; } -static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){ +static int fts5PoslistIterInit( + int iCol, /* If (iCol>=0), this column only */ + const u8 *a, int n, /* Poslist buffer to iterate through */ + Fts5PoslistIter *pIter /* Iterator object to initialize */ +){ memset(pIter, 0, sizeof(*pIter)); pIter->a = a; pIter->n = n; - fts5PoslistIterNext(pIter); + pIter->iCol = iCol; + do { + fts5PoslistIterNext(pIter); + }while( pIter->bEof==0 && (pIter->iPos >> 32)bEof; } typedef struct Fts5PoslistWriter Fts5PoslistWriter; @@ -325,6 +338,7 @@ static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){ */ static int fts5ExprPhraseIsMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + int iCol, /* If >=0, search for matches in iCol only */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ @@ -334,6 +348,8 @@ static int fts5ExprPhraseIsMatch( int i; int rc = SQLITE_OK; + fts5BufferZero(&pPhrase->poslist); + /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ @@ -346,10 +362,9 @@ static int fts5ExprPhraseIsMatch( for(i=0; inTerm; i++){ int n; const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &n); - fts5PoslistIterInit(a, n, &aIter[i]); + if( fts5PoslistIterInit(iCol, a, n, &aIter[i]) ) goto ismatch_out; } - fts5BufferZero(&pPhrase->poslist); while( 1 ){ int bMatch; i64 iPos = aIter[0].iPos; @@ -384,6 +399,22 @@ static int fts5ExprPhraseIsMatch( } +/* +** The near-set object passed as the first argument contains more than +** one phrase. All phrases currently point to the same row. The +** Fts5ExprPhrase.poslist buffers are populated accordingly. This function +** tests if the current row contains instances of each phrase sufficiently +** close together to meet the NEAR constraint. Output variable *pbMatch +** is set to true if it does, or false otherwise. +** +** If no error occurs, SQLITE_OK is returned. Or, if an error does occur, +** an SQLite error code. If a value other than SQLITE_OK is returned, the +** final value of *pbMatch is undefined. +** +** TODO: This function should also edit the position lists associated +** with each phrase to remove any phrase instances that are not part of +** a set of intances that collectively matches the NEAR constraint. +*/ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ Fts5PoslistIter aStatic[4]; Fts5PoslistIter *aIter = aStatic; @@ -392,6 +423,8 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ int bMatch; i64 iMax; + assert( pNear->nPhrase>1 ); + /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ @@ -403,7 +436,7 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ /* Initialize a term iterator for each phrase */ for(i=0; inPhrase; i++){ Fts5Buffer *pPoslist = &pNear->apPhrase[i]->poslist; - fts5PoslistIterInit(pPoslist->p, pPoslist->n, &aIter[i]); + fts5PoslistIterInit(-1, pPoslist->p, pPoslist->n, &aIter[i]); } iMax = aIter[0].iPos; @@ -557,14 +590,14 @@ static int fts5ExprNearNextMatch( for(i=0; inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 ){ + if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch); + rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); if( rc!=SQLITE_OK ) return rc; if( bMatch==0 ) break; }else{ int n; - u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n); + const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n); fts5BufferSet(&rc, &pPhrase->poslist, n, a); } } @@ -1033,6 +1066,82 @@ static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){ return zNew; } +/* +** Compose a tcl-readable representation of expression pExpr. Return a +** pointer to a buffer containing that representation. It is the +** responsibility of the caller to at some point free the buffer using +** sqlite3_free(). +*/ +static char *fts5ExprPrintTcl( + Fts5Config *pConfig, + const char *zNearsetCmd, + Fts5ExprNode *pExpr +){ + char *zRet = 0; + if( pExpr->eType==FTS5_STRING ){ + Fts5ExprNearset *pNear = pExpr->pNear; + int i; + int iTerm; + + zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd); + if( pNear->iCol>=0 ){ + zRet = fts5PrintfAppend(zRet, "-col %d ", pNear->iCol); + if( zRet==0 ) return 0; + } + + if( pNear->nPhrase>1 ){ + zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); + if( zRet==0 ) return 0; + } + + zRet = fts5PrintfAppend(zRet, "--"); + if( zRet==0 ) return 0; + + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + + zRet = fts5PrintfAppend(zRet, " {"); + for(iTerm=0; zRet && iTermnTerm; iTerm++){ + char *zTerm = pPhrase->aTerm[iTerm].zTerm; + zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); + } + + if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); + if( zRet==0 ) return 0; + } + + if( zRet ) zRet = fts5PrintfAppend(zRet, "]"); + if( zRet==0 ) return 0; + + }else{ + char *zOp = 0; + char *z1 = 0; + char *z2 = 0; + switch( pExpr->eType ){ + case FTS5_AND: zOp = "&&"; break; + case FTS5_NOT: zOp = "&& !"; break; + case FTS5_OR: zOp = "||"; break; + default: assert( 0 ); + } + + z1 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pLeft); + z2 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRight); + if( z1 && z2 ){ + int b1 = pExpr->pLeft->eType!=FTS5_STRING; + int b2 = pExpr->pRight->eType!=FTS5_STRING; + zRet = sqlite3_mprintf("%s%s%s %s %s%s%s", + b1 ? "(" : "", z1, b1 ? ")" : "", + zOp, + b2 ? "(" : "", z2, b2 ? ")" : "" + ); + } + sqlite3_free(z1); + sqlite3_free(z2); + } + + return zRet; +} + static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ char *zRet = 0; if( pExpr->eType==FTS5_STRING ){ @@ -1117,12 +1226,18 @@ static void fts5ExprFunction( Fts5Expr *pExpr = 0; int rc; int i; + int bTcl = sqlite3_user_data(pCtx)!=0; const char **azConfig; /* Array of arguments for Fts5Config */ + const char *zNearsetCmd = "nearset"; int nConfig; /* Size of azConfig[] */ Fts5Config *pConfig = 0; - nConfig = nArg + 2; + if( bTcl && nArg>1 ){ + zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); + } + + nConfig = nArg + 2 - bTcl; azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); if( azConfig==0 ){ sqlite3_result_error_nomem(pCtx); @@ -1131,8 +1246,8 @@ static void fts5ExprFunction( azConfig[0] = 0; azConfig[1] = "main"; azConfig[2] = "tbl"; - for(i=1; ipRoot); + char *zText; + if( bTcl ){ + zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); + }else{ + zText = fts5ExprPrint(pConfig, pExpr->pRoot); + } if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); sqlite3_free(zText); @@ -1166,9 +1286,22 @@ static void fts5ExprFunction( ** UDF with the SQLite handle passed as the only argument. */ int sqlite3Fts5ExprInit(sqlite3 *db){ - int rc = sqlite3_create_function( - db, "fts5_expr", -1, SQLITE_UTF8, 0, fts5ExprFunction, 0, 0 - ); + struct Fts5ExprFunc { + const char *z; + void *p; + void (*x)(sqlite3_context*,int,sqlite3_value**); + } aFunc[] = { + { "fts5_expr", 0, fts5ExprFunction }, + { "fts5_expr_tcl", (void*)1, fts5ExprFunction }, + }; + int i; + int rc = SQLITE_OK; + + for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){ + struct Fts5ExprFunc *p = &aFunc[i]; + rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, p->p, p->x, 0, 0); + } + return rc; } diff --git a/manifest b/manifest index f8d149a28e..0526a1e9b9 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\sNEAR\sexpressions\sto\sfts5. -D 2014-07-03T20:39:39.548 +C Add\ssupport\sfor\sthe\s"colname\s:\s"\ssyntax\sto\sfts5. +D 2014-07-05T07:54:01.680 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -107,7 +107,7 @@ F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 F ext/fts5/fts5Int.h b7a684ff3508ab24437886f8bc873a16f494a7db F ext/fts5/fts5_buffer.c f1a26a79e2943fe4388e531fa141941b5eb6d31a F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 84dd8c1f313f795b41f3fc5f73bee013e8301b68 +F ext/fts5/fts5_expr.c 618e6641c8dc428c146ec84bf30ff0b3da6b28c7 F ext/fts5/fts5_index.c d8ab9712e38dc1beb9a9145ec89e18dc083c0467 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -594,7 +594,7 @@ F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test 4db86a9473ee2a8c2cb30e0d81df21c6022f99b6 -F test/fts5ac.test cc4fc45a85fde7fbe8da135aed6b25d2795ba9f6 +F test/fts5ac.test c7ca34a477d638195a839c961e1b572890dc5d0d F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1190,7 +1190,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2e5652e6526b8fb3f5c163168d95bc0bb4c93686 -R f894350f59d9ccf43dee7a3b5d2aafff +P 250ae8d40115e2e47cc5a1e8a427fa8c0a89124d +R 213fb14ea45e358dcb308401853c4570 U dan -Z be26a54244aa4231a7a300eba9899e25 +Z 0b8632fefc20738326985d3c409c9be8 diff --git a/manifest.uuid b/manifest.uuid index a90859e837..506ee6aff0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -250ae8d40115e2e47cc5a1e8a427fa8c0a89124d \ No newline at end of file +004667106e552e832a564b77e242b86f183d4441 \ No newline at end of file diff --git a/test/fts5ac.test b/test/fts5ac.test index d5556ef706..c4586481bf 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -137,6 +137,89 @@ do_test 1.1 { } } {} +proc phrasematch {phrase value} { + if {[string first $phrase $value]>=0} { + return 1 + } + return 0 +} + +# Usage: +# +proc nearmatch {nNear phraselist value} { + set nPhrase [llength $phraselist] + + set phraselist [string tolower $phraselist] + set value [string tolower $value] + + if {$nPhrase==1} { + set bMatch [phrasematch [lindex $phraselist 0] $value] + } else { + set nValue [llength $value] + if {$nNear >= $nValue} {set nNear [expr $nValue-1]} + + for {set i $nNear} {$i < $nValue} {incr i} { + set bMatch 1 + foreach phrase $phraselist { + set iMin [expr $i - $nNear - [llength $phrase]] + set iMax [expr $i - 1 + [llength $phrase]] + set subdoc [lrange $value $iMin $iMax] + if {![phrasematch $phrase $subdoc]} { + set bMatch 0 + break + } + } + if {$bMatch} break + } + } + return $bMatch +} + +# Usage: +# +# nearset aCol ?-near N? ?-col C? -- phrase1 phrase2... +# +proc nearset {aCol args} { + set O(-near) 10 + set O(-col) -1 + + set nOpt [lsearch -exact $args --] + if {$nOpt<0} { error "no -- option" } + + foreach {k v} [lrange $args 0 [expr $nOpt-1]] { + if {[info exists O($k)]==0} { error "unrecognized option $k" } + set O($k) $v + } + + set phraselist [lrange $args [expr $nOpt+1] end] + + set bMatch 0 + set iCol -1 + foreach col $aCol { + incr iCol + if {$O(-col)>=0 && $O(-col)!=$iCol} continue + + if {[nearmatch $O(-near) $phraselist $col]} { + set bMatch 1 + break + } + } + + return $bMatch +} + +proc matchdata {expr} { + set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}] + set res [list] + foreach {id x y} $::data { + set cols [list $x $y] + if $tclexpr { + set res [concat $id $res] + } + } + return $res +} + foreach {tn phrase} { 1 "o" 2 "b q" @@ -149,16 +232,51 @@ foreach {tn phrase} { 9 "no" 10 "L O O L V V K" } { - set res [list] - foreach {id x y} $data { - set pat [string tolower $phrase] - if {[string first $pat $x]>=0 || [string first $pat $y]>=0} { - set res [concat $id $res] - } - } - set n [llength $res] - do_execsql_test 1.2.$tn.$n { - SELECT rowid FROM xx WHERE xx match '"' || $phrase || '"' + + set expr "\"$phrase\"" + set res [matchdata $expr] + + do_execsql_test 1.2.$tn.[llength $res] { + SELECT rowid FROM xx WHERE xx match $expr + } $res +} + +# Test the "nearmatch" commnad. +# +do_test 2.0 { nearmatch 2 {a b} {a x x b} } 1 +do_test 2.1 { nearmatch 2 {b a} {a x x b} } 1 +do_test 2.2 { nearmatch 1 {b a} {a x x b} } 0 +do_test 2.3 { nearmatch 1 {"a b" "c d"} {x x a b x c d} } 1 +do_test 2.4 { nearmatch 1 {"a b" "c d"} {x a b x x c d} } 0 +do_test 2.5 { nearmatch 400 {a b} {a x x b} } 1 +do_test 2.6 { nearmatch 0 {a} {a x x b} } 1 +do_test 2.7 { nearmatch 0 {b} {a x x b} } 1 + +foreach {tn expr tclexpr} { + 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} +} { + do_execsql_test 3.$tn {SELECT fts5_expr_tcl($expr, 'N $x')} [list $tclexpr] +} + +#------------------------------------------------------------------------- +# +foreach {tn expr} { + 1 { NEAR(r c) } + 2 { NEAR(r c, 5) } + 3 { NEAR(r c, 3) } + 4 { NEAR(r c, 2) } + 5 { NEAR(r c, 0) } + 6 { NEAR(a b c) } + 7 { NEAR(a b c, 8) } + 8 { x : NEAR(r c) } + 9 { y : NEAR(r c) } + 10 { x : "r c" } + 11 { y : "r c" } +} { + + set res [matchdata $expr] + do_execsql_test 2.$tn.[llength $res] { + SELECT rowid FROM xx WHERE xx match $expr } $res }