** dlwInit - initialize to write a given type doclist to a buffer.
** dlwDestroy - clear the writer's memory. Does not free buffer.
** dlwAppend - append raw doclist data to buffer.
+** dlwCopy - copy next doclist from reader to writer.
** dlwAdd - construct doclist element and append to buffer.
** Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter).
*/
}
pWriter->iPrevDocid = iLastDocid;
}
+/* Append the doclist element pReader is currently positioned on to
+** pWriter. Only one element is being appended, so the same docid is
+** handed to dlwAppend() for both of its docid arguments. pReader is
+** not advanced here; the caller is expected to step it.
+*/
+static void dlwCopy(DLWriter *pWriter, DLReader *pReader){
+  sqlite_int64 iDocid = dlrDocid(pReader);
+
+  dlwAppend(pWriter,
+            dlrDocData(pReader), dlrDocDataBytes(pReader),
+            iDocid, iDocid);
+}
static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){
char c[VARINT_MAX];
int n = putVarint(c, iDocid-pWriter->iPrevDocid);
** plwInit - init for writing a document's poslist.
** plwDestroy - clear a writer.
** plwAdd - append position and offset information.
+** plwCopy - copy next position's data from reader to writer.
** plwTerminate - add any necessary doclist terminator.
**
** Calling plwAdd() after plwTerminate() may result in a corrupt
}
dataBufferAppend(pWriter->dlw->b, c, n);
}
+/* Re-emit the position entry pReader currently points at through
+** plwAdd(), passing along its column, position, start offset and end
+** offset. pReader is not advanced here; the caller steps it.
+*/
+static void plwCopy(PLWriter *pWriter, PLReader *pReader){
+  int iColumn = plrColumn(pReader);
+  int iPosition = plrPosition(pReader);
+  int iStartOffset = plrStartOffset(pReader);
+  int iEndOffset = plrEndOffset(pReader);
+
+  plwAdd(pWriter, iColumn, iPosition, iStartOffset, iEndOffset);
+}
static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){
char c[VARINT_MAX];
int n;
dlwDestroy(&writer);
}
+/* Helper for posListUnion(): three-way comparison of the entries the
+** two position-list readers are currently positioned on. Follows the
+** usual C convention: the result is <0 when left sorts before right,
+** >0 when left sorts after right, and 0 when they are equal.
+**
+** Ordering keys, most significant first: column, position, start
+** offset, end offset. A reader that is at its end compares greater
+** than any reader that is not; two readers at end compare equal.
+**
+** Both readers must have the same iType (asserted). For DL_DOCIDS
+** the result is always 0 and the readers are not examined further;
+** for DL_POSITIONS the comparison stops after the position.
+*/
+static int posListCmp(PLReader *pLeft, PLReader *pRight){
+  int iLeft, iRight;
+
+  assert( pLeft->iType==pRight->iType );
+  if( pLeft->iType==DL_DOCIDS ) return 0;
+
+  /* "End" sorts last. */
+  if( plrAtEnd(pLeft) ){
+    return plrAtEnd(pRight) ? 0 : 1;
+  }else if( plrAtEnd(pRight) ){
+    return -1;
+  }
+
+  iLeft = plrColumn(pLeft);
+  iRight = plrColumn(pRight);
+  if( iLeft!=iRight ) return iLeft<iRight ? -1 : 1;
+
+  iLeft = plrPosition(pLeft);
+  iRight = plrPosition(pRight);
+  if( iLeft!=iRight ) return iLeft<iRight ? -1 : 1;
+
+  /* Offsets are only compared for types beyond DL_POSITIONS. */
+  if( pLeft->iType==DL_POSITIONS ) return 0;
+
+  iLeft = plrStartOffset(pLeft);
+  iRight = plrStartOffset(pRight);
+  if( iLeft!=iRight ) return iLeft<iRight ? -1 : 1;
+
+  iLeft = plrEndOffset(pLeft);
+  iRight = plrEndOffset(pRight);
+  if( iLeft!=iRight ) return iLeft<iRight ? -1 : 1;
+
+  return 0;
+}
+
+/* Merge the position lists of the documents pLeft and pRight are
+** positioned on, writing the result to pOut as a single document.
+** "Union" here means every distinct position tuple appears exactly
+** once: when the two sides hold equal tuples (as judged by
+** posListCmp()), only the left copy is written.
+**
+** Preconditions (asserted): both readers are on the same docid, and
+** both inputs and the output share one doclist type.
+**
+** Neither pLeft nor pRight is stepped; the caller advances them.
+*/
+static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){
+  PLReader left, right;
+  PLWriter writer;
+
+  assert( dlrDocid(pLeft)==dlrDocid(pRight) );
+  assert( pLeft->iType==pRight->iType );
+  assert( pLeft->iType==pOut->iType );
+
+  plrInit(&left, pLeft);
+  plrInit(&right, pRight);
+  plwInit(&writer, pOut, dlrDocid(pLeft));
+
+  for(;;){
+    int c;
+
+    /* Done once both sides are exhausted. */
+    if( plrAtEnd(&left) && plrAtEnd(&right) ) break;
+
+    c = posListCmp(&left, &right);
+    if( c<=0 ){
+      /* Left is smaller, or a duplicate: emit left's copy.  For a
+      ** duplicate, skip the matching entry on the right as well.
+      */
+      plwCopy(&writer, &left);
+      plrStep(&left);
+      if( c==0 ) plrStep(&right);
+    }else{
+      plwCopy(&writer, &right);
+      plrStep(&right);
+    }
+  }
+
+  plwTerminate(&writer);
+  plwDestroy(&writer);
+  plrDestroy(&left);
+  plrDestroy(&right);
+}
+
+/* Merge two doclists, pLeft/nLeft and pRight/nRight, appending the
+** combined doclist to pOut. A docid present on only one side is
+** copied through unchanged; a docid present on both sides is written
+** once, with the union of the two position lists (posListUnion()).
+** Inputs and output are always type DL_DEFAULT.
+**
+** If either input is empty, the other input is appended to pOut
+** as-is and no merging is done.
+*/
+static void docListUnion(
+  const char *pLeft, int nLeft,
+  const char *pRight, int nRight,
+  DataBuffer *pOut      /* Write the combined doclist here */
+){
+  DLReader left, right;
+  DLWriter writer;
+
+  /* Nothing to merge when one side is empty. */
+  if( nLeft==0 || nRight==0 ){
+    if( nLeft==0 ){
+      dataBufferAppend(pOut, pRight, nRight);
+    }else{
+      dataBufferAppend(pOut, pLeft, nLeft);
+    }
+    return;
+  }
+
+  dlrInit(&left, DL_DEFAULT, pLeft, nLeft);
+  dlrInit(&right, DL_DEFAULT, pRight, nRight);
+  dlwInit(&writer, DL_DEFAULT, pOut);
+
+  /* Interleave by docid while both inputs still have elements. */
+  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
+    if( dlrDocid(&left)<dlrDocid(&right) ){
+      dlwCopy(&writer, &left);
+      dlrStep(&left);
+    }else if( dlrDocid(&left)>dlrDocid(&right) ){
+      dlwCopy(&writer, &right);
+      dlrStep(&right);
+    }else{
+      /* Same docid on both sides: merge their position lists. */
+      posListUnion(&left, &right, &writer);
+      dlrStep(&left);
+      dlrStep(&right);
+    }
+  }
+
+  /* Copy whatever remains; at most one of these loops does any work. */
+  while( !dlrAtEnd(&left) ){
+    dlwCopy(&writer, &left);
+    dlrStep(&left);
+  }
+  while( !dlrAtEnd(&right) ){
+    dlwCopy(&writer, &right);
+    dlrStep(&right);
+  }
+
+  dlrDestroy(&left);
+  dlrDestroy(&right);
+  dlwDestroy(&writer);
+}
+
/* pLeft and pRight are DLReaders positioned to the same docid.
**
** If there are no instances in pLeft or pRight where the position
** include the positions from pRight that are one more than a
** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1.
*/
-static void mergePosList(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){
+static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight,
+ DLWriter *pOut){
PLReader left, right;
PLWriter writer;
int match = 0;
}else if( dlrDocid(&right)<dlrDocid(&left) ){
dlrStep(&right);
}else{
- mergePosList(&left, &right, &writer);
+ posListPhraseMerge(&left, &right, &writer);
dlrStep(&left);
dlrStep(&right);
}
}
}
-/* strcmp-style comparison of pReader's current term against pTerm. */
+/* strcmp-style comparison of pReader's current term against pTerm.
+** If isPrefix, equality means equal through nTerm bytes.
+*/
static int leafReaderTermCmp(LeafReader *pReader,
- const char *pTerm, int nTerm){
+ const char *pTerm, int nTerm, int isPrefix){
int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm;
if( n==0 ){
if( pReader->term.nData>0 ) return -1;
c = memcmp(pReader->term.pData, pTerm, n);
if( c!=0 ) return c;
+ if( isPrefix && n==nTerm ) return 0;
return pReader->term.nData - nTerm;
}
if( leavesReaderAtEnd(lr2) ) return -1;
return leafReaderTermCmp(&lr1->leafReader,
- leavesReaderTerm(lr2), leavesReaderTermBytes(lr2));
+ leavesReaderTerm(lr2), leavesReaderTermBytes(lr2),
+ 0);
}
/* Similar to leavesReaderTermCmp(), with additional ordering by idx
** Internal function for loadSegmentLeaf().
*/
static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader,
- const char *pTerm, int nTerm, DataBuffer *out){
+ const char *pTerm, int nTerm, int isPrefix,
+ DataBuffer *out){
assert( nTerm>0 );
/* Process while the prefix matches. */
** on a better name. [Meanwhile, break encapsulation rather than
** use a confusing name.]
*/
- int rc, c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm);
+ int rc;
+ int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix);
if( c==0 ){
const char *pData = leavesReaderData(pReader);
int nData = leavesReaderDataBytes(pReader);
- assert( out->nData==0 );
- dataBufferReplace(out, pData, nData);
+ if( out->nData==0 ){
+ dataBufferReplace(out, pData, nData);
+ }else{
+ DataBuffer result;
+ dataBufferInit(&result, out->nData+nData);
+ docListUnion(out->pData, out->nData, pData, nData, &result);
+ dataBufferDestroy(out);
+ *out = result;
+ /* TODO(shess) Rather than destroy out, we could retain it for
+ ** later reuse.
+ */
+ }
}
- if( c>=0 ) break; /* Past any possible matches. */
+ if( c>0 ) break; /* Past any possible matches. */
rc = leavesReaderStep(v, pReader);
if( rc!=SQLITE_OK ) return rc;
/* Call loadSegmentLeavesInt() with pData/nData as input. */
static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
- const char *pTerm, int nTerm, DataBuffer *out){
+ const char *pTerm, int nTerm, int isPrefix,
+ DataBuffer *out){
LeavesReader reader;
int rc;
rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader);
if( rc!=SQLITE_OK ) return rc;
- rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
+ rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out);
leavesReaderReset(&reader);
leavesReaderDestroy(&reader);
return rc;
*/
static int loadSegmentLeaves(fulltext_vtab *v,
sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf,
- const char *pTerm, int nTerm, DataBuffer *out){
+ const char *pTerm, int nTerm, int isPrefix,
+ DataBuffer *out){
int rc;
LeavesReader reader;
rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
if( rc!=SQLITE_OK ) return rc;
- rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
+ rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out);
leavesReaderReset(&reader);
leavesReaderDestroy(&reader);
return rc;
DataBuffer *out){
/* Special case where root is a leaf. */
if( *pData=='\0' ){
- assert( !isPrefix ); /* TODO(shess) Add prefix support. */
- return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, out);
+ return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out);
}else{
int rc;
sqlite_int64 iStartChild, iEndChild;
assert( iStartChild<=iLeavesEnd );
assert( iEndChild<=iLeavesEnd );
- assert( !isPrefix ); /* TODO(shess) Add prefix support. */
- return loadSegmentLeaves(v, iStartChild, iEndChild, pTerm, nTerm, out);
+ return loadSegmentLeaves(v, iStartChild, iEndChild,
+ pTerm, nTerm, isPrefix, out);
}
}
** merge its doclist over *out (any duplicate doclists read from the
** segment rooted at pData will overwrite those in *out).
*/
-/* NOTE(shess) Previous code passed out down to sub-routines for use
-** in docListMerge(). This version deoptimizes things slightly, but
-** prefix searches require a different merge function entirely.
-*/
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
sqlite_int64 iLeavesEnd,
const char *pTerm, int nTerm, int isPrefix,