#define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
#define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */
+#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
+
/*
** Details:
**
** 5. Segment doclist indexes:
**
** A list of varints - the first docid on each page (starting with the
-** second) of the doclist. First element in the list is a literal docid.
-** Each docid thereafter is a (negative) delta.
+** first termless page) of the doclist. First element in the list is a
+** literal docid. Each docid thereafter is a (negative) delta. If there
+** are no docids at all on a page, a 0x00 byte takes the place of the
+** delta value.
*/
/*
** (1<<HEIGHT_BITS). This is because the rowid address space for nodes
** with such a height is used by doclist indexes.
*/
-#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_SEGMENT_HEIGHT_BITS)-1)
+#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1)
/*
** The rowid for the doclist index associated with leaf page pgno of segment
Fts5Buffer buf; /* Buffer containing page data */
Fts5Buffer term; /* Buffer containing previous term on page */
};
-
struct Fts5SegWriter {
int iIdx; /* Index to write to */
int iSegid; /* Segid to write to */
u8 bFirstRowidInPage; /* True if next rowid is first in page */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
+ Fts5Buffer dlidx; /* Doclist index */
+ i64 iDlidxPrev; /* Previous rowid appended to dlidx */
+ int bDlidxPrevValid; /* True if iDlidxPrev is valid */
};
/*
**
** iLeaf: The page number of the leaf page the entry points to.
**
-** term: A split-key that all terms on leaf page $leaf must be greater
+** term: A split-key that all terms on leaf page $iLeaf must be greater
** than or equal to. The "term" associated with the first b-tree
** hierarchy entry (the one that points to leaf page 1) is always
** an empty string.
Fts5StructureSegment *pSeg, /* Description of segment */
Fts5SegIter *pIter /* Object to populate */
){
+ if( pSeg->pgnoFirst==0 ){
+ /* This happens if the segment is being used as an input to an incremental
+ ** merge and all data has already been "trimmed". See function
+ ** fts5TrimSegments() for details. In this case leave the iterator empty.
+ ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
+ ** at EOF already. */
+ assert( pIter->pLeaf==0 );
+ return;
+ }
if( p->rc==SQLITE_OK ){
memset(pIter, 0, sizeof(*pIter));
return i;
}
+/*
+** Flush the pending count of contiguous term-less leaf pages, if any, to
+** the height-1 b-tree page (pWriter->aWriter[1]).
+**
+** When pWriter->nEmpty is non-zero, two varints are appended to that page:
+** a flag followed by the nEmpty count. The flag is 1 if a doclist index
+** record was written for the run of pages, or 0 otherwise. A doclist index
+** record is written only when the run is at least FTS5_MIN_DLIDX_SIZE
+** pages long; its rowid is derived from the index, the segment id and the
+** page number (aWriter[0].pgno - 1 - nEmpty).
+**
+** Regardless of whether anything was written, the accumulated doclist
+** index buffer is emptied and marked as having no previous rowid.
+*/
+static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
+  const int nEmpty = pWriter->nEmpty;
+
+  if( nEmpty!=0 ){
+    Fts5Buffer *pBuf = &pWriter->aWriter[1].buf;
+    const int bDlidx = (nEmpty>=FTS5_MIN_DLIDX_SIZE) ? 1 : 0;
+
+    if( bDlidx ){
+      /* The run is long enough to justify a doclist index record. */
+      i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(
+          pWriter->iIdx, pWriter->iSegid,
+          pWriter->aWriter[0].pgno - 1 - nEmpty
+      );
+      fts5DataWrite(p, iDlidxRowid, pWriter->dlidx.p, pWriter->dlidx.n);
+    }
+
+    /* Flag first, then the number of term-less pages. */
+    fts5BufferAppendVarint(&p->rc, pBuf, bDlidx);
+    fts5BufferAppendVarint(&p->rc, pBuf, nEmpty);
+    pWriter->nEmpty = 0;
+  }
+
+  /* Start the next doclist index from scratch, whether or not the current
+  ** one was written to disk. */
+  sqlite3Fts5BufferZero(&pWriter->dlidx);
+  pWriter->bDlidxPrevValid = 0;
+}
+
/*
** This is called once for each leaf page except the first that contains
}
pPage = &pWriter->aWriter[iHeight];
- if( pWriter->nEmpty ){
- assert( iHeight==1 );
- fts5BufferAppendVarint(&p->rc, &pPage->buf, 0);
- fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->nEmpty);
- pWriter->nEmpty = 0;
- }
+ fts5WriteBtreeNEmpty(p, pWriter);
if( pPage->buf.n>=p->pgsz ){
/* pPage will be written to disk. The term will be written into the
Fts5Index *p, /* FTS5 backend object */
Fts5SegWriter *pWriter /* Writer object */
){
+ if( pWriter->bFirstRowidInPage ){
+ /* No rowids on this page. Append a 0x00 byte to the current
+ ** doclist-index */
+ sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, 0);
+ }
pWriter->nEmpty++;
}
+/*
+** Rowid iRowid has just been appended to the current leaf page as the
+** first rowid on that page. Append a matching entry to the doclist index
+** being accumulated in pWriter->dlidx.
+**
+** If no entry has been appended since the doclist index was last reset
+** (bDlidxPrevValid==0), the literal rowid is stored. Otherwise the value
+** stored is the previously appended rowid minus iRowid.
+**
+** The subtraction is performed on unsigned 64-bit values. Rowids may be
+** any signed 64-bit integer, so a signed subtraction of two rowids that
+** are more than 2^63 apart would overflow, which is undefined behaviour
+** in C. Unsigned arithmetic wraps instead, and yields exactly the same
+** result whenever the signed subtraction would not have overflowed.
+**
+** &p->rc is passed to the buffer-append routine for error reporting.
+*/
+static void fts5WriteDlidxAppend(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer object */
+  i64 iRowid                      /* First rowid on the current leaf page */
+){
+  i64 iVal;
+  if( pWriter->bDlidxPrevValid ){
+    iVal = (i64)((u64)pWriter->iDlidxPrev - (u64)iRowid);
+  }else{
+    iVal = iRowid;
+  }
+  sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, iVal);
+  pWriter->bDlidxPrevValid = 1;
+  pWriter->iDlidxPrev = iRowid;
+}
+
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
Fts5PageWriter *pPage = &pWriter->aWriter[0];
Fts5PageWriter *pPage = &pWriter->aWriter[0];
/* If this is to be the first docid written to the page, set the
- ** docid-pointer in the page-header. */
- if( pWriter->bFirstRowidInPage ) fts5PutU16(pPage->buf.p, pPage->buf.n);
+ ** docid-pointer in the page-header. Also append a value to the dlidx
+ ** buffer, in case a doclist-index is required. */
+ if( pWriter->bFirstRowidInPage ){
+ fts5PutU16(pPage->buf.p, pPage->buf.n);
+ fts5WriteDlidxAppend(p, pWriter, iRowid);
+ }
/* Write the docid. */
if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
fts5WriteAppendZerobyte(p, pWriter);
}
+/*
+** Flush any data cached by the writer object to the database. Free any
+** allocations associated with the writer.
+*/
static void fts5WriteFinish(
Fts5Index *p,
- Fts5SegWriter *pWriter,
- int *pnHeight,
- int *pnLeaf
+ Fts5SegWriter *pWriter, /* Writer object */
+ int *pnHeight, /* OUT: Height of the b-tree */
+ int *pnLeaf /* OUT: Number of leaf pages in b-tree */
){
int i;
*pnLeaf = pWriter->aWriter[0].pgno;
*pnHeight = pWriter->nWriter;
fts5WriteFlushLeaf(p, pWriter);
- if( pWriter->nWriter>1 && pWriter->nEmpty ){
- Fts5PageWriter *pPg = &pWriter->aWriter[1];
- fts5BufferAppendVarint(&p->rc, &pPg->buf, 0);
- fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
+ if( pWriter->nWriter>1 ){
+ fts5WriteBtreeNEmpty(p, pWriter);
}
for(i=1; i<pWriter->nWriter; i++){
Fts5PageWriter *pPg = &pWriter->aWriter[i];
fts5BufferFree(&pPg->buf);
}
sqlite3_free(pWriter->aWriter);
+ sqlite3Fts5BufferFree(&pWriter->dlidx);
}
static void fts5WriteInit(
a = sqlite3_value_blob(apVal[1]);
fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno);
+  if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
+    /* This is a doclist-index record. Print a header identifying the
+    ** record, then the decoded contents. The first varint is printed as
+    ** is. Each subsequent non-zero varint is subtracted from the running
+    ** value, which is then printed. A zero varint is printed as "x". */
+    int i = 0;
+    i64 iPrev = 0;
+
+    /* One argument per %d conversion: index, segment id, page number. */
+    sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)",
+        iIdx, iSegid, iPgno
+    );
+    if( n>0 ){
+      i = getVarint(&a[i], (u64*)&iPrev);
+      sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
+    }
+    while( i<n ){
+      i64 iVal;
+      i += getVarint(&a[i], (u64*)&iVal);
+      if( iVal==0 ){
+        sqlite3Fts5BufferAppendPrintf(&rc, &s, " x");
+      }else{
+        iPrev = iPrev - iVal;
+        sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
+      }
+    }
+
+  }else
if( iSegid==0 ){
if( iRowid==FTS5_AVERAGES_ROWID ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, "{averages} ");
-C Add\sa\scomment\sexplaining\swhy\sfts5\scannot\scache\s"sorter\sstatements".
-D 2014-07-31T17:53:03.405
+C Add\s"doclist\sindex"\srecords\sto\sthe\sdatabase.\sThese\sare\sto\smake\snavigating\swithin\svery\slarge\sdoclists\sfaster.\sThey\sare\snot\syet\sused\sby\squeries.
+D 2014-08-01T11:16:25.207
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710
F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a
-F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997
+F ext/fts5/fts5_index.c 618d54ecf41887b6db59491b71e654ae3315f8c9
F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e
F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74
F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420
+F test/fts5ah.test bfa6ebd7ee87f73c4146b9e316a105fd0e43d01a
F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4
F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 37a417d27e4ebafd4783f62728d7467316b75b17
-R e0b14b9e45e7f8113c4d7a699a937c5a
+P e6af3b7a3cf331210f4c87848e2af007dbd5ef30
+R a017a4de54c141d4f4f840978af83e33
U dan
-Z cd0e862a57439796abd2a3aa1ce5c8f8
+Z 90f2786a7e9f28e43c6798f77c65d6dc