** End of interface to code in fts5_index.c.
**************************************************************************/
+/**************************************************************************
+** Interface to code in fts5_hash.c.
+*/
+typedef struct Fts5Hash Fts5Hash;
+
+/*
+** Create a hash table, free a hash table.
+*/
+int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize);
+void sqlite3Fts5HashFree(Fts5Hash*);
+
+/*
+** Add an entry for token (pToken/nToken) at (iRowid/iCol/iPos) to the
+** hash table. If iCol is negative, the rowid is recorded for the token
+** but no column or position data is appended. Returns SQLITE_OK if
+** successful, or SQLITE_NOMEM if a memory allocation fails.
+*/
+int sqlite3Fts5HashWrite(
+ Fts5Hash*,
+ i64 iRowid, /* Rowid for this entry */
+ int iCol, /* Column token appears in (-ve -> delete) */
+ int iPos, /* Position of token within column */
+ const char *pToken, int nToken /* Token to add or remove to or from index */
+);
+
+/*
+** Empty (but do not delete) a hash table.
+*/
+void sqlite3Fts5HashClear(Fts5Hash*);
+
+/*
+** Iterate through the contents of the hash table in sorted term order.
+** For each term, xTerm() is invoked once with the term, then xEntry() is
+** invoked once for each rowid stored for the term (passing the rowid and
+** its position list), then xTermDone() is invoked. pCtx is passed as the
+** first argument to each callback.
+**
+** All entries are removed from the hash table and freed by this call. If
+** a callback returns other than SQLITE_OK, no further callbacks are made
+** and that value is returned.
+*/
+int sqlite3Fts5HashIterate(
+ Fts5Hash*,
+ void *pCtx,
+ int (*xTerm)(void*, const char*, int),
+ int (*xEntry)(void*, i64, const u8*, int),
+ int (*xTermDone)(void*)
+);
+
+
+
+/*
+** End of interface to code in fts5_hash.c.
+**************************************************************************/
+
/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
--- /dev/null
+/*
+** 2014 August 11
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+#include "fts5Int.h"
+
+typedef struct Fts5HashEntry Fts5HashEntry;
+
+/*
+** This file contains the implementation of an in-memory hash table used
+** to accumulate "term -> doclist" content before it is flushed to a level-0
+** segment.
+*/
+
+
+/*
+** An in-memory hash table. Entries (Fts5HashEntry objects) hang off the
+** aSlot[] array in singly-linked chains connected by Fts5HashEntry.pNext.
+** sqlite3Fts5HashWrite() adds the number of data bytes it stores to the
+** caller-owned counter that pnByte points to.
+*/
+struct Fts5Hash {
+ int *pnByte; /* Pointer to bytes counter */
+ int nEntry; /* Number of entries currently in hash */
+ /* NOTE(review): no code visible in this file updates nEntry - confirm */
+ int nSlot; /* Size of aSlot[] array */
+ Fts5HashEntry **aSlot; /* Array of hash slots */
+};
+
+/*
+** Each entry in the hash table is represented by an object of the
+** following type. Each object, its key (zKey[]) and its current data
+** are stored in a single memory allocation. The position list data
+** immediately follows the key data in memory.
+**
+** The data that follows the key is in a similar, but not identical format
+** to the doclist data stored in the database. For each rowid it is:
+**
+** * Rowid, as a varint
+** * Position list, without 0x00 terminator.
+**
+** Between the data for two consecutive rowids is stored the size in bytes
+** of the preceding rowid and position list, as a 4 byte integer in the
+** byte order of the host (it is written by fts5PutNativeInt()). No size
+** field follows the data for the most recently written rowid.
+**
+** iRowidOff:
+** Offset of last rowid written to data area. Relative to first byte of
+** structure.
+**
+** nData:
+** Offset of the first unused byte of the allocation, relative to the
+** first byte of the structure. Equivalently, the total number of bytes
+** used so far by the structure, key and data.
+*/
+struct Fts5HashEntry {
+ Fts5HashEntry *pNext; /* Next hash entry with same hash-key */
+
+ int nAlloc; /* Total size of allocation */
+ int iRowidOff; /* Offset of last rowid written */
+ int nData; /* Total bytes of data (incl. structure) */
+
+ int iCol; /* Column of last value written */
+ int iPos; /* Position of last value written */
+ i64 iRowid; /* Rowid of last value written */
+ char zKey[0]; /* Nul-terminated entry key */
+};
+
+
+/*
+** Create a new, empty hash table with 1024 slots and store a pointer to
+** it in (*ppNew). Parameter pnByte is saved in the new object as the
+** location of the bytes counter. Returns SQLITE_OK if successful. If
+** either allocation fails, (*ppNew) is set to NULL and SQLITE_NOMEM is
+** returned.
+*/
+int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
+  const int nSlot = 1024;
+  int nSlotByte = sizeof(Fts5HashEntry*) * nSlot;
+  Fts5Hash *pHash;
+
+  /* Allocate and zero the hash table object itself. */
+  pHash = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
+  *ppNew = pHash;
+  if( pHash==0 ) return SQLITE_NOMEM;
+  memset(pHash, 0, sizeof(Fts5Hash));
+  pHash->pnByte = pnByte;
+  pHash->nSlot = nSlot;
+
+  /* Allocate the slot array. On failure, discard the half-built object. */
+  pHash->aSlot = (Fts5HashEntry**)sqlite3_malloc(nSlotByte);
+  if( pHash->aSlot==0 ){
+    sqlite3_free(pHash);
+    *ppNew = 0;
+    return SQLITE_NOMEM;
+  }
+  memset(pHash->aSlot, 0, nSlotByte);
+  return SQLITE_OK;
+}
+
+/*
+** Destroy hash table pHash. The table is first emptied using
+** sqlite3Fts5HashClear(), then the slot array and the object itself are
+** freed. This is a no-op if pHash is NULL.
+*/
+void sqlite3Fts5HashFree(Fts5Hash *pHash){
+  if( pHash==0 ) return;
+  sqlite3Fts5HashClear(pHash);
+  sqlite3_free(pHash->aSlot);
+  sqlite3_free(pHash);
+}
+
+/*
+** Empty (but do not delete) a hash table. Every entry is freed, including
+** those linked behind the first entry of a slot via pNext, and every slot
+** is reset to NULL. The Fts5Hash object and its aSlot[] array remain
+** allocated so that the table may be reused. The counter that pnByte
+** points to is not modified.
+*/
+void sqlite3Fts5HashClear(Fts5Hash *pHash){
+  int i;
+  for(i=0; i<pHash->nSlot; i++){
+    Fts5HashEntry *pEntry = pHash->aSlot[i];
+    /* Walk the entire collision chain. Freeing only the head of the chain
+    ** would leak every other entry that hashed to the same slot. */
+    while( pEntry ){
+      Fts5HashEntry *pNext = pEntry->pNext;
+      sqlite3_free(pEntry);
+      pEntry = pNext;
+    }
+    pHash->aSlot[i] = 0;
+  }
+  pHash->nEntry = 0;
+}
+
+/*
+** Return the index of the slot in pHash->aSlot[] that the n byte key at
+** p belongs to. The bytes of the key are folded into the hash value
+** starting with the last byte and finishing with the first.
+*/
+static unsigned int fts5HashKey(Fts5Hash *pHash, const char *p, int n){
+  unsigned int iHash = 13;
+  int iByte = n;
+  while( iByte>0 ){
+    iByte--;
+    iHash = (iHash << 3) ^ iHash ^ p[iByte];
+  }
+  return iHash % pHash->nSlot;
+}
+
+/*
+** Copy the 32-bit integer passed as the second argument into buffer p,
+** using the byte order of the host. Returns the number of bytes written
+** (always 4).
+*/
+static int fts5PutNativeInt(u8 *p, int i){
+  int nWrite = sizeof(i);
+  assert( nWrite==4 );
+  memcpy(p, &i, nWrite);
+  return nWrite;
+}
+
+/*
+** Return the 32-bit integer stored, in the byte order of the host, in
+** the first 4 bytes of buffer p. This is the inverse of
+** fts5PutNativeInt().
+*/
+static int fts5GetNativeU32(u8 *p){
+  int iRet;
+  assert( sizeof(int)==4 );
+  memcpy(&iRet, p, sizeof(int));
+  return iRet;
+}
+
+/*
+** Add an entry for token (pToken/nToken) at (iRowid/iCol/iPos) to hash
+** table pHash. If the token has no hash entry yet, one is created and
+** linked in at the head of its slot. The rowid, and if iCol is not
+** negative the column and position, are then appended to the data area
+** of the entry. The entry is reallocated to twice its size if it may be
+** too small to hold the new data.
+**
+** The counter at (*pHash->pnByte) is increased by the number of bytes
+** that this call added to the entry. Returns SQLITE_OK if successful, or
+** SQLITE_NOMEM if an allocation fails (in which case the counter is left
+** unchanged).
+*/
+int sqlite3Fts5HashWrite(
+ Fts5Hash *pHash,
+ i64 iRowid, /* Rowid for this entry */
+ int iCol, /* Column token appears in (-ve -> delete) */
+ int iPos, /* Position of token within column */
+ const char *pToken, int nToken /* Token to add or remove to or from index */
+){
+ unsigned int iHash = fts5HashKey(pHash, pToken, nToken);
+ Fts5HashEntry *p;
+ u8 *pPtr;
+ int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
+
+ /* Attempt to locate an existing hash object */
+ for(p=pHash->aSlot[iHash]; p; p=p->pNext){
+ /* A match requires the first nToken bytes to be equal and the stored
+ ** key to end at exactly that point. */
+ if( memcmp(p->zKey, pToken, nToken)==0 && p->zKey[nToken]==0 ) break;
+ }
+
+ /* If an existing hash entry cannot be found, create a new one. */
+ if( p==0 ){
+ /* Space for the structure, the nul-terminated key and 64 bytes of
+ ** data, with a minimum allocation size of 128 bytes. */
+ int nByte = sizeof(Fts5HashEntry) + nToken + 1 + 64;
+ if( nByte<128 ) nByte = 128;
+
+ p = (Fts5HashEntry*)sqlite3_malloc(nByte);
+ if( !p ) return SQLITE_NOMEM;
+ memset(p, 0, sizeof(Fts5HashEntry));
+ p->nAlloc = nByte;
+ memcpy(p->zKey, pToken, nToken);
+ p->zKey[nToken] = '\0';
+ /* The data area begins immediately after the key. Its first element
+ ** is the rowid for this entry. */
+ p->iRowidOff = p->nData = nToken + 1 + sizeof(Fts5HashEntry);
+ p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid);
+ p->iRowid = iRowid;
+ p->pNext = pHash->aSlot[iHash];
+ pHash->aSlot[iHash] = p;
+
+ /* Cancels out the subtraction made below, so that all bytes used by a
+ ** new entry are added to the counter. */
+ nIncr += p->nData;
+ }
+
+ /* Check there is enough space to append a new entry. Worst case scenario
+ ** is:
+ **
+ ** + 4 bytes for the previous entry size field,
+ ** + 9 bytes for a new rowid,
+ ** + 1 byte for a "new column" byte,
+ ** + 3 bytes for a new column number (16-bit max) as a varint,
+ ** + 5 bytes for the new position offset (32-bit max).
+ */
+ if( (p->nAlloc - p->nData) < (4 + 9 + 1 + 3 + 5) ){
+ int nNew = p->nAlloc * 2;
+ Fts5HashEntry *pNew;
+ Fts5HashEntry **pp;
+ pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
+ if( pNew==0 ) return SQLITE_NOMEM;
+ pNew->nAlloc = nNew;
+ /* The entry may have moved. Find the pointer in the slot chain that
+ ** still holds the old address and update it. */
+ for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pNext);
+ *pp = pNew;
+ p = pNew;
+ }
+ pPtr = (u8*)p;
+ /* Subtract the size before appending. Together with the addition after
+ ** appending, nIncr ends up as the number of bytes added. */
+ nIncr -= p->nData;
+
+ /* If this is a new rowid, append the 4-byte size field for the previous
+ ** entry, and the new rowid for this entry. */
+ if( iRowid!=p->iRowid ){
+ p->nData += fts5PutNativeInt(&pPtr[p->nData], p->nData - p->iRowidOff);
+ p->iRowidOff = p->nData;
+ p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid);
+ /* Column and position tracking restarts for each rowid. */
+ p->iCol = 0;
+ p->iPos = 0;
+ p->iRowid = iRowid;
+ }
+
+ if( iCol>=0 ){
+ /* Append a new column value, if necessary */
+ assert( iCol>=p->iCol );
+ if( iCol!=p->iCol ){
+ /* A 0x01 byte followed by the column number as a varint. */
+ pPtr[p->nData++] = 0x01;
+ p->nData += sqlite3PutVarint(&pPtr[p->nData], iCol);
+ p->iCol = iCol;
+ p->iPos = 0;
+ }
+
+ /* Append the new position offset */
+ /* The value stored is the difference from the previous position in the
+ ** same column, plus 2. */
+ p->nData += sqlite3PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
+ p->iPos = iPos;
+ }
+ nIncr += p->nData;
+
+ *pHash->pnByte += nIncr;
+ return SQLITE_OK;
+}
+
+
+/*
+** Arguments pLeft and pRight point to linked-lists of hash-entry objects
+** (connected via pNext), each sorted in key order. Merge the two lists
+** into a single sorted list and return a pointer to its first element,
+** or NULL if both lists are empty. Keys are compared byte by byte, as
+** unsigned values.
+**
+** The loop that compares two keys has no check for the nul-terminator,
+** so the two keys being compared must differ at some byte.
+*/
+static Fts5HashEntry *fts5HashEntryMerge(
+  Fts5HashEntry *pLeft,
+  Fts5HashEntry *pRight
+){
+  Fts5HashEntry *pHead = 0;       /* First element of merged list */
+  Fts5HashEntry **ppTail = &pHead; /* Where to link the next element */
+
+  /* While both lists are non-empty, move the entry with the smaller key
+  ** to the end of the output list. */
+  while( pLeft && pRight ){
+    Fts5HashEntry *pSmall;
+    int iByte = 0;
+    while( pLeft->zKey[iByte]==pRight->zKey[iByte] ) iByte++;
+
+    if( ((u8)pLeft->zKey[iByte])>((u8)pRight->zKey[iByte]) ){
+      pSmall = pRight;
+      pRight = pRight->pNext;
+    }else{
+      pSmall = pLeft;
+      pLeft = pLeft->pNext;
+    }
+    *ppTail = pSmall;
+    ppTail = &pSmall->pNext;
+    *ppTail = 0;
+  }
+
+  /* At most one list still has elements. Append it in a single step. */
+  *ppTail = (pLeft ? pLeft : pRight);
+  return pHead;
+}
+
+/*
+** Unlink every entry from hash table pHash and link them all into a
+** single list sorted by key. The list is returned via (*ppSorted) and all
+** slots of the hash table are left empty. It is the responsibility of the
+** caller to free the elements of the returned list.
+**
+** Returns SQLITE_OK if successful. If the temporary merge array cannot be
+** allocated, SQLITE_NOMEM is returned, (*ppSorted) is set to NULL and the
+** hash table is left unmodified.
+*/
+static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){
+  const int nMergeSlot = 32;
+  int nMergeByte = sizeof(Fts5HashEntry*) * nMergeSlot;
+  Fts5HashEntry **apMerge;        /* Array of partially merged lists */
+  Fts5HashEntry *pSorted = 0;     /* Final sorted list */
+  int iSlot;
+  int iMerge;
+
+  *ppSorted = 0;
+  apMerge = sqlite3_malloc(nMergeByte);
+  if( apMerge==0 ) return SQLITE_NOMEM;
+  memset(apMerge, 0, nMergeByte);
+
+  for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
+    Fts5HashEntry *pEntry;
+    while( (pEntry = pHash->aSlot[iSlot])!=0 ){
+      /* Detach the entry from its slot, making it a list of one. */
+      pHash->aSlot[iSlot] = pEntry->pNext;
+      pEntry->pNext = 0;
+
+      /* Merge with each occupied merge slot in turn, emptying it, then
+      ** store the result in the first unoccupied slot found. */
+      iMerge = 0;
+      while( apMerge[iMerge] ){
+        pEntry = fts5HashEntryMerge(pEntry, apMerge[iMerge]);
+        apMerge[iMerge] = 0;
+        iMerge++;
+      }
+      apMerge[iMerge] = pEntry;
+    }
+  }
+
+  /* Combine whatever remains in the merge slots into the final list. */
+  for(iMerge=0; iMerge<nMergeSlot; iMerge++){
+    pSorted = fts5HashEntryMerge(pSorted, apMerge[iMerge]);
+  }
+
+  sqlite3_free(apMerge);
+  *ppSorted = pSorted;
+  return SQLITE_OK;
+}
+
+/*
+** Visit the contents of hash table pHash in sorted key order. For each
+** entry:
+**
+** * xTerm(pCtx, zKey, nKey) is invoked with the key of the entry,
+** * xEntry(pCtx, iRowid, aData, nData) is invoked once for each rowid
+** stored in the entry, where aData/nData is the data that follows
+** the rowid varint. Rowids are visited starting with the one most
+** recently written and working back to the first.
+** * xTermDone(pCtx) is invoked.
+**
+** Every entry is unlinked from the hash table (by fts5HashEntrySort())
+** and freed, whether or not an error occurs part way through. Once a
+** callback returns other than SQLITE_OK no further callbacks are made
+** and that value is returned. SQLITE_NOMEM is returned if the entries
+** cannot be sorted, in which case no callbacks are made.
+*/
+int sqlite3Fts5HashIterate(
+ Fts5Hash *pHash,
+ void *pCtx,
+ int (*xTerm)(void*, const char*, int),
+ int (*xEntry)(void*, i64, const u8*, int),
+ int (*xTermDone)(void*)
+){
+ Fts5HashEntry *pList;
+ int rc;
+
+ rc = fts5HashEntrySort(pHash, &pList);
+ if( rc==SQLITE_OK ){
+ while( pList ){
+ /* Save the next pointer, as pList is freed at the end of this pass. */
+ Fts5HashEntry *pNext = pList->pNext;
+ if( rc==SQLITE_OK ){
+ u8 *pPtr = (u8*)pList;
+ int nKey = strlen(pList->zKey);
+ int iOff = pList->iRowidOff; /* Offset of rowid being visited */
+ int iEnd = sizeof(Fts5HashEntry) + nKey + 1; /* Offset of first rowid */
+ int nByte = pList->nData - pList->iRowidOff; /* Size of rowid + data */
+
+ rc = xTerm(pCtx, pList->zKey, nKey);
+ while( rc==SQLITE_OK && iOff ){
+ int nVarint;
+ i64 iRowid;
+ nVarint = getVarint(&pPtr[iOff], (u64*)&iRowid);
+ rc = xEntry(pCtx, iRowid, &pPtr[iOff+nVarint], nByte-nVarint);
+ if( iOff==iEnd ){
+ /* This was the first rowid written. Setting iOff to zero ends
+ ** the loop. */
+ iOff = 0;
+ }else{
+ /* The 4 bytes before this rowid hold the size of the previous
+ ** rowid and its data. Use it to step backwards. */
+ nByte = fts5GetNativeU32(&pPtr[iOff-sizeof(int)]);
+ iOff = iOff - sizeof(int) - nByte;
+ }
+ }
+ if( rc==SQLITE_OK ){
+ rc = xTermDone(pCtx);
+ }
+ }
+ sqlite3_free(pList);
+ pList = pNext;
+ }
+ }
+ return rc;
+}
+
+
+
*/
#include "fts5Int.h"
-#include "fts3_hash.h"
/*
** Overview:
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
-typedef struct Fts5PendingDoclist Fts5PendingDoclist;
-typedef struct Fts5PendingPoslist Fts5PendingPoslist;
typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
** Variables related to the accumulation of tokens and doclists within the
** in-memory hash tables before they are flushed to disk.
*/
- Fts3Hash *aHash; /* One hash for terms, one for each prefix */
+ Fts5Hash **apHash; /* Array of hash tables */
int nMaxPendingData; /* Max pending data before flush to disk */
int nPendingData; /* Current bytes of pending data */
i64 iWriteRowid; /* Rowid for current doc being written */
int nRef; /* Ref count */
};
-/*
-** Before it is flushed to a level-0 segment, term data is collected in
-** the hash tables in the Fts5Index.aHash[] array. Hash table keys are
-** terms (or, for prefix indexes, term prefixes) and values are instances
-** of type Fts5PendingDoclist.
-*/
-struct Fts5PendingDoclist {
- u8 *pTerm; /* Term for this entry */
- int nTerm; /* Bytes of data at pTerm */
- Fts5PendingPoslist *pPoslist; /* Linked list of position lists */
- int iCol; /* Column for last entry in pPending */
- int iPos; /* Pos value for last entry in pPending */
- Fts5PendingDoclist *pNext; /* Used during merge sort */
-};
-struct Fts5PendingPoslist {
- i64 iRowid; /* Rowid for this doclist entry */
- Fts5Buffer buf; /* Current doclist contents */
- Fts5PendingPoslist *pNext; /* Previous poslist for same term */
-};
-
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
return (p->rc || pIter->chunk.pLeaf==0);
}
-
-/*
-** Allocate memory. The difference between this function and fts5IdxMalloc()
-** is that this increments the Fts5Index.nPendingData variable by the
-** number of bytes allocated. It should be used for all allocations used
-** to store pending-data within the in-memory hash tables.
-*/
-static void *fts5PendingMalloc(Fts5Index *p, int nByte){
- p->nPendingData += nByte;
- return fts5IdxMalloc(p, nByte);
-}
-
/*
** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken)
** in hash table for index iIdx. If iIdx is zero, this is the main terms
int iPos, /* Position of token within column */
const char *pToken, int nToken /* Token to add or remove to or from index */
){
- Fts5Config *pConfig = p->pConfig;
- Fts3Hash *pHash;
- Fts5PendingDoclist *pDoclist;
- Fts5PendingPoslist *pPoslist;
- i64 iRowid = p->iWriteRowid; /* Rowid associated with these tokens */
-
- /* If an error has already occured this call is a no-op. */
- if( p->rc!=SQLITE_OK ) return;
-
- /* Find the hash table to use. It has already been allocated. */
- assert( iIdx<=pConfig->nPrefix );
- assert( iIdx==0 || nToken==pConfig->aPrefix[iIdx-1] );
- pHash = &p->aHash[iIdx];
-
- /* Find the doclist to append to. Allocate a new doclist object if
- ** required. */
- pDoclist = (Fts5PendingDoclist*)fts3HashFind(pHash, pToken, nToken);
- if( pDoclist==0 ){
- Fts5PendingDoclist *pDel;
- pDoclist = fts5PendingMalloc(p, sizeof(Fts5PendingDoclist) + nToken);
- if( pDoclist==0 ) return;
- pDoclist->pTerm = (u8*)&pDoclist[1];
- pDoclist->nTerm = nToken;
- memcpy(pDoclist->pTerm, pToken, nToken);
- pDel = fts3HashInsert(pHash, pDoclist->pTerm, nToken, pDoclist);
- if( pDel ){
- assert( pDoclist==pDel );
- sqlite3_free(pDel);
- p->rc = SQLITE_NOMEM;
- return;
- }
- }
-
- /* Find the poslist to append to. Allocate a new object if required. */
- pPoslist = pDoclist->pPoslist;
- if( pPoslist==0 || pPoslist->iRowid!=iRowid ){
- pPoslist = fts5PendingMalloc(p, sizeof(Fts5PendingPoslist));
- if( pPoslist==0 ) return;
- pPoslist->pNext = pDoclist->pPoslist;
- pPoslist->iRowid = iRowid;
- pDoclist->pPoslist = pPoslist;
- pDoclist->iCol = 0;
- pDoclist->iPos = 0;
- }
-
- /* Append the values to the position list. */
- if( iCol>=0 ){
- p->nPendingData -= pPoslist->buf.nSpace;
- if( iCol!=pDoclist->iCol ){
- fts5BufferAppendVarint(&p->rc, &pPoslist->buf, 1);
- fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iCol);
- pDoclist->iCol = iCol;
- pDoclist->iPos = 0;
- }
- fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iPos + 2 - pDoclist->iPos);
- p->nPendingData += pPoslist->buf.nSpace;
- pDoclist->iPos = iPos;
- }
-}
-
-/*
-** Free the pending-doclist object passed as the only argument.
-*/
-static void fts5FreePendingDoclist(Fts5PendingDoclist *p){
- Fts5PendingPoslist *pPoslist;
- Fts5PendingPoslist *pNext;
- for(pPoslist=p->pPoslist; pPoslist; pPoslist=pNext){
- pNext = pPoslist->pNext;
- fts5BufferFree(&pPoslist->buf);
- sqlite3_free(pPoslist);
+ if( p->rc==SQLITE_OK ){
+ p->rc = sqlite3Fts5HashWrite(
+ p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken
+ );
}
- sqlite3_free(p);
}
/*
if( p->rc!=SQLITE_OK ) return;
/* Allocate hash tables if they have not already been allocated */
- if( p->aHash==0 ){
+ if( p->apHash==0 ){
int nHash = pConfig->nPrefix + 1;
- p->aHash = (Fts3Hash*)sqlite3_malloc(sizeof(Fts3Hash) * nHash);
- if( p->aHash==0 ){
- p->rc = SQLITE_NOMEM;
- }else{
- for(i=0; i<nHash; i++){
- fts3HashInit(&p->aHash[i], FTS3_HASH_STRING, 0);
- }
+ p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash);
+ for(i=0; p->rc==SQLITE_OK && i<nHash; i++){
+ p->rc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData);
}
}
return 0;
}
-static Fts5PendingDoclist *fts5PendingMerge(
- Fts5Index *p,
- Fts5PendingDoclist *pLeft,
- Fts5PendingDoclist *pRight
-){
- Fts5PendingDoclist *p1 = pLeft;
- Fts5PendingDoclist *p2 = pRight;
- Fts5PendingDoclist *pRet = 0;
- Fts5PendingDoclist **ppOut = &pRet;
-
- while( p1 || p2 ){
- if( p1==0 ){
- *ppOut = p2;
- p2 = 0;
- }else if( p2==0 ){
- *ppOut = p1;
- p1 = 0;
- }else{
- int nCmp = MIN(p1->nTerm, p2->nTerm);
- int res = memcmp(p1->pTerm, p2->pTerm, nCmp);
- if( res==0 ) res = p1->nTerm - p2->nTerm;
-
- if( res>0 ){
- /* p2 is smaller */
- *ppOut = p2;
- ppOut = &p2->pNext;
- p2 = p2->pNext;
- }else{
- /* p1 is smaller */
- *ppOut = p1;
- ppOut = &p1->pNext;
- p1 = p1->pNext;
- }
- *ppOut = 0;
- }
- }
-
- return pRet;
-}
-
-/*
-** Extract all tokens from hash table iHash and link them into a list
-** in sorted order. The hash table is cleared before returning. It is
-** the responsibility of the caller to free the elements of the returned
-** list.
-**
-** If an error occurs, set the Fts5Index.rc error code. If an error has
-** already occurred, this function is a no-op.
-*/
-static Fts5PendingDoclist *fts5PendingList(Fts5Index *p, int iHash){
- const int nMergeSlot = 32;
- Fts3Hash *pHash;
- Fts3HashElem *pE; /* Iterator variable */
- Fts5PendingDoclist **ap;
- Fts5PendingDoclist *pList;
- int i;
-
- ap = fts5IdxMalloc(p, sizeof(Fts5PendingDoclist*) * nMergeSlot);
- if( !ap ) return 0;
-
- pHash = &p->aHash[iHash];
- for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
- int i;
- Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE);
- assert( pDoclist->pNext==0 );
- for(i=0; ap[i]; i++){
- pDoclist = fts5PendingMerge(p, pDoclist, ap[i]);
- ap[i] = 0;
- }
- ap[i] = pDoclist;
- }
-
- pList = 0;
- for(i=0; i<nMergeSlot; i++){
- pList = fts5PendingMerge(p, pList, ap[i]);
- }
-
- sqlite3_free(ap);
- fts3HashClear(pHash);
- return pList;
-}
-
-
/*
** Discard all data currently cached in the hash-tables.
*/
Fts5Config *pConfig = p->pConfig;
int i;
for(i=0; i<=pConfig->nPrefix; i++){
- Fts3Hash *pHash = &p->aHash[i];
- Fts3HashElem *pE; /* Iterator variable */
- for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
- Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE);
- fts5FreePendingDoclist(pDoclist);
- }
- fts3HashClear(pHash);
+ sqlite3Fts5HashClear(p->apHash[i]);
}
p->nPendingData = 0;
}
fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
}
-/*
-** Write the contents of pending-doclist object pDoclist to writer pWriter.
-**
-** If an error occurs, set the Fts5Index.rc error code. If an error has
-** already occurred, this function is a no-op.
-*/
-static void fts5WritePendingDoclist(
- Fts5Index *p, /* FTS5 backend object */
- Fts5SegWriter *pWriter, /* Write to this writer object */
- Fts5PendingDoclist *pDoclist /* Doclist to write to pWriter */
-){
- Fts5PendingPoslist *pPoslist; /* Used to iterate through the doclist */
-
- /* Append the term */
- fts5WriteAppendTerm(p, pWriter, pDoclist->nTerm, pDoclist->pTerm);
-
- /* Append the position list for each rowid */
- for(pPoslist=pDoclist->pPoslist; pPoslist; pPoslist=pPoslist->pNext){
- int i = 0;
-
- /* Append the rowid itself */
- fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid);
-
- /* Append the size of the position list in bytes */
- fts5WriteAppendPoslistInt(p, pWriter, pPoslist->buf.n);
-
- /* Copy the position list to the output segment */
- while( i<pPoslist->buf.n){
- int iVal;
- i += getVarint32(&pPoslist->buf.p[i], iVal);
- fts5WriteAppendPoslistInt(p, pWriter, iVal);
- }
- }
-
- /* Write the doclist terminator */
- fts5WriteAppendZerobyte(p, pWriter);
-}
-
/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
}
}
+/*
+** Context object passed, as the void* argument, to the fts5FlushNewTerm(),
+** fts5FlushNewEntry() and fts5FlushTermDone() callbacks.
+*/
+typedef struct Fts5FlushCtx Fts5FlushCtx;
+struct Fts5FlushCtx {
+ Fts5Index *pIdx; /* Passed as first argument to fts5WriteXXX() calls */
+ Fts5SegWriter writer; /* Writer that the callbacks append data to */
+};
+
+/*
+** Callback that appends term (zTerm/nTerm) to the writer of the
+** Fts5FlushCtx object passed as pCtx, using fts5WriteAppendTerm().
+** Always returns SQLITE_OK.
+*/
+static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){
+  Fts5FlushCtx *pFlush = (Fts5FlushCtx*)pCtx;
+  fts5WriteAppendTerm(pFlush->pIdx, &pFlush->writer, nTerm, (const u8*)zTerm);
+  return SQLITE_OK;
+}
+
+/*
+** Callback that writes the doclist terminator to the writer of the
+** Fts5FlushCtx object passed as pCtx, using fts5WriteAppendZerobyte().
+** Always returns SQLITE_OK.
+*/
+static int fts5FlushTermDone(void *pCtx){
+  Fts5FlushCtx *pFlush = (Fts5FlushCtx*)pCtx;
+  fts5WriteAppendZerobyte(pFlush->pIdx, &pFlush->writer);
+  return SQLITE_OK;
+}
+
+/*
+** Callback that appends a single doclist entry to the writer of the
+** Fts5FlushCtx object passed as pCtx. The entry consists of rowid iRowid,
+** the size in bytes of the position list (nPoslist), and then each varint
+** decoded from the nPoslist byte buffer aPoslist. Always returns
+** SQLITE_OK.
+*/
+static int fts5FlushNewEntry(
+  void *pCtx,
+  i64 iRowid,
+  const u8 *aPoslist,
+  int nPoslist
+){
+  Fts5FlushCtx *pFlush = (Fts5FlushCtx*)pCtx;
+  Fts5Index *pIdx = pFlush->pIdx;
+  Fts5SegWriter *pWriter = &pFlush->writer;
+  int iOff;                       /* Current offset within aPoslist[] */
+
+  /* The rowid, followed by the size of the position list in bytes */
+  fts5WriteAppendRowid(pIdx, pWriter, iRowid);
+  fts5WriteAppendPoslistInt(pIdx, pWriter, nPoslist);
+
+  /* Copy the position list to the output segment, one integer at a time */
+  for(iOff=0; iOff<nPoslist; ){
+    int iVal;
+    iOff += getVarint32(&aPoslist[iOff], iVal);
+    fts5WriteAppendPoslistInt(pIdx, pWriter, iVal);
+  }
+
+  return SQLITE_OK;
+}
+
/*
** Flush the contents of in-memory hash table iHash to a new level-0
** segment on disk. Also update the corresponding structure record.
iSegid = fts5AllocateSegid(p, pStruct);
if( iSegid ){
- Fts5SegWriter writer;
- Fts5PendingDoclist *pList;
- Fts5PendingDoclist *pIter;
- Fts5PendingDoclist *pNext;
-
Fts5StructureSegment *pSeg; /* New segment within pStruct */
int nHeight; /* Height of new segment b-tree */
+ int rc;
+ Fts5FlushCtx ctx;
- pList = fts5PendingList(p, iHash);
- assert( pList!=0 || p->rc!=SQLITE_OK );
- fts5WriteInit(p, &writer, iHash, iSegid);
+ fts5WriteInit(p, &ctx.writer, iHash, iSegid);
+ ctx.pIdx = p;
- for(pIter=pList; pIter; pIter=pNext){
- pNext = pIter->pNext;
- fts5WritePendingDoclist(p, &writer, pIter);
- fts5FreePendingDoclist(pIter);
- }
- fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
+ rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx,
+ fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone
+ );
+ if( p->rc==SQLITE_OK ) p->rc = rc;
+ fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast);
/* Edit the Fts5Structure and write it back to the database. */
if( pStruct->nLevel==0 ){
/* If an error has already occured this call is a no-op. */
if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return;
- assert( p->aHash );
+ assert( p->apHash );
/* Flush the terms and each prefix index to disk */
for(i=0; i<=pConfig->nPrefix; i++){
assert( p->pReader==0 );
sqlite3_finalize(p->pWriter);
sqlite3_finalize(p->pDeleter);
- sqlite3_free(p->aHash);
+ if( p->apHash ){
+ int i;
+ for(i=0; i<=p->pConfig->nPrefix; i++){
+ sqlite3Fts5HashFree(p->apHash[i]);
+ }
+ sqlite3_free(p->apHash);
+ }
sqlite3_free(p->zDataTbl);
sqlite3_free(p);
return rc;
if( aBuf && pStruct ){
Fts5DoclistIter *pDoclist;
int i;
- i64 iLastRowid;
+ i64 iLastRowid = 0;
Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */
Fts5Buffer doclist;
LIBOBJ += fts5_buffer.o
LIBOBJ += fts5_config.o
LIBOBJ += fts5_expr.o
+LIBOBJ += fts5_hash.o
LIBOBJ += fts5_index.o
LIBOBJ += fts5_storage.o
LIBOBJ += fts5parse.o
$(TOP)/ext/fts5/fts5.c \
$(TOP)/ext/fts5/fts5_config.c \
$(TOP)/ext/fts5/fts5_expr.c \
+ $(TOP)/ext/fts5/fts5_hash.c \
$(TOP)/ext/fts5/fts5_index.c \
fts5parse.c \
$(TOP)/ext/fts5/fts5_storage.c
fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c
+fts5_hash.o: $(TOP)/ext/fts5/fts5_hash.c $(HDR) $(EXTHDR)
+ $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_hash.c
+
fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c
-C Fix\san\suninitialized\svariable\scausing\sa\sproblem\sduring\sfts5\stable\sinitialization.
-D 2014-08-09T18:22:59.679
+C Replace\sthe\shash\stable\sborrowed\sfrom\sfts3.
+D 2014-08-11T19:44:52.686
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368
F ext/fts5/fts5.c 15e585ed0194f94a1da360808f29184f9d44554c
F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a
-F ext/fts5/fts5Int.h 410001da21bcc3d09b4290d4858352d0985ac7a6
+F ext/fts5/fts5Int.h f17a25546d598fdc5cc47f576d38063fd9290963
F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710
F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24
-F ext/fts5/fts5_index.c 75b2ebfa97ad6054bba98cb923cd2d3c6cc5b112
+F ext/fts5/fts5_hash.c 2af412d00f65ad427f18acbe421c113413cdef06
+F ext/fts5/fts5_index.c ccef8703b6228a39090b0a03b83f163e69627ff2
F ext/fts5/fts5_storage.c fa3c8fc4766d850a4977bf1d4b71c37e7b07ab8b
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
-F main.mk 8118631727a27fa88eb38a07ac3b86ecb86e9eb0
+F main.mk c4fff232b880b91bf665cd2951465de61178e444
F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea
F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce
F tool/lemon.c 3ff0fec22f92dfb54e62eeb48772eddffdbeb0d6
F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc
-F tool/loadfts.c 3bdd46090112c84df44a4fbae740af3836108b3f
+F tool/loadfts.c b5b3206ddd58d89ec8d54038c784bcadd6195915
F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6
F tool/mkautoconfamal.sh f8d8dbf7d62f409ebed5134998bf5b51d7266383
F tool/mkkeywordhash.c dfff09dbbfaf950e89af294f48f902181b144670
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 2821825f7a481755a333dcdcad780b3e24448f20
-R c0a232bfa9626e6e9a9c306fc05ca763
+P a14fa876f0eb66028e302b908967cc4a05ede9fc
+R b81a5fabd4e838059b5d12635ffcd939
U dan
-Z 9113dc9c4d427c4fad9a129f5cfa7a9b
+Z 39c621bf94a400035f58731d1ee0f6cd
-a14fa876f0eb66028e302b908967cc4a05ede9fc
\ No newline at end of file
+617e2fac1c128212254f71b1a8fddaf0d1d90262
\ No newline at end of file
" -fts [345] FTS version to use (default=5)\n"
" -idx [01] Create a mapping from filename to rowid (default=0)\n"
" -dir <path> Root of directory tree to load data from (default=.)\n"
+" -trans <integer> Number of inserts per transaction (default=1)\n"
, zArgv0
);
exit(1);
*/
typedef struct VisitContext VisitContext;
struct VisitContext {
+ int nRowPerTrans;
sqlite3 *db; /* Database handle */
sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */
};
sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
sqlite3_step(p->pInsert);
rc = sqlite3_reset(p->pInsert);
- if( rc!=SQLITE_OK ) sqlite_error_out("insert", p->db);
+ if( rc!=SQLITE_OK ){
+ sqlite_error_out("insert", p->db);
+ }else if( p->nRowPerTrans>0
+ && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0
+ ){
+ sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0);
+ }
}
/*
const char *zDir = "."; /* Directory to scan */
int i;
int rc;
+ int nRowPerTrans = 0;
sqlite3 *db;
char *zSql;
VisitContext sCtx;
iFts = atoi(zArg);
if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
}
+ if( strcmp(zOpt, "-trans")==0 ){
+ nRowPerTrans = atoi(zArg);
+ }
else if( strcmp(zOpt, "-idx")==0 ){
bMap = atoi(zArg);
if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
/* Compile the INSERT statement to write data to the FTS table. */
memset(&sCtx, 0, sizeof(VisitContext));
sCtx.db = db;
+ sCtx.nRowPerTrans = nRowPerTrans;
rc = sqlite3_prepare_v2(db,
"INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
);
if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);
/* Load all files in the directory hierarchy into the FTS table. */
+ if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
traverse(zDir, (void*)&sCtx, visit_file);
+ if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);
/* Clean up and exit. */
sqlite3_finalize(sCtx.pInsert);