From: dan Date: Mon, 11 Aug 2014 19:44:52 +0000 (+0000) Subject: Replace the hash table borrowed from fts3. X-Git-Tag: version-3.8.11~114^2~137 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c7fe7a969b383bf3bdb280bbc6b31e1a21a401d9;p=thirdparty%2Fsqlite.git Replace the hash table borrowed from fts3. FossilOrigin-Name: 617e2fac1c128212254f71b1a8fddaf0d1d90262 --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 4ae110fd19..4ef8454e1f 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -283,6 +283,47 @@ int sqlite3Fts5IndexReads(Fts5Index *p); ** End of interface to code in fts5_index.c. **************************************************************************/ +/************************************************************************** +** Interface to code in fts5_hash.c. +*/ +typedef struct Fts5Hash Fts5Hash; + +/* +** Create a hash table, free a hash table. +*/ +int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize); +void sqlite3Fts5HashFree(Fts5Hash*); + +int sqlite3Fts5HashWrite( + Fts5Hash*, + i64 iRowid, /* Rowid for this entry */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +); + +/* +** Empty (but do not delete) a hash table. +*/ +void sqlite3Fts5HashClear(Fts5Hash*); + +/* +** Iterate through the contents of the hash table. +*/ +int sqlite3Fts5HashIterate( + Fts5Hash*, + void *pCtx, + int (*xTerm)(void*, const char*, int), + int (*xEntry)(void*, i64, const u8*, int), + int (*xTermDone)(void*) +); + + + +/* +** End of interface to code in fts5_hash.c. +**************************************************************************/ + /************************************************************************** ** Interface to code in fts5_storage.c. fts5_storage.c contains contains ** code to access the data stored in the %_content and %_docsize tables. 
diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c
new file mode 100644
index 0000000000..3b50f3ac78
--- /dev/null
+++ b/ext/fts5/fts5_hash.c
@@ -0,0 +1,371 @@
+/*
+** 2014 August 11
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+#include "fts5Int.h"
+
+typedef struct Fts5HashEntry Fts5HashEntry;
+
+/*
+** This file contains the implementation of an in-memory hash table used
+** to accumulate "term -> doclist" content before it is flushed to a level-0
+** segment.
+*/
+
+
+struct Fts5Hash {
+  int *pnByte;                    /* Pointer to bytes counter */
+  int nEntry;                     /* Number of entries currently in hash */
+  int nSlot;                      /* Size of aSlot[] array */
+  Fts5HashEntry **aSlot;          /* Array of hash slots */
+};
+
+/*
+** Each entry in the hash table is represented by an object of the
+** following type. Each object, its key (zKey[]) and its current data
+** are stored in a single memory allocation. The position list data
+** immediately follows the key data in memory.
+**
+** The data that follows the key is in a similar, but not identical format
+** to the doclist data stored in the database. It is:
+**
+**   * Rowid, as a varint
+**   * Position list, without 0x00 terminator.
+**   * Size of previous position list and rowid, as a 4 byte integer in
+**     native byte order (it is copied in and out of the buffer with
+**     memcpy() by fts5PutNativeInt() and fts5GetNativeU32()).
+**
+** iRowidOff:
+**   Offset of last rowid written to data area. Relative to first byte of
+**   structure.
+**
+** nData:
+**   Offset, relative to the first byte of the structure, at which the next
+**   byte of data will be written. Equivalently, the number of bytes of the
+**   allocation currently in use, including the structure itself and the
+**   nul-terminated key.
+*/
+struct Fts5HashEntry {
+  Fts5HashEntry *pNext;           /* Next hash entry with same hash-key */
+
+  int nAlloc;                     /* Total size of allocation */
+  int iRowidOff;                  /* Offset of last rowid written */
+  int nData;                      /* Total bytes of data (incl.
structure) */
+
+  int iCol;                       /* Column of last value written */
+  int iPos;                       /* Position of last value written */
+  i64 iRowid;                     /* Rowid of last value written */
+  char zKey[0];                   /* Nul-terminated entry key */
+};
+
+
+/*
+** Allocate a new hash table.
+**
+** The table is created with 1024 slots, all initially empty. Pointer
+** pnByte is stored in the new object; sqlite3Fts5HashWrite() adds to the
+** integer it points to as data is accumulated.
+**
+** Returns SQLITE_OK and sets *ppNew to the new table if successful. If
+** an allocation fails, SQLITE_NOMEM is returned and *ppNew is set to 0.
+*/
+int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
+  int rc = SQLITE_OK;
+  Fts5Hash *pNew;
+
+  *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
+  if( pNew==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    int nByte;
+    memset(pNew, 0, sizeof(Fts5Hash));
+    pNew->pnByte = pnByte;
+
+    pNew->nSlot = 1024;
+    nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
+    pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte);
+    if( pNew->aSlot==0 ){
+      sqlite3_free(pNew);
+      *ppNew = 0;
+      rc = SQLITE_NOMEM;
+    }else{
+      memset(pNew->aSlot, 0, nByte);
+    }
+  }
+  return rc;
+}
+
+/*
+** Free a hash table object, including all entries it still contains.
+** Passing a NULL pointer is a harmless no-op.
+*/
+void sqlite3Fts5HashFree(Fts5Hash *pHash){
+  if( pHash ){
+    sqlite3Fts5HashClear(pHash);
+    sqlite3_free(pHash->aSlot);
+    sqlite3_free(pHash);
+  }
+}
+
+/*
+** Empty (but do not delete) a hash table.
+**
+** Every entry is freed, including those chained behind the first entry of
+** a slot via Fts5HashEntry.pNext (sqlite3Fts5HashWrite() links keys that
+** hash to the same slot into such a chain). Freeing only the head of each
+** slot would leak the rest of the chain. The slot array itself is kept,
+** with every slot reset to 0, so the table can be reused. The counter at
+** *pHash->pnByte is not modified here.
+*/
+void sqlite3Fts5HashClear(Fts5Hash *pHash){
+  int i;
+  for(i=0; i<pHash->nSlot; i++){
+    Fts5HashEntry *pEntry = pHash->aSlot[i];
+    while( pEntry ){
+      /* Read the link before the entry that holds it is released */
+      Fts5HashEntry *pNext = pEntry->pNext;
+      sqlite3_free(pEntry);
+      pEntry = pNext;
+    }
+    pHash->aSlot[i] = 0;
+  }
+  pHash->nEntry = 0;
+}
+
+/*
+** Return the index of the slot in pHash->aSlot[] that the n byte key at
+** p belongs to. The key bytes are mixed in from last to first. The
+** result is always less than pHash->nSlot.
+**
+** p[i] is a plain char, so whether bytes above 0x7F are sign-extended
+** before being mixed in depends on the platform.
+*/
+static unsigned int fts5HashKey(Fts5Hash *pHash, const char *p, int n){
+  int i;
+  unsigned int h = 13;
+  for(i=n-1; i>=0; i--){
+    h = (h << 3) ^ h ^ p[i];
+  }
+  return (h % pHash->nSlot);
+}
+
+/*
+** Store the 32-bit integer passed as the second argument in buffer p.
+** The value is copied in native byte order. Returns the number of bytes
+** written (sizeof(int), asserted to be 4).
+*/
+static int fts5PutNativeInt(u8 *p, int i){
+  assert( sizeof(i)==4 );
+  memcpy(p, &i, sizeof(i));
+  return sizeof(i);
+}
+
+/*
+** Read and return the 32-bit integer stored in buffer p.
+*/
+static int fts5GetNativeU32(u8 *p){
+  int i;
+  assert( sizeof(i)==4 );
+  memcpy(&i, p, sizeof(i));
+  return i;
+}
+
+/*
+** Append an (iRowid/iCol/iPos) record to the data stored for the term
+** (pToken/nToken), creating the hash entry for the term if it does not
+** exist yet.
+**
+** What is appended, in order:
+**
+**   * If iRowid differs from the rowid most recently written to the
+**     entry: the 4-byte size of the previous rowid+position-list, then
+**     the new rowid as a varint. The "last column" and "last position"
+**     values of the entry are reset to 0.
+**   * If iCol is non-negative and differs from the last column written:
+**     a 0x01 byte followed by the column number as a varint.
+**   * If iCol is non-negative: the position, as the varint
+**     (iPos - previous position + 2).
+**
+** If iCol is negative nothing is written after the rowid.
+**
+** The integer at *pHash->pnByte is increased by the number of bytes by
+** which the entry's used size (nData) grew. For a newly created entry
+** that is its whole used size, structure and key included.
+**
+** Returns SQLITE_OK, or SQLITE_NOMEM if an allocation fails. In the
+** latter case *pHash->pnByte is left unchanged.
+**
+** NOTE(review): the lookup below hands nToken to memcmp() for every entry
+** in the slot, whatever the length of that entry's own key. For a short
+** stored key and a long token the compared range may extend past the
+** entry's allocation (minimum size 128 bytes) - confirm whether this
+** needs a stored key length.
+*/
+int sqlite3Fts5HashWrite(
+  Fts5Hash *pHash,
+  i64 iRowid,                     /* Rowid for this entry */
+  int iCol,                       /* Column token appears in (-ve -> delete) */
+  int iPos,                       /* Position of token within column */
+  const char *pToken, int nToken  /* Token to add or remove to or from index */
+){
+  unsigned int iHash = fts5HashKey(pHash, pToken, nToken);
+  Fts5HashEntry *p;
+  u8 *pPtr;
+  int nIncr = 0;                  /* Amount to increment (*pHash->pnByte) by */
+
+  /* Attempt to locate an existing hash object */
+  for(p=pHash->aSlot[iHash]; p; p=p->pNext){
+    if( memcmp(p->zKey, pToken, nToken)==0 && p->zKey[nToken]==0 ) break;
+  }
+
+  /* If an existing hash entry cannot be found, create a new one. */
+  if( p==0 ){
+    int nByte = sizeof(Fts5HashEntry) + nToken + 1 + 64;
+    if( nByte<128 ) nByte = 128;
+
+    p = (Fts5HashEntry*)sqlite3_malloc(nByte);
+    if( !p ) return SQLITE_NOMEM;
+    memset(p, 0, sizeof(Fts5HashEntry));
+    p->nAlloc = nByte;
+    memcpy(p->zKey, pToken, nToken);
+    p->zKey[nToken] = '\0';
+    /* The data area starts directly after the nul-terminated key. The
+    ** first thing written to it is the rowid. */
+    p->iRowidOff = p->nData = nToken + 1 + sizeof(Fts5HashEntry);
+    p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid);
+    p->iRowid = iRowid;
+    /* Link the new entry in at the head of its slot */
+    p->pNext = pHash->aSlot[iHash];
+    pHash->aSlot[iHash] = p;
+
+    nIncr += p->nData;
+  }
+
+  /* Check there is enough space to append a new entry. Worst case scenario
+  ** is:
+  **
+  **     + 4 bytes for the previous entry size field,
+  **     + 9 bytes for a new rowid,
+  **     + 1 byte for a "new column" byte,
+  **     + 3 bytes for a new column number (16-bit max) as a varint,
+  **     + 5 bytes for the new position offset (32-bit max).
+  */
+  if( (p->nAlloc - p->nData) < (4 + 9 + 1 + 3 + 5) ){
+    int nNew = p->nAlloc * 2;
+    Fts5HashEntry *pNew;
+    Fts5HashEntry **pp;
+    pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
+    if( pNew==0 ) return SQLITE_NOMEM;
+    pNew->nAlloc = nNew;
+    /* Find the pointer (slot head or some pNext field) that referred to
+    ** the old allocation and make it refer to the new one. */
+    for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pNext);
+    *pp = pNew;
+    p = pNew;
+  }
+  pPtr = (u8*)p;
+  /* Subtract the used size before appending. Adding the used size again
+  ** after appending leaves the growth in nIncr (for a new entry the two
+  ** earlier adjustments cancel, leaving the full used size). */
+  nIncr -= p->nData;
+
+  /* If this is a new rowid, append the 4-byte size field for the previous
+  ** entry, and the new rowid for this entry. */
+  if( iRowid!=p->iRowid ){
+    p->nData += fts5PutNativeInt(&pPtr[p->nData], p->nData - p->iRowidOff);
+    p->iRowidOff = p->nData;
+    p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid);
+    p->iCol = 0;
+    p->iPos = 0;
+    p->iRowid = iRowid;
+  }
+
+  if( iCol>=0 ){
+    /* Append a new column value, if necessary */
+    assert( iCol>=p->iCol );
+    if( iCol!=p->iCol ){
+      pPtr[p->nData++] = 0x01;
+      p->nData += sqlite3PutVarint(&pPtr[p->nData], iCol);
+      p->iCol = iCol;
+      p->iPos = 0;
+    }
+
+    /* Append the new position offset */
+    p->nData += sqlite3PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
+    p->iPos = iPos;
+  }
+  nIncr += p->nData;
+
+  *pHash->pnByte += nIncr;
+  return SQLITE_OK;
+}
+
+
+/*
+** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
+** each sorted in key order. This function merges the two lists into a
+** single list and returns a pointer to its first element.
+*/ +static Fts5HashEntry *fts5HashEntryMerge( + Fts5HashEntry *pLeft, + Fts5HashEntry *pRight +){ + Fts5HashEntry *p1 = pLeft; + Fts5HashEntry *p2 = pRight; + Fts5HashEntry *pRet = 0; + Fts5HashEntry **ppOut = &pRet; + + while( p1 || p2 ){ + if( p1==0 ){ + *ppOut = p2; + p2 = 0; + }else if( p2==0 ){ + *ppOut = p1; + p1 = 0; + }else{ + int i = 0; + while( p1->zKey[i]==p2->zKey[i] ) i++; + + if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){ + /* p2 is smaller */ + *ppOut = p2; + ppOut = &p2->pNext; + p2 = p2->pNext; + }else{ + /* p1 is smaller */ + *ppOut = p1; + ppOut = &p1->pNext; + p1 = p1->pNext; + } + *ppOut = 0; + } + } + + return pRet; +} + +/* +** Extract all tokens from hash table iHash and link them into a list +** in sorted order. The hash table is cleared before returning. It is +** the responsibility of the caller to free the elements of the returned +** list. +*/ +static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){ + const int nMergeSlot = 32; + Fts5HashEntry **ap; + Fts5HashEntry *pList; + int iSlot; + int i; + + *ppSorted = 0; + ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot); + if( !ap ) return SQLITE_NOMEM; + memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); + + for(iSlot=0; iSlotnSlot; iSlot++){ + while( pHash->aSlot[iSlot] ){ + Fts5HashEntry *pEntry = pHash->aSlot[iSlot]; + pHash->aSlot[iSlot] = pEntry->pNext; + pEntry->pNext = 0; + for(i=0; ap[i]; i++){ + pEntry = fts5HashEntryMerge(pEntry, ap[i]); + ap[i] = 0; + } + ap[i] = pEntry; + } + } + + pList = 0; + for(i=0; ipNext; + if( rc==SQLITE_OK ){ + u8 *pPtr = (u8*)pList; + int nKey = strlen(pList->zKey); + int iOff = pList->iRowidOff; + int iEnd = sizeof(Fts5HashEntry) + nKey + 1; + int nByte = pList->nData - pList->iRowidOff; + + rc = xTerm(pCtx, pList->zKey, nKey); + while( rc==SQLITE_OK && iOff ){ + int nVarint; + i64 iRowid; + nVarint = getVarint(&pPtr[iOff], (u64*)&iRowid); + rc = xEntry(pCtx, iRowid, &pPtr[iOff+nVarint], nByte-nVarint); + if( iOff==iEnd ){ + iOff = 
0; + }else{ + nByte = fts5GetNativeU32(&pPtr[iOff-sizeof(int)]); + iOff = iOff - sizeof(int) - nByte; + } + } + if( rc==SQLITE_OK ){ + rc = xTermDone(pCtx); + } + } + sqlite3_free(pList); + pList = pNext; + } + } + return rc; +} + + + diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 737c226051..e1f6c70f60 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -17,7 +17,6 @@ */ #include "fts5Int.h" -#include "fts3_hash.h" /* ** Overview: @@ -276,8 +275,6 @@ typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; -typedef struct Fts5PendingDoclist Fts5PendingDoclist; -typedef struct Fts5PendingPoslist Fts5PendingPoslist; typedef struct Fts5PosIter Fts5PosIter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; @@ -300,7 +297,7 @@ struct Fts5Index { ** Variables related to the accumulation of tokens and doclists within the ** in-memory hash tables before they are flushed to disk. */ - Fts3Hash *aHash; /* One hash for terms, one for each prefix */ + Fts5Hash **apHash; /* Array of hash tables */ int nMaxPendingData; /* Max pending data before flush to disk */ int nPendingData; /* Current bytes of pending data */ i64 iWriteRowid; /* Rowid for current doc being written */ @@ -347,26 +344,6 @@ struct Fts5Data { int nRef; /* Ref count */ }; -/* -** Before it is flushed to a level-0 segment, term data is collected in -** the hash tables in the Fts5Index.aHash[] array. Hash table keys are -** terms (or, for prefix indexes, term prefixes) and values are instances -** of type Fts5PendingDoclist. 
-*/ -struct Fts5PendingDoclist { - u8 *pTerm; /* Term for this entry */ - int nTerm; /* Bytes of data at pTerm */ - Fts5PendingPoslist *pPoslist; /* Linked list of position lists */ - int iCol; /* Column for last entry in pPending */ - int iPos; /* Pos value for last entry in pPending */ - Fts5PendingDoclist *pNext; /* Used during merge sort */ -}; -struct Fts5PendingPoslist { - i64 iRowid; /* Rowid for this doclist entry */ - Fts5Buffer buf; /* Current doclist contents */ - Fts5PendingPoslist *pNext; /* Previous poslist for same term */ -}; - /* ** The contents of the "structure" record for each index are represented ** using an Fts5Structure record in memory. Which uses instances of the @@ -2458,18 +2435,6 @@ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ return (p->rc || pIter->chunk.pLeaf==0); } - -/* -** Allocate memory. The difference between this function and fts5IdxMalloc() -** is that this increments the Fts5Index.nPendingData variable by the -** number of bytes allocated. It should be used for all allocations used -** to store pending-data within the in-memory hash tables. -*/ -static void *fts5PendingMalloc(Fts5Index *p, int nByte){ - p->nPendingData += nByte; - return fts5IdxMalloc(p, nByte); -} - /* ** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken) ** in hash table for index iIdx. If iIdx is zero, this is the main terms @@ -2485,78 +2450,11 @@ static void fts5AddTermToHash( int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ - Fts5Config *pConfig = p->pConfig; - Fts3Hash *pHash; - Fts5PendingDoclist *pDoclist; - Fts5PendingPoslist *pPoslist; - i64 iRowid = p->iWriteRowid; /* Rowid associated with these tokens */ - - /* If an error has already occured this call is a no-op. */ - if( p->rc!=SQLITE_OK ) return; - - /* Find the hash table to use. It has already been allocated. 
*/ - assert( iIdx<=pConfig->nPrefix ); - assert( iIdx==0 || nToken==pConfig->aPrefix[iIdx-1] ); - pHash = &p->aHash[iIdx]; - - /* Find the doclist to append to. Allocate a new doclist object if - ** required. */ - pDoclist = (Fts5PendingDoclist*)fts3HashFind(pHash, pToken, nToken); - if( pDoclist==0 ){ - Fts5PendingDoclist *pDel; - pDoclist = fts5PendingMalloc(p, sizeof(Fts5PendingDoclist) + nToken); - if( pDoclist==0 ) return; - pDoclist->pTerm = (u8*)&pDoclist[1]; - pDoclist->nTerm = nToken; - memcpy(pDoclist->pTerm, pToken, nToken); - pDel = fts3HashInsert(pHash, pDoclist->pTerm, nToken, pDoclist); - if( pDel ){ - assert( pDoclist==pDel ); - sqlite3_free(pDel); - p->rc = SQLITE_NOMEM; - return; - } - } - - /* Find the poslist to append to. Allocate a new object if required. */ - pPoslist = pDoclist->pPoslist; - if( pPoslist==0 || pPoslist->iRowid!=iRowid ){ - pPoslist = fts5PendingMalloc(p, sizeof(Fts5PendingPoslist)); - if( pPoslist==0 ) return; - pPoslist->pNext = pDoclist->pPoslist; - pPoslist->iRowid = iRowid; - pDoclist->pPoslist = pPoslist; - pDoclist->iCol = 0; - pDoclist->iPos = 0; - } - - /* Append the values to the position list. */ - if( iCol>=0 ){ - p->nPendingData -= pPoslist->buf.nSpace; - if( iCol!=pDoclist->iCol ){ - fts5BufferAppendVarint(&p->rc, &pPoslist->buf, 1); - fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iCol); - pDoclist->iCol = iCol; - pDoclist->iPos = 0; - } - fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iPos + 2 - pDoclist->iPos); - p->nPendingData += pPoslist->buf.nSpace; - pDoclist->iPos = iPos; - } -} - -/* -** Free the pending-doclist object passed as the only argument. 
-*/ -static void fts5FreePendingDoclist(Fts5PendingDoclist *p){ - Fts5PendingPoslist *pPoslist; - Fts5PendingPoslist *pNext; - for(pPoslist=p->pPoslist; pPoslist; pPoslist=pNext){ - pNext = pPoslist->pNext; - fts5BufferFree(&pPoslist->buf); - sqlite3_free(pPoslist); + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3Fts5HashWrite( + p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken + ); } - sqlite3_free(p); } /* @@ -2582,15 +2480,11 @@ void sqlite3Fts5IndexWrite( if( p->rc!=SQLITE_OK ) return; /* Allocate hash tables if they have not already been allocated */ - if( p->aHash==0 ){ + if( p->apHash==0 ){ int nHash = pConfig->nPrefix + 1; - p->aHash = (Fts3Hash*)sqlite3_malloc(sizeof(Fts3Hash) * nHash); - if( p->aHash==0 ){ - p->rc = SQLITE_NOMEM; - }else{ - for(i=0; iaHash[i], FTS3_HASH_STRING, 0); - } + p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash); + for(i=0; p->rc==SQLITE_OK && irc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData); } } @@ -2635,89 +2529,6 @@ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ return 0; } -static Fts5PendingDoclist *fts5PendingMerge( - Fts5Index *p, - Fts5PendingDoclist *pLeft, - Fts5PendingDoclist *pRight -){ - Fts5PendingDoclist *p1 = pLeft; - Fts5PendingDoclist *p2 = pRight; - Fts5PendingDoclist *pRet = 0; - Fts5PendingDoclist **ppOut = &pRet; - - while( p1 || p2 ){ - if( p1==0 ){ - *ppOut = p2; - p2 = 0; - }else if( p2==0 ){ - *ppOut = p1; - p1 = 0; - }else{ - int nCmp = MIN(p1->nTerm, p2->nTerm); - int res = memcmp(p1->pTerm, p2->pTerm, nCmp); - if( res==0 ) res = p1->nTerm - p2->nTerm; - - if( res>0 ){ - /* p2 is smaller */ - *ppOut = p2; - ppOut = &p2->pNext; - p2 = p2->pNext; - }else{ - /* p1 is smaller */ - *ppOut = p1; - ppOut = &p1->pNext; - p1 = p1->pNext; - } - *ppOut = 0; - } - } - - return pRet; -} - -/* -** Extract all tokens from hash table iHash and link them into a list -** in sorted order. The hash table is cleared before returning. 
It is -** the responsibility of the caller to free the elements of the returned -** list. -** -** If an error occurs, set the Fts5Index.rc error code. If an error has -** already occurred, this function is a no-op. -*/ -static Fts5PendingDoclist *fts5PendingList(Fts5Index *p, int iHash){ - const int nMergeSlot = 32; - Fts3Hash *pHash; - Fts3HashElem *pE; /* Iterator variable */ - Fts5PendingDoclist **ap; - Fts5PendingDoclist *pList; - int i; - - ap = fts5IdxMalloc(p, sizeof(Fts5PendingDoclist*) * nMergeSlot); - if( !ap ) return 0; - - pHash = &p->aHash[iHash]; - for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ - int i; - Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE); - assert( pDoclist->pNext==0 ); - for(i=0; ap[i]; i++){ - pDoclist = fts5PendingMerge(p, pDoclist, ap[i]); - ap[i] = 0; - } - ap[i] = pDoclist; - } - - pList = 0; - for(i=0; ipConfig; int i; for(i=0; i<=pConfig->nPrefix; i++){ - Fts3Hash *pHash = &p->aHash[i]; - Fts3HashElem *pE; /* Iterator variable */ - for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ - Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE); - fts5FreePendingDoclist(pDoclist); - } - fts3HashClear(pHash); + sqlite3Fts5HashClear(p->apHash[i]); } p->nPendingData = 0; } @@ -3012,44 +2817,6 @@ static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0); } -/* -** Write the contents of pending-doclist object pDoclist to writer pWriter. -** -** If an error occurs, set the Fts5Index.rc error code. If an error has -** already occurred, this function is a no-op. 
-*/ -static void fts5WritePendingDoclist( - Fts5Index *p, /* FTS5 backend object */ - Fts5SegWriter *pWriter, /* Write to this writer object */ - Fts5PendingDoclist *pDoclist /* Doclist to write to pWriter */ -){ - Fts5PendingPoslist *pPoslist; /* Used to iterate through the doclist */ - - /* Append the term */ - fts5WriteAppendTerm(p, pWriter, pDoclist->nTerm, pDoclist->pTerm); - - /* Append the position list for each rowid */ - for(pPoslist=pDoclist->pPoslist; pPoslist; pPoslist=pPoslist->pNext){ - int i = 0; - - /* Append the rowid itself */ - fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid); - - /* Append the size of the position list in bytes */ - fts5WriteAppendPoslistInt(p, pWriter, pPoslist->buf.n); - - /* Copy the position list to the output segment */ - while( ibuf.n){ - int iVal; - i += getVarint32(&pPoslist->buf.p[i], iVal); - fts5WriteAppendPoslistInt(p, pWriter, iVal); - } - } - - /* Write the doclist terminator */ - fts5WriteAppendZerobyte(p, pWriter); -} - /* ** Flush any data cached by the writer object to the database. Free any ** allocations associated with the writer. 
@@ -3386,6 +3153,53 @@ static void fts5IndexWork( } } +typedef struct Fts5FlushCtx Fts5FlushCtx; +struct Fts5FlushCtx { + Fts5Index *pIdx; + Fts5SegWriter writer; +}; + +static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){ + Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; + int rc = SQLITE_OK; + fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm); + return rc; +} + +static int fts5FlushTermDone(void *pCtx){ + Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; + int rc = SQLITE_OK; + /* Write the doclist terminator */ + fts5WriteAppendZerobyte(p->pIdx, &p->writer); + return rc; +} + +static int fts5FlushNewEntry( + void *pCtx, + i64 iRowid, + const u8 *aPoslist, + int nPoslist +){ + Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; + int rc = SQLITE_OK; + int i = 0; + + /* Append the rowid itself */ + fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid); + + /* Append the size of the position list in bytes */ + fts5WriteAppendPoslistInt(p->pIdx, &p->writer, nPoslist); + + /* Copy the position list to the output segment */ + while( ipIdx, &p->writer, iVal); + } + + return rc; +} + /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. 
@@ -3404,24 +3218,19 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ - Fts5SegWriter writer; - Fts5PendingDoclist *pList; - Fts5PendingDoclist *pIter; - Fts5PendingDoclist *pNext; - Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ + int rc; + Fts5FlushCtx ctx; - pList = fts5PendingList(p, iHash); - assert( pList!=0 || p->rc!=SQLITE_OK ); - fts5WriteInit(p, &writer, iHash, iSegid); + fts5WriteInit(p, &ctx.writer, iHash, iSegid); + ctx.pIdx = p; - for(pIter=pList; pIter; pIter=pNext){ - pNext = pIter->pNext; - fts5WritePendingDoclist(p, &writer, pIter); - fts5FreePendingDoclist(pIter); - } - fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); + rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, + fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone + ); + if( p->rc==SQLITE_OK ) p->rc = rc; + fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); /* Edit the Fts5Structure and write it back to the database. */ if( pStruct->nLevel==0 ){ @@ -3452,7 +3261,7 @@ static void fts5IndexFlush(Fts5Index *p){ /* If an error has already occured this call is a no-op. 
*/ if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return; - assert( p->aHash ); + assert( p->apHash ); /* Flush the terms and each prefix index to disk */ for(i=0; i<=pConfig->nPrefix; i++){ @@ -3555,7 +3364,13 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); - sqlite3_free(p->aHash); + if( p->apHash ){ + int i; + for(i=0; i<=p->pConfig->nPrefix; i++){ + sqlite3Fts5HashFree(p->apHash[i]); + } + sqlite3_free(p->apHash); + } sqlite3_free(p->zDataTbl); sqlite3_free(p); return rc; @@ -4315,7 +4130,7 @@ static void fts5SetupPrefixIter( if( aBuf && pStruct ){ Fts5DoclistIter *pDoclist; int i; - i64 iLastRowid; + i64 iLastRowid = 0; Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ Fts5Buffer doclist; diff --git a/main.mk b/main.mk index 1a75c39827..5ac76de2d8 100644 --- a/main.mk +++ b/main.mk @@ -77,6 +77,7 @@ LIBOBJ += fts5_aux.o LIBOBJ += fts5_buffer.o LIBOBJ += fts5_config.o LIBOBJ += fts5_expr.o +LIBOBJ += fts5_hash.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5parse.o @@ -232,6 +233,7 @@ SRC += \ $(TOP)/ext/fts5/fts5.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ + $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ $(TOP)/ext/fts5/fts5_storage.c @@ -599,6 +601,9 @@ fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c +fts5_hash.o: $(TOP)/ext/fts5/fts5_hash.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_hash.c + fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c diff --git a/manifest b/manifest index e3d5197922..c888cfba5a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\suninitialized\svariable\scausing\sa\sproblem\sduring\sfts5\stable\sinitialization. 
-D 2014-08-09T18:22:59.679 +C Replace\sthe\shash\stable\sborrowed\sfrom\sfts3. +D 2014-08-11T19:44:52.686 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,12 +105,13 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 15e585ed0194f94a1da360808f29184f9d44554c F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a -F ext/fts5/fts5Int.h 410001da21bcc3d09b4290d4858352d0985ac7a6 +F ext/fts5/fts5Int.h f17a25546d598fdc5cc47f576d38063fd9290963 F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 -F ext/fts5/fts5_index.c 75b2ebfa97ad6054bba98cb923cd2d3c6cc5b112 +F ext/fts5/fts5_hash.c 2af412d00f65ad427f18acbe421c113413cdef06 +F ext/fts5/fts5_index.c ccef8703b6228a39090b0a03b83f163e69627ff2 F ext/fts5/fts5_storage.c fa3c8fc4766d850a4977bf1d4b71c37e7b07ab8b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -156,7 +157,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 8118631727a27fa88eb38a07ac3b86ecb86e9eb0 +F main.mk c4fff232b880b91bf665cd2951465de61178e444 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1162,7 +1163,7 @@ F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5 F tool/getlock.c 
f4c39b651370156cae979501a7b156bdba50e7ce F tool/lemon.c 3ff0fec22f92dfb54e62eeb48772eddffdbeb0d6 F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc -F tool/loadfts.c 3bdd46090112c84df44a4fbae740af3836108b3f +F tool/loadfts.c b5b3206ddd58d89ec8d54038c784bcadd6195915 F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6 F tool/mkautoconfamal.sh f8d8dbf7d62f409ebed5134998bf5b51d7266383 F tool/mkkeywordhash.c dfff09dbbfaf950e89af294f48f902181b144670 @@ -1201,7 +1202,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2821825f7a481755a333dcdcad780b3e24448f20 -R c0a232bfa9626e6e9a9c306fc05ca763 +P a14fa876f0eb66028e302b908967cc4a05ede9fc +R b81a5fabd4e838059b5d12635ffcd939 U dan -Z 9113dc9c4d427c4fad9a129f5cfa7a9b +Z 39c621bf94a400035f58731d1ee0f6cd diff --git a/manifest.uuid b/manifest.uuid index 332f5fc368..602ab0978e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a14fa876f0eb66028e302b908967cc4a05ede9fc \ No newline at end of file +617e2fac1c128212254f71b1a8fddaf0d1d90262 \ No newline at end of file diff --git a/tool/loadfts.c b/tool/loadfts.c index 18bd355a4d..7da07b15dd 100644 --- a/tool/loadfts.c +++ b/tool/loadfts.c @@ -69,6 +69,7 @@ static void showHelp(const char *zArgv0){ " -fts [345] FTS version to use (default=5)\n" " -idx [01] Create a mapping from filename to rowid (default=0)\n" " -dir Root of directory tree to load data from (default=.)\n" +" -trans Number of inserts per transaction (default=1)\n" , zArgv0 ); exit(1); @@ -96,6 +97,7 @@ static void sqlite_error_out(const char *zText, sqlite3 *db){ */ typedef struct VisitContext VisitContext; struct VisitContext { + int nRowPerTrans; sqlite3 *db; /* Database handle */ sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */ }; @@ -112,7 +114,13 @@ void visit_file(void 
*pCtx, const char *zPath){
   sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
   sqlite3_step(p->pInsert);
   rc = sqlite3_reset(p->pInsert);
-  if( rc!=SQLITE_OK ) sqlite_error_out("insert", p->db);
+  if( rc!=SQLITE_OK ){
+    sqlite_error_out("insert", p->db);
+  }else if( p->nRowPerTrans>0
+         && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0
+  ){
+    /* The rowid just inserted is a multiple of nRowPerTrans: commit the
+    ** open transaction and immediately start the next one. */
+    sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0);
+  }
 }
 
 /*
@@ -150,6 +158,7 @@ int main(int argc, char **argv){
   const char *zDir = ".";         /* Directory to scan */
   int i;
   int rc;
+  int nRowPerTrans = 0;           /* Value of -trans; 0 -> no explicit BEGIN */
   sqlite3 *db;
   char *zSql;
   VisitContext sCtx;
@@ -163,6 +172,9 @@ int main(int argc, char **argv){
       iFts = atoi(zArg);
       if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
     }
+    else if( strcmp(zOpt, "-trans")==0 ){
+      /* "else if", not "if": this test sits in the middle of the option
+      ** chain and must not detach the "-fts" test above from the
+      ** "else if" branches that follow. */
+      nRowPerTrans = atoi(zArg);
+    }
     else if( strcmp(zOpt, "-idx")==0 ){
       bMap = atoi(zArg);
       if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
@@ -189,13 +201,16 @@ int main(int argc, char **argv){
   /* Compile the INSERT statement to write data to the FTS table. */
   memset(&sCtx, 0, sizeof(VisitContext));
   sCtx.db = db;
+  sCtx.nRowPerTrans = nRowPerTrans;
   rc = sqlite3_prepare_v2(db,
       "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
   );
   if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);
 
   /* Load all files in the directory hierarchy into the FTS table. */
+  if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
   traverse(zDir, (void*)&sCtx, visit_file);
+  if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);
 
   /* Clean up and exit. */
   sqlite3_finalize(sCtx.pInsert);