From: drh Date: Tue, 21 Apr 2009 15:05:18 +0000 (+0000) Subject: New comments and minor refactoring of rowhash.c. (CVS 6529) X-Git-Tag: version-3.6.15~221 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ade9760aeac023d8ec26f9f19dd841bb9b6878e0;p=thirdparty%2Fsqlite.git New comments and minor refactoring of rowhash.c. (CVS 6529) FossilOrigin-Name: b8cb4f3e2473afaee7c147a6b3f0972f69391a9a --- diff --git a/manifest b/manifest index 1473931c6c..416c7b1394 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sa\sredundant\stest\sfrom\ssqlite3_shutdown().\s(CVS\s6528) -D 2009-04-21T12:02:56 +C New\scomments\sand\sminor\srefactoring\sof\srowhash.c.\s(CVS\s6529) +D 2009-04-21T15:05:19 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in fa5998fe08bd8c0fdc7f9f66cea16c0279f39da8 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -154,7 +154,7 @@ F src/prepare.c 72d74e6d3b9c8eb0663b33ec6438aa718096ac79 F src/printf.c ea2d76000cc5f4579d7e9cb2f5460433eec0d384 F src/random.c 676b9d7ac820fe81e6fb2394ac8c10cff7f38628 F src/resolve.c 094e44450371fb27869eb8bf679aacbe51fdc56d -F src/rowhash.c f1ebc89222c4095caf40d18176aed408669ddaa5 +F src/rowhash.c c4ab59c04edf7de128e08e27dcb54f72bfbea9bc F src/rowset.c badb9f36b3a2ced9ee9551f4ce730f5fab442791 F src/select.c 35225756c247484f473678e5bd191d70a6e4dba0 F src/shell.c 0a11f831603f17fea20ca97133c0f64e716af4a7 @@ -201,7 +201,7 @@ F src/update.c 8ededddcde6f7b6da981dd0429a5d34518a475b7 F src/utf.c 9541d28f40441812c0b40f00334372a0542c00ff F src/util.c 828c552a22a1d5b650b8a5ea0009546715c45d93 F src/vacuum.c 07121a727beeee88f27d704a00313ad6a7c9bef0 -F src/vdbe.c 60db222db8d0f04a7fd2c754e99754eb83d6ed24 +F src/vdbe.c 6df1766d2c2961cf8224d935db57c4a6ec948481 F src/vdbe.h 35a648bc3279a120da24f34d9a25213ec15daf8a F src/vdbeInt.h d3adfeccc750643ae7861f2d29f579d3dad28785 F src/vdbeapi.c 015c9d0fb7047657a13a7bb6aa886f75e43db02d @@ -721,7 +721,7 @@ F 
tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81 F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e -P f61e4cd93682fd98bea2a71d346f9eaa68454390 -R e41290121cd78e7346039a2f88271146 +P 6f481ceb503c7df74d9417a5a7f019ff56261ea8 +R a4e163b71977e7fa78f31b9c9adb7e88 U drh -Z feaa88976462a8a95a140c3c3f221904 +Z 8c403435bade6e8de78fced09d0d2883 diff --git a/manifest.uuid b/manifest.uuid index a2f7e631a6..8df6d3dd1c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -6f481ceb503c7df74d9417a5a7f019ff56261ea8 \ No newline at end of file +b8cb4f3e2473afaee7c147a6b3f0972f69391a9a \ No newline at end of file diff --git a/src/rowhash.c b/src/rowhash.c index 100c8db863..35ddbbfc51 100644 --- a/src/rowhash.c +++ b/src/rowhash.c @@ -10,21 +10,56 @@ ** ************************************************************************* ** -** This file contains the implementation of the "row-hash" data structure. +** This file contains the implementation of the RowHash data structure. +** A RowHash has the following properties: ** -** $Id: rowhash.c,v 1.1 2009/04/21 09:02:47 danielk1977 Exp $ +** * A RowHash stores an unordered "bag" of 64-bit integer rowids. +** There is no other content. +** +** * Primitive operations are CREATE, INSERT, TEST, and DESTROY. +** There is no way to remove individual elements from the RowHash +** once they are inserted. +** +** * INSERT operations are batched. TEST operation will ignore +** elements in the current INSERT batch. Only elements inserted +** in prior batches will be seen by a TEST. +** +** The insert batch number is a parameter to the TEST primitive. The +** hash table is rebuilt whenever the batch number increases. TEST +** operations only look for INSERTs that occurred in prior batches. +** +** The caller is responsible for ensuring that there are no duplicate +** INSERTs. 
+** +** $Id: rowhash.c,v 1.2 2009/04/21 15:05:19 drh Exp $ */ #include "sqliteInt.h" -typedef struct RowHashElem RowHashElem; -typedef struct RowHashBlock RowHashBlock; - /* -** Size of heap allocations made by this module. This limit is -** never exceeded. +** An upper bound on the size of heap allocations made by this module. +** Limiting the size of allocations helps to avoid memory fragmentation. */ #define ROWHASH_ALLOCATION 1024 +/* +** If there are less than this number of elements in the RowHash, do not +** bother building a hash-table. Just do a linear search. +*/ +#define ROWHASH_LINEAR_SEARCH_LIMIT 10 + +/* +** This value is what we want the average length of the collision hash +** chain to be. +*/ +#define ROWHASH_COLLISION_LENGTH 3 + + +/* Forward references to data structures. */ +typedef struct RowHashElem RowHashElem; +typedef struct RowHashBlock RowHashBlock; +typedef union RowHashPtr RowHashPtr; +typedef struct RowHashPage RowHashPage; + /* ** Number of elements in the RowHashBlock.aElem[] array. This array is ** sized to make RowHashBlock very close to (without exceeding) @@ -39,34 +74,61 @@ typedef struct RowHashBlock RowHashBlock; ** Number of pointers that fit into a single allocation of ** ROWHASH_ALLOCATION bytes. */ -#define ROWHASH_POINTER_PER_PAGE (ROWHASH_ALLOCATION/sizeof(void *)) +#define ROWHASH_POINTER_PER_PAGE (ROWHASH_ALLOCATION/sizeof(RowHashPtr)) /* -** If there are less than this number of elements in the block-list, do not -** bother building a hash-table. Just do a linear search of the list when -** querying. +** A page of pointers used to construct a hash table. +** +** The hash table is actually a tree composed of instances of this +** object. Leaves of the tree use the a[].pElem pointer to point +** to RowHashElem entries. Interior nodes of the tree use the +** a[].pPage element to point to subpages. 
+** +** The hash table is split into a tree in order to avoid having +** to make large memory allocations, since large allocations can +** result in unwanted memory fragmentation. */ -#define ROWHASH_LINEAR_SEARCH_LIMIT 10 +struct RowHashPage { + union RowHashPtr { + RowHashPage *pPage; /* Used by interior nodes. Pointer to subtree. */ + RowHashElem *pElem; /* Used by leaves. Pointer to hash entry. */ + } a[ROWHASH_POINTER_PER_PAGE]; +}; /* -** Element stored in the hash-table. +** Each 64-bit integer in a RowHash is stored as an instance of +** this object. +** +** Instances of this object are not allocated separately. They are +** allocated in large blocks using the RowHashBlock object as a container. */ struct RowHashElem { - i64 iVal; - RowHashElem *pNext; + i64 iVal; /* The value being stored. A rowid. */ + RowHashElem *pNext; /* Next element with the same hash */ }; /* -** The following structure is either exactly ROWHASH_ALLOCATION bytes in -** size or just slightly less. It stores up to ROWHASH_ELEM_PER_BLOCK -** RowHashElem structures. +** In order to avoid many small allocations of RowHashElem objects, +** multiple RowHashElem objects are allocated at once, as an instance +** of this object, and then used as needed. +** +** A single RowHash object will allocate one or more of these RowHashBlock +** objects. As many are allocated as are needed to store all of the +** content. All RowHashBlocks are kept on a linked list formed using +** RowHashBlock.data.pNext so that they can be freed when the RowHash +** is destroyed. +** +** The linked list of RowHashBlock objects also provides a way to sequentially +** scan all elements in the RowHash. This sequential scan is used when +** rebuilding the hash table. The hash table is rebuilt after every +** batch of inserts. 
*/ struct RowHashBlock { struct RowHashBlockData { - int nElem; - RowHashBlock *pNext; + int nUsed; /* Num of aElem[] currently used in this block */ + RowHashBlock *pNext; /* Next RowHashBlock object in list of them all */ } data; - RowHashElem aElem[ROWHASH_ELEM_PER_BLOCK]; + RowHashElem aElem[ROWHASH_ELEM_PER_BLOCK]; /* Available RowHashElem objects */ }; /* @@ -75,24 +137,23 @@ struct RowHashBlock { */ struct RowHash { /* Variables populated by sqlite3RowhashInsert() */ - int nEntry; /* Total number of entries in block-list */ - RowHashBlock *pBlock; /* Linked list of entries */ + int nEntry; /* Number of used entries over all RowHashBlocks */ + RowHashBlock *pBlock; /* Linked list of RowHashBlocks */ /* Variables populated by makeHashTable() */ - int iSet; /* Most recent iSet parameter passed to Test() */ + int iBatch; /* The current insert batch number */ int iMod; /* Number of buckets in hash table */ - int nLeaf; /* Number of leaf pages in hash table */ - int nHeight; /* Height of tree containing leaf pages */ - void *pHash; /* Pointer to root of tree */ + int nHeight; /* Height of tree of hash pages */ + RowHashPage *pHash; /* Pointer to root of hash table tree */ int nLinearLimit; /* Linear search limit (used if pHash==0) */ }; /* -** Allocate a tree of height nHeight with *pnLeaf leaf pages. Set *pp to -** point to the root of the tree. If the maximum number of leaf pages in a -** tree of height nHeight is less than *pnLeaf, allocate a tree with the -** maximum possible number of leaves for height nHeight. +** Allocate a hash table tree of height nHeight with *pnLeaf leaf pages. +** Set *pp to point to the root of the tree. If the maximum number of leaf +** pages in a tree of height nHeight is less than *pnLeaf, allocate only +** that part of the tree that is necessary to account for all leaves. ** ** Before returning, subtract the number of leaves in the tree allocated ** from *pnLeaf. 
@@ -100,18 +161,18 @@ struct RowHash { ** This routine returns SQLITE_NOMEM if a malloc() fails, or SQLITE_OK ** otherwise. */ -static int allocTable(void **pp, int nHeight, int *pnLeaf){ - void **ap = (void **)sqlite3MallocZero(ROWHASH_ALLOCATION); - if( !ap ){ +static int allocHashTable(RowHashPage **pp, int nHeight, int *pnLeaf){ + RowHashPage *p = (RowHashPage *)sqlite3MallocZero(sizeof(*p)); + if( !p ){ return SQLITE_NOMEM; } - *pp = (void *)ap; + *pp = p; if( nHeight==0 ){ (*pnLeaf)--; }else{ int ii; for(ii=0; ii<ROWHASH_POINTER_PER_PAGE && *pnLeaf>0; ii++){ - if( allocTable(&ap[ii], nHeight-1, pnLeaf) ){ + if( allocHashTable(&p->a[ii].pPage, nHeight-1, pnLeaf) ){ return SQLITE_NOMEM; } } @@ -120,85 +181,76 @@ static int allocTable(void **pp, int nHeight, int *pnLeaf){ } /* -** Delete the tree of height nHeight passed as the first argument. +** Delete the hash table tree of height nHeight passed as the first argument. */ -static void deleteTable(void **ap, int nHeight){ - if( ap ){ +static void deleteHashTable(RowHashPage *p, int nHeight){ + if( p ){ if( nHeight>0 ){ int ii; for(ii=0; ii<ROWHASH_POINTER_PER_PAGE; ii++){ - deleteTable((void **)ap[ii], nHeight-1); + deleteHashTable(p->a[ii].pPage, nHeight-1); } } - sqlite3_free(ap); + sqlite3_free(p); } } -/* -** Delete the hash-table stored in p->pHash. The p->pHash pointer is -** set to zero before returning. This function is the inverse of -** allocHashTable() -*/ -static void deleteHashTable(RowHash *p){ - deleteTable(p->pHash, p->nHeight); - p->pHash = 0; -} - -/* -** Allocate the hash table structure based on the current values of -** p->nLeaf and p->nHeight. -*/ -static int allocHashTable(RowHash *p){ - int nLeaf = p->nLeaf; - assert( p->pHash==0 ); - assert( p->nLeaf>0 ); - return allocTable(&p->pHash, p->nHeight, &nLeaf); -} - /* ** Find the hash-bucket associated with value iVal. Return a pointer to it. +** +** By "hash-bucket", we mean the RowHashPage.a[].pElem pointer that +** corresponds to a particular hash entry. 
*/ -static void **findHashBucket(RowHash *p, i64 iVal){ +static RowHashElem **findHashBucket(RowHash *pRowHash, i64 iVal){ int aOffset[16]; - int n = p->nHeight; - void **ap = p->pHash; - int h = (((u64)iVal) % p->iMod); - for(n=0; n<p->nHeight; n++){ + int n; + RowHashPage *pPage = pRowHash->pHash; + int h = (((u64)iVal) % pRowHash->iMod); + + assert( pRowHash->nHeight < sizeof(aOffset)/sizeof(aOffset[0]) ); + for(n=0; n<pRowHash->nHeight; n++){ int h1 = h / ROWHASH_POINTER_PER_PAGE; aOffset[n] = h - (h1 * ROWHASH_POINTER_PER_PAGE); h = h1; } aOffset[n] = h; - for(n=p->nHeight; n>0; n--){ - ap = (void **)ap[aOffset[n]]; + for(n=pRowHash->nHeight; n>0; n--){ + pPage = pPage->a[aOffset[n]].pPage; } - return &ap[aOffset[0]]; + return &pPage->a[aOffset[0]].pElem; } /* -** Build a hash table to query with sqlite3RowhashTest() based on the -** set of values stored in the linked list of RowHashBlock structures. +** Build a new hash table tree in p->pHash. The new hash table should +** contain all p->nEntry entries in the p->pBlock list. If there +** existed a prior tree, delete the old tree first before constructing +** the new one. +** +** If the number of entries (p->nEntry) is less than +** ROWHASH_LINEAR_SEARCH_LIMIT, then we are guessing that a linear +** search is going to be faster than a lookup, so do not bother +** building the hash table. */ -static int makeHashTable(RowHash *p, int iSet){ +static int makeHashTable(RowHash *p, int iBatch){ RowHashBlock *pBlock; int iMod; - int nLeaf; + int nLeaf, n; /* Delete the old hash table. */ - deleteHashTable(p); - assert( p->iSet!=iSet ); - p->iSet = iSet; + deleteHashTable(p->pHash, p->nHeight); + assert( p->iBatch!=iBatch ); + p->iBatch = iBatch; + /* Skip building the hash table if the number of elements is small */ if( p->nEntry<ROWHASH_LINEAR_SEARCH_LIMIT ){ p->nLinearLimit = p->nEntry; + p->pHash = 0; return SQLITE_OK; } /* Determine how many leaves the hash-table will comprise. 
*/ - nLeaf = 1 + (p->nEntry / ROWHASH_POINTER_PER_PAGE); - iMod = nLeaf*ROWHASH_POINTER_PER_PAGE; - p->nLeaf = nLeaf; - p->iMod = iMod; + nLeaf = 1 + (p->nEntry / (ROWHASH_POINTER_PER_PAGE*ROWHASH_COLLISION_LENGTH)); + p->iMod = iMod = nLeaf*ROWHASH_POINTER_PER_PAGE; /* Set nHeight to the height of the tree that contains the leaf pages. If ** RowHash.nHeight is zero, then the whole hash-table fits on a single @@ -207,22 +259,23 @@ static int makeHashTable(RowHash *p, int iSet){ ** to arrays of pointers to leaf pages. And so on. */ p->nHeight = 0; - while( nLeaf>1 ){ - nLeaf = (nLeaf+ROWHASH_POINTER_PER_PAGE-1) / ROWHASH_POINTER_PER_PAGE; + n = nLeaf; + while( n>1 ){ + n = (n+ROWHASH_POINTER_PER_PAGE-1) / ROWHASH_POINTER_PER_PAGE; p->nHeight++; } /* Allocate the hash-table. */ - if( allocHashTable(p) ){ + if( allocHashTable(&p->pHash, p->nHeight, &nLeaf) ){ return SQLITE_NOMEM; } /* Insert all values into the hash-table. */ for(pBlock=p->pBlock; pBlock; pBlock=pBlock->data.pNext){ - RowHashElem * const pEnd = &pBlock->aElem[pBlock->data.nElem]; + RowHashElem * const pEnd = &pBlock->aElem[pBlock->data.nUsed]; RowHashElem *pIter; for(pIter=pBlock->aElem; pIter<pEnd; pIter++){ - RowHashElem **ppElem = (RowHashElem **)findHashBucket(p, pIter->iVal); + RowHashElem **ppElem = findHashBucket(p, pIter->iVal); pIter->pNext = *ppElem; *ppElem = pIter; } @@ -232,21 +285,35 @@ static int makeHashTable(RowHash *p, int iSet){ } /* -** Test if value iVal is in the hash table. If so, set *pExists to 1 -** before returning. If iVal is not in the hash table, set *pExists to 0. +** Check to see if iVal has been inserted into the hash table "p" +** in some batch prior to iBatch. If so, set *pExists to 1. +** If not, set *pExists to 0. ** -** Return SQLITE_OK if all goes as planned. If a malloc() fails, return -** SQLITE_NOMEM. +** The hash table is rebuilt whenever iBatch changes. A hash table +** rebuild might encounter an out-of-memory condition. If that happens, +** return SQLITE_NOMEM. If there are no problems, return SQLITE_OK. +** +** The initial "batch" is 0. 
So, if there were prior calls to +** sqlite3RowhashInsert() and then this routine is invoked with iBatch==0, +** because all prior inserts were in the same batch, none of the prior +** inserts will be visible and this routine will indicate not found. +** Hence, the first invocation of this routine should probably use +** a batch number of 1. */ -int sqlite3RowhashTest(RowHash *p, int iSet, i64 iVal, int *pExists){ +int sqlite3RowhashTest( + RowHash *p, /* The RowHash to search in */ + int iBatch, /* Look for values inserted in batches prior to this batch */ + i64 iVal, /* The rowid value we are looking for */ + int *pExists /* Store 0 or 1 here to indicate not-found or found */ +){ *pExists = 0; if( p ){ assert( p->pBlock ); - if( iSet!=p->iSet && makeHashTable(p, iSet) ){ + if( iBatch!=p->iBatch && makeHashTable(p, iBatch) ){ return SQLITE_NOMEM; } if( p->pHash ){ - RowHashElem *pElem = *(RowHashElem **)findHashBucket(p, iVal); + RowHashElem *pElem = *findHashBucket(p, iVal); for(; pElem; pElem=pElem->pNext){ if( pElem->iVal==iVal ){ *pExists = 1; @@ -268,7 +335,8 @@ int sqlite3RowhashTest(RowHash *p, int iSet, i64 iVal, int *pExists){ } /* -** Insert value iVal into the RowHash object. +** Insert value iVal into the RowHash object. Allocate a new RowHash +** object if necessary. ** ** Return SQLITE_OK if all goes as planned. If a malloc() fails, return ** SQLITE_NOMEM. @@ -287,19 +355,19 @@ int sqlite3RowhashInsert(RowHash **pp, i64 iVal){ /* If the current RowHashBlock is full, or if the first RowHashBlock has ** not yet been allocated, allocate one now. */ - if( !p->pBlock || p->pBlock->data.nElem==ROWHASH_ELEM_PER_BLOCK ){ + if( !p->pBlock || p->pBlock->data.nUsed==ROWHASH_ELEM_PER_BLOCK ){ RowHashBlock *pBlock = (RowHashBlock*)sqlite3Malloc(sizeof(RowHashBlock)); if( !pBlock ){ return SQLITE_NOMEM; } - pBlock->data.nElem = 0; + pBlock->data.nUsed = 0; pBlock->data.pNext = p->pBlock; p->pBlock = pBlock; } /* Add iVal to the current RowHashBlock. 
*/ - p->pBlock->aElem[p->pBlock->data.nElem].iVal = iVal; - p->pBlock->data.nElem++; + p->pBlock->aElem[p->pBlock->data.nUsed].iVal = iVal; + p->pBlock->data.nUsed++; p->nEntry++; return SQLITE_OK; } @@ -310,7 +378,7 @@ int sqlite3RowhashInsert(RowHash **pp, i64 iVal){ void sqlite3RowhashDestroy(RowHash *p){ if( p ){ RowHashBlock *pBlock, *pNext; - deleteHashTable(p); + deleteHashTable(p->pHash, p->nHeight); for(pBlock=p->pBlock; pBlock; pBlock=pNext){ pNext = pBlock->data.pNext; sqlite3_free(pBlock); @@ -318,4 +386,3 @@ void sqlite3RowhashDestroy(RowHash *p){ sqlite3_free(p); } } - diff --git a/src/vdbe.c b/src/vdbe.c index 1cde7f531c..64760d392c 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -43,7 +43,7 @@ ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. ** -** $Id: vdbe.c,v 1.833 2009/04/21 09:02:47 danielk1977 Exp $ +** $Id: vdbe.c,v 1.834 2009/04/21 15:05:19 drh Exp $ */ #include "sqliteInt.h" #include "vdbeInt.h" @@ -4604,10 +4604,11 @@ case OP_RowSetRead: { /* jump, out3 */ /* Opcode: RowHash P1 P2 P3 P4 ** -** Register P3 is assumed to hold an integer value. If register P1 +** Register P3 is assumed to hold a 64-bit integer value. If register P1 ** contains a rowid-hash object and the rowid-hash object contains ** the value held in P3, jump to register P2. Otherwise, insert the -** integer in P3 into the rowid-hash container. +** integer in P3 into the rowid-hash container and continue on to the +** next opcode. ** ** The rowid-hash is optimized for the case where successive sets ** of integers, where each set contains no duplicates. Each set