From: drh <> Date: Sat, 8 Feb 2025 11:15:41 +0000 (+0000) Subject: Improvements to the hash table used to store symbols in the schema, so that X-Git-Tag: major-release~322^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f62d053b49c287544c9b807cf9a067b7dca252fa;p=thirdparty%2Fsqlite.git Improvements to the hash table used to store symbols in the schema, so that it works better (requires fewer calls to sqlite3StrICmp()) for large schemas, and uses less code space. FossilOrigin-Name: 0318b68c845c84eded757c67f820e1783551574ac9e5670be640c4bfe22a934b --- diff --git a/manifest b/manifest index 23bd8c8395..dedc3aa948 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\scomments\son\sthe\sParse.nMaxArgs\sfield\sso\sthat\sthey\sare\scorrect.\s\sAdd\nassert()s\sto\sensure\sthey\sare\scorrect.\s\sOther\sParse\schanges\sto\sreduce\sthe\namount\sof\smemset()\sneeded\sto\sinitialize\sit. -D 2025-02-07T19:09:20.404 +C Improvements\sto\sthe\shash\stable\sused\sto\sstore\ssymbols\sin\sthe\sschema,\sso\sthat\nit\sworks\sbetter\s(requires\sfewer\scalls\sto\ssqlite3StrICmp())\sfor\slarge\sschemas,\nand\suses\sless\scode\sspace. 
+D 2025-02-08T11:15:41.775 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d @@ -735,8 +735,8 @@ F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007 F src/fkey.c 928ed2517e8732113d2b9821aa37af639688d752f4ea9ac6e0e393d713eeb76f F src/func.c 0712a5b03fdfc8af0cda6d076bfe231b66388d3d5a28b46dc1a94b90d41cac6a F src/global.c a19e4b1ca1335f560e9560e590fc13081e21f670643367f99cb9e8f9dc7d615b -F src/hash.c 9ee4269fb1d6632a6fecfb9479c93a1f29271bddbbaf215dd60420bcb80c7220 -F src/hash.h 3340ab6e1d13e725571d7cee6d3e3135f0779a7d8e76a9ce0a85971fa3953c51 +F src/hash.c ab8e8cf8733ccef6fd00831fff56a0fbdfa886505c08778338b8d0dc2f9d003d +F src/hash.h 46b92795a95bfefb210f52f0c316e9d7cdbcdd7e7fcfb0d8be796d3a5767cddf F src/hwtime.h f9c2dfb84dce7acf95ce6d289e46f5f9d3d1afd328e53da8f8e9008e3b3caae6 F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71 F src/insert.c ccadada52dc508ab8229e343425ab2504db57cfcdf8271f0f9ce1c2c6cad97c1 @@ -2209,8 +2209,11 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 45e462c0060e51c3375a226d636148e3415ee6020e544ecc84861c7aef4ecf7b -R 07888caf6f9e73972781470e8ad03e81 +P c56092507c96723030589ddd9121bc993d615a7acd453305fc3b1dbb9e30554c +R 51c0e52e763f3a11eebcce49c33f5a3b +T *branch * hash-improvements +T *sym-hash-improvements * +T -sym-trunk * U drh -Z 434b297f89532d0b5b94eab3e74d43a4 +Z 9b37fd3883a56be3a46fd4cef0eab379 # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index fb16c00a62..764f1c6d8a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c56092507c96723030589ddd9121bc993d615a7acd453305fc3b1dbb9e30554c +0318b68c845c84eded757c67f820e1783551574ac9e5670be640c4bfe22a934b diff --git a/src/hash.c b/src/hash.c index 8ec043f119..c822cb5f1f 100644 --- a/src/hash.c +++ b/src/hash.c @@ -55,11 +55,19 @@ void sqlite3HashClear(Hash *pH){ static unsigned int strHash(const char *z){ unsigned int h = 0; unsigned char c; - while( (c = (unsigned char)*z++)!=0 ){ /*OPTIMIZATION-IF-TRUE*/ + while( z[0] ){ /*OPTIMIZATION-IF-TRUE*/ /* Knuth multiplicative hashing. (Sorting & Searching, p. 510). ** 0x9e3779b1 is 2654435761 which is the closest prime number to - ** (2**32)*golden_ratio, where golden_ratio = (sqrt(5) - 1)/2. */ - h += sqlite3UpperToLower[c]; + ** (2**32)*golden_ratio, where golden_ratio = (sqrt(5) - 1)/2. + ** + ** Only bits 0xdf for ASCII and bits 0xbf for EBCDIC of each octet are + ** hashed since the omitted bits determine the upper/lower case difference. 
+ */ +#ifdef SQLITE_EBCDIC + h += 0xbf & (unsigned char)*(z++); +#else + h += 0xdf & (unsigned char)*(z++); +#endif h *= 0x9e3779b1; } return h; @@ -132,9 +140,8 @@ static int rehash(Hash *pH, unsigned int new_size){ pH->htsize = new_size = sqlite3MallocSize(new_ht)/sizeof(struct _ht); memset(new_ht, 0, new_size*sizeof(struct _ht)); for(elem=pH->first, pH->first=0; elem; elem = next_elem){ - unsigned int h = strHash(elem->pKey) % new_size; next_elem = elem->next; - insertElement(pH, &new_ht[h], elem); + insertElement(pH, &new_ht[elem->h % new_size], elem); } return 1; } @@ -154,21 +161,20 @@ static HashElem *findElementWithHash( unsigned int h; /* The computed hash */ static HashElem nullElement = { 0, 0, 0, 0 }; + h = strHash(pKey); if( pH->ht ){ /*OPTIMIZATION-IF-TRUE*/ struct _ht *pEntry; - h = strHash(pKey) % pH->htsize; - pEntry = &pH->ht[h]; + pEntry = &pH->ht[h % pH->htsize]; elem = pEntry->chain; count = pEntry->count; }else{ - h = 0; elem = pH->first; count = pH->count; } if( pHash ) *pHash = h; while( count ){ assert( elem!=0 ); - if( sqlite3StrICmp(elem->pKey,pKey)==0 ){ + if( h==elem->h && sqlite3StrICmp(elem->pKey,pKey)==0 ){ return elem; } elem = elem->next; @@ -180,10 +186,9 @@ static HashElem *findElementWithHash( /* Remove a single entry from the hash table given a pointer to that ** element and a hash on the element's key. 
*/ -static void removeElementGivenHash( +static void removeElement( Hash *pH, /* The pH containing "elem" */ - HashElem* elem, /* The element to be removed from the pH */ - unsigned int h /* Hash value for the element */ + HashElem *elem /* The element to be removed from the pH */ ){ struct _ht *pEntry; if( elem->prev ){ @@ -195,7 +200,7 @@ static void removeElementGivenHash( elem->next->prev = elem->prev; } if( pH->ht ){ - pEntry = &pH->ht[h]; + pEntry = &pH->ht[elem->h % pH->htsize]; if( pEntry->chain==elem ){ pEntry->chain = elem->next; } @@ -246,7 +251,7 @@ void *sqlite3HashInsert(Hash *pH, const char *pKey, void *data){ if( elem->data ){ void *old_data = elem->data; if( data==0 ){ - removeElementGivenHash(pH,elem,h); + removeElement(pH,elem); }else{ elem->data = data; elem->pKey = pKey; @@ -257,14 +262,12 @@ void *sqlite3HashInsert(Hash *pH, const char *pKey, void *data){ new_elem = (HashElem*)sqlite3Malloc( sizeof(HashElem) ); if( new_elem==0 ) return data; new_elem->pKey = pKey; + new_elem->h = h; new_elem->data = data; pH->count++; - if( pH->count>=10 && pH->count > 2*pH->htsize ){ - if( rehash(pH, pH->count*2) ){ - assert( pH->htsize>0 ); - h = strHash(pKey) % pH->htsize; - } + if( pH->count>=5 && pH->count > 2*pH->htsize ){ + rehash(pH, pH->count*3); } - insertElement(pH, pH->ht ? &pH->ht[h] : 0, new_elem); + insertElement(pH, pH->ht ? &pH->ht[new_elem->h % pH->htsize] : 0, new_elem); return 0; } diff --git a/src/hash.h b/src/hash.h index 3f491e45c0..cff65d6e50 100644 --- a/src/hash.h +++ b/src/hash.h @@ -60,6 +60,7 @@ struct HashElem { HashElem *next, *prev; /* Next and previous elements in the table */ void *data; /* Data associated with this element */ const char *pKey; /* Key associated with this element */ + unsigned int h; /* hash for pKey */ }; /*