From: Justin Viiret
Date: Mon, 28 Mar 2016 22:32:26 +0000 (+1100)
Subject: util-hash-lookup3: Add hashlittle_safe() variant
X-Git-Tag: suricata-3.0.1~14
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c8e01a3d62c116ee5cad4d4a86c300d55d3781e9;p=thirdparty%2Fsuricata.git

util-hash-lookup3: Add hashlittle_safe() variant

By default, hashlittle() will read off the end of the key, up to the next
four-byte boundary, although the data beyond the end of the key doesn't
affect the hash. This read causes uninitialized read warnings from Valgrind
and Address Sanitizer.

Here we add hashlittle_safe(), which avoids reading off the end of the
buffer (using the code inside the VALGRIND-guarded block in the original
hashlittle() implementation).
---
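
For illustration only (not part of this change): a minimal sketch of the
behaviour described above, hashing a key held in an exactly-sized heap
buffer whose length is not a multiple of four. The main() harness and the
build line are assumptions; only hashlittle() and hashlittle_safe() come
from util-hash-lookup3.

  /* Hypothetical demo; build together with util-hash-lookup3.c, e.g.
   *   gcc -fsanitize=address demo.c util-hash-lookup3.c */
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  /* Declarations as provided by util-hash-lookup3.h. */
  uint32_t hashlittle(const void *key, size_t length, uint32_t initval);
  uint32_t hashlittle_safe(const void *key, size_t length, uint32_t initval);

  int main(void)
  {
      const char *word = "hello";          /* 5 bytes: not a multiple of 4 */
      size_t len = strlen(word);

      char *key = malloc(len);             /* exactly-sized heap buffer */
      if (key == NULL)
          return 1;
      memcpy(key, word, len);

      /* hashlittle(key, len, 0) would read past 'key' up to the next
       * four-byte boundary; the extra bytes never change the hash, but
       * Valgrind/ASan report the access. hashlittle_safe() stops at the
       * last byte of the key. */
      uint32_t h = hashlittle_safe(key, len, 0);
      printf("hash: 0x%08x\n", (unsigned)h);

      free(key);
      return 0;
  }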
diff --git a/src/util-hash-lookup3.c b/src/util-hash-lookup3.c
index f765521d6e..d13fa16af2 100644
--- a/src/util-hash-lookup3.c
+++ b/src/util-hash-lookup3.c
@@ -449,6 +449,180 @@ uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
 }
 
 
+/*
+-------------------------------------------------------------------------------
+hashlittle_safe() -- hash a variable-length key into a 32-bit value
+  k       : the key (the unaligned variable-length array of bytes)
+  length  : the length of the key, counting by bytes
+  initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Two keys differing by one or two bits will have
+totally different hash values.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+  h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+  for (i=0, h=0; i<n; ++i) h = hashlittle_safe( k[i], len[i], h);
+-------------------------------------------------------------------------------
+*/
+
+uint32_t hashlittle_safe(const void *key, size_t length, uint32_t initval)
+{
+  uint32_t a,b,c;                                          /* internal state */
+  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
+
+  /* Set up the internal state */
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+  u.ptr = key;
+  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+
+    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+    while (length > 12)
+    {
+      a += k[0];
+      b += k[1];
+      c += k[2];
+      mix(a,b,c);
+      length -= 12;
+      k += 3;
+    }
+
+    /*----------------------------- handle the last (probably partial) block */
+    /*
+     * Note that unlike hashlittle() above, we use the "safe" version of this
+     * block that is #ifdef VALGRIND above, in order to avoid warnings from
+     * Valgrind or Address Sanitizer.
+     */
+
+    const uint8_t *k8 = (const uint8_t *)k;
+
+    switch(length)
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
+    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
+    case 9 : c+=k8[8];                   /* fall through */
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
+    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
+    case 5 : b+=k8[4];                   /* fall through */
+    case 4 : a+=k[0]; break;
+    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
+    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
+    case 1 : a+=k8[0]; break;
+    case 0 : return c;
+    }
+  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
+    const uint8_t *k8;
+
+    /*--------------- all but last block: aligned reads and different mixing */
+    while (length > 12)
+    {
+      a += k[0] + (((uint32_t)k[1])<<16);
+      b += k[2] + (((uint32_t)k[3])<<16);
+      c += k[4] + (((uint32_t)k[5])<<16);
+      mix(a,b,c);
+      length -= 12;
+      k += 6;
+    }
+
+    /*----------------------------- handle the last (probably partial) block */
+    k8 = (const uint8_t *)k;
+    switch(length)
+    {
+    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+             b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
+    case 10: c+=k[4];
+             b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 9 : c+=k8[8];                   /* fall through */
+    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
+    case 6 : b+=k[2];
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 5 : b+=k8[4];                   /* fall through */
+    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
+    case 2 : a+=k[0];
+             break;
+    case 1 : a+=k8[0];
+             break;
+    case 0 : return c;              /* zero length requires no mixing */
+    }
+
+  } else {                    /* need to read the key one byte at a time */
+    const uint8_t *k = (const uint8_t *)key;
+
+    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+    while (length > 12)
+    {
+      a += k[0];
+      a += ((uint32_t)k[1])<<8;
+      a += ((uint32_t)k[2])<<16;
+      a += ((uint32_t)k[3])<<24;
+      b += k[4];
+      b += ((uint32_t)k[5])<<8;
+      b += ((uint32_t)k[6])<<16;
+      b += ((uint32_t)k[7])<<24;
+      c += k[8];
+      c += ((uint32_t)k[9])<<8;
+      c += ((uint32_t)k[10])<<16;
+      c += ((uint32_t)k[11])<<24;
+      mix(a,b,c);
+      length -= 12;
+      k += 12;
+    }
+
+    /*-------------------------------- last block: affect all 32 bits of (c) */
+    switch(length)              /* all the case statements fall through */
+    {
+    case 12: c+=((uint32_t)k[11])<<24;
+    case 11: c+=((uint32_t)k[10])<<16;
+    case 10: c+=((uint32_t)k[9])<<8;
+    case 9 : c+=k[8];
+    case 8 : b+=((uint32_t)k[7])<<24;
+    case 7 : b+=((uint32_t)k[6])<<16;
+    case 6 : b+=((uint32_t)k[5])<<8;
+    case 5 : b+=k[4];
+    case 4 : a+=((uint32_t)k[3])<<24;
+    case 3 : a+=((uint32_t)k[2])<<16;
+    case 2 : a+=((uint32_t)k[1])<<8;
+    case 1 : a+=k[0];
+             break;
+    case 0 : return c;
+    }
+  }
+
+  final(a,b,c);
+  return c;
+}
+
+
 /*
  * hashlittle2: return 2 32-bit hash values
  *
diff --git a/src/util-hash-lookup3.h b/src/util-hash-lookup3.h
index 1e37e9104e..7ef4cf5d06 100644
--- a/src/util-hash-lookup3.h
+++ b/src/util-hash-lookup3.h
@@ -52,6 +52,11 @@ void hashword2 (const uint32_t *k, /* the key, an array of uint32_t val
 
 uint32_t hashlittle( const void *key, size_t length, uint32_t initval);
 
+/* A variant of hashlittle() that avoids accesses beyond the last byte
+ * of the string, which will cause warnings from tools like Valgrind or Address
+ * Sanitizer. */
+uint32_t hashlittle_safe(const void *key, size_t length, uint32_t initval);
+
 void hashlittle2(const void *key, /* the key to hash */
                  size_t length, /* length of the key */
                  uint32_t *pc, /* IN: primary initval, OUT: primary hash */
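
Because the bytes beyond the key never affect the result, hashlittle() and
hashlittle_safe() are expected to return identical values for the same key,
so callers can switch between them without changing hash-table behaviour.
A small hypothetical check along these lines (the padded buffer keeps
hashlittle()'s over-read inside valid, initialized memory):

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  uint32_t hashlittle(const void *key, size_t length, uint32_t initval);
  uint32_t hashlittle_safe(const void *key, size_t length, uint32_t initval);

  int main(void)
  {
      /* 64-byte buffer; the bytes after "suricata" are zero-initialized,
       * so hashlittle()'s read past the key stays in valid memory here. */
      char buf[64] = "suricata";

      for (size_t len = 0; len <= strlen(buf); len++) {
          assert(hashlittle(buf, len, 0x1234) ==
                 hashlittle_safe(buf, len, 0x1234));
      }
      printf("hashlittle and hashlittle_safe agree\n");
      return 0;
  }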