From: Philippe Antoine Date: Sun, 22 Sep 2024 19:38:50 +0000 (+0200) Subject: util/hash: use randomized hash algorithm X-Git-Tag: suricata-7.0.7~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=db5a7a2febf6a2a862809fabfd35d238d16d6386;p=thirdparty%2Fsuricata.git util/hash: use randomized hash algorithm For datasets and http ranges Ticket: 7209 Prevents abusive hash collisions from known djb2 algorithm (cherry picked from commit 26da953f6dad3793d29f27ce7ab6628a2db8f471) --- diff --git a/src/app-layer-htp-range.c b/src/app-layer-htp-range.c index f0d75a9750..5af62a1404 100644 --- a/src/app-layer-htp-range.c +++ b/src/app-layer-htp-range.c @@ -26,7 +26,7 @@ #include "util-misc.h" //ParseSizeStringU64 #include "util-thash.h" //HashTable #include "util-memcmp.h" //SCBufferCmp -#include "util-hash-string.h" //StringHashDjb2 +#include "util-hash-lookup3.h" //hashlittle_safe #include "util-validate.h" //DEBUG_VALIDATE_BUG_ON #include "util-byte.h" //StringParseUint32 @@ -102,10 +102,10 @@ static bool ContainerUrlRangeCompare(void *a, void *b) return false; } -static uint32_t ContainerUrlRangeHash(void *s) +static uint32_t ContainerUrlRangeHash(uint32_t hash_seed, void *s) { HttpRangeContainerFile *cur = s; - uint32_t h = StringHashDjb2(cur->key, cur->len); + uint32_t h = hashlittle_safe(cur->key, cur->len, hash_seed); return h; } diff --git a/src/datasets-ipv4.c b/src/datasets-ipv4.c index f1192a0db5..67f8778fd2 100644 --- a/src/datasets-ipv4.c +++ b/src/datasets-ipv4.c @@ -25,6 +25,7 @@ #include "conf.h" #include "datasets.h" #include "datasets-ipv4.h" +#include "util-hash-lookup3.h" #include "util-thash.h" #include "util-print.h" @@ -45,15 +46,10 @@ bool IPv4Compare(void *a, void *b) return (memcmp(as->ipv4, bs->ipv4, sizeof(as->ipv4)) == 0); } -uint32_t IPv4Hash(void *s) +uint32_t IPv4Hash(uint32_t hash_seed, void *s) { const IPv4Type *str = s; - uint32_t hash = 5381; - - for (int i = 0; i < (int)sizeof(str->ipv4); i++) { - hash = ((hash << 5) + hash) + str->ipv4[i]; /* hash * 33 + c */ - } - return hash; + return hashword((uint32_t *)str->ipv4, 1, hash_seed); } // data stays in hash diff --git a/src/datasets-ipv4.h b/src/datasets-ipv4.h index 277acc6b88..799d22acbb 100644 --- a/src/datasets-ipv4.h +++ b/src/datasets-ipv4.h @@ -33,7 +33,7 @@ typedef struct IPv4Type { int IPv4Set(void *dst, void *src); bool IPv4Compare(void *a, void *b); -uint32_t IPv4Hash(void *s); +uint32_t IPv4Hash(uint32_t hash_seed, void *s); void IPv4Free(void *s); #endif /* __DATASETS_IPV4_H__ */ diff --git a/src/datasets-ipv6.c b/src/datasets-ipv6.c index f907320f00..ac96374da7 100644 --- a/src/datasets-ipv6.c +++ b/src/datasets-ipv6.c @@ -25,6 +25,7 @@ #include "conf.h" #include "datasets.h" #include "datasets-ipv6.h" +#include "util-hash-lookup3.h" #include "util-thash.h" #include "util-print.h" @@ -45,15 +46,10 @@ bool IPv6Compare(void *a, void *b) return (memcmp(as->ipv6, bs->ipv6, sizeof(as->ipv6)) == 0); } -uint32_t IPv6Hash(void *s) +uint32_t IPv6Hash(uint32_t hash_seed, void *s) { const IPv6Type *str = s; - uint32_t hash = 5381; - - for (int i = 0; i < (int)sizeof(str->ipv6); i++) { - hash = ((hash << 5) + hash) + str->ipv6[i]; /* hash * 33 + c */ - } - return hash; + return hashword((uint32_t *)str->ipv6, 4, hash_seed); } // data stays in hash diff --git a/src/datasets-ipv6.h b/src/datasets-ipv6.h index a4cabcaf78..51f273664c 100644 --- a/src/datasets-ipv6.h +++ b/src/datasets-ipv6.h @@ -33,7 +33,7 @@ typedef struct IPv6Type { int IPv6Set(void *dst, void *src); bool IPv6Compare(void *a, void *b); -uint32_t IPv6Hash(void *s); +uint32_t IPv6Hash(uint32_t hash_seed, void *s); void IPv6Free(void *s); #endif /* __DATASETS_IPV4_H__ */ diff --git a/src/datasets-md5.c b/src/datasets-md5.c index 3b1d8f3fc0..517cdff5be 100644 --- a/src/datasets-md5.c +++ b/src/datasets-md5.c @@ -25,6 +25,8 @@ #include "conf.h" #include "datasets.h" #include "datasets-md5.h" +#include "util-hash-lookup3.h" + #include "util-thash.h" #include "util-print.h" #include "util-base64.h" // decode base64 @@ -46,15 +48,10 @@ bool Md5StrCompare(void *a, void *b) return (memcmp(as->md5, bs->md5, sizeof(as->md5)) == 0); } -uint32_t Md5StrHash(void *s) +uint32_t Md5StrHash(uint32_t hash_seed, void *s) { const Md5Type *str = s; - uint32_t hash = 5381; - - for (int i = 0; i < (int)sizeof(str->md5); i++) { - hash = ((hash << 5) + hash) + str->md5[i]; /* hash * 33 + c */ - } - return hash; + return hashword((uint32_t *)str->md5, sizeof(str->md5) / 4, hash_seed); } // data stays in hash diff --git a/src/datasets-md5.h b/src/datasets-md5.h index ad4d0e28a3..2740ba540a 100644 --- a/src/datasets-md5.h +++ b/src/datasets-md5.h @@ -33,7 +33,7 @@ typedef struct Md5Type { int Md5StrSet(void *dst, void *src); bool Md5StrCompare(void *a, void *b); -uint32_t Md5StrHash(void *s); +uint32_t Md5StrHash(uint32_t hash_seed, void *s); void Md5StrFree(void *s); #endif /* __DATASETS_MD5_H__ */ diff --git a/src/datasets-sha256.c b/src/datasets-sha256.c index 346397d6d6..0929915353 100644 --- a/src/datasets-sha256.c +++ b/src/datasets-sha256.c @@ -25,9 +25,8 @@ #include "conf.h" #include "datasets.h" #include "datasets-sha256.h" +#include "util-hash-lookup3.h" #include "util-thash.h" -#include "util-print.h" -#include "util-base64.h" // decode base64 int Sha256StrSet(void *dst, void *src) { @@ -46,15 +45,10 @@ bool Sha256StrCompare(void *a, void *b) return (memcmp(as->sha256, bs->sha256, sizeof(as->sha256)) == 0); } -uint32_t Sha256StrHash(void *s) +uint32_t Sha256StrHash(uint32_t hash_seed, void *s) { Sha256Type *str = s; - uint32_t hash = 5381; - - for (int i = 0; i < (int)sizeof(str->sha256); i++) { - hash = ((hash << 5) + hash) + str->sha256[i]; /* hash * 33 + c */ - } - return hash; + return hashword((uint32_t *)str->sha256, sizeof(str->sha256) / 4, hash_seed); } // data stays in hash diff --git a/src/datasets-sha256.h b/src/datasets-sha256.h index 8793cc17a5..a9c81ac75a 100644 --- a/src/datasets-sha256.h +++ b/src/datasets-sha256.h @@ -33,7 +33,7 @@ typedef struct Sha256Type { int Sha256StrSet(void *dst, void *src); bool Sha256StrCompare(void *a, void *b); -uint32_t Sha256StrHash(void *s); +uint32_t Sha256StrHash(uint32_t hash_seed, void *s); void Sha256StrFree(void *s); #endif /* __DATASETS_SHA256_H__ */ diff --git a/src/datasets-string.c b/src/datasets-string.c index 4a572898ce..0a8f499ae3 100644 --- a/src/datasets-string.c +++ b/src/datasets-string.c @@ -28,6 +28,7 @@ #include "util-thash.h" #include "util-print.h" #include "util-base64.h" // decode base64 +#include "util-hash-lookup3.h" #include "rust.h" #if 0 @@ -85,17 +86,10 @@ bool StringCompare(void *a, void *b) return (memcmp(as->ptr, bs->ptr, as->len) == 0); } -uint32_t StringHash(void *s) +uint32_t StringHash(uint32_t hash_seed, void *s) { - uint32_t hash = 5381; StringType *str = s; - - for (uint32_t i = 0; i < str->len; i++) { - int c = str->ptr[i]; - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - } - - return hash; + return hashlittle_safe(str->ptr, str->len, hash_seed); } // base data stays in hash diff --git a/src/datasets-string.h b/src/datasets-string.h index b1513e3584..2cd624833b 100644 --- a/src/datasets-string.h +++ b/src/datasets-string.h @@ -34,7 +34,7 @@ typedef struct StringType { int StringSet(void *dst, void *src); bool StringCompare(void *a, void *b); -uint32_t StringHash(void *s); +uint32_t StringHash(uint32_t hash_seed, void *s); void StringFree(void *s); int StringAsBase64(const void *s, char *out, size_t out_size); diff --git a/src/util-thash.c b/src/util-thash.c index d9500e7262..9ee376f97d 100644 --- a/src/util-thash.c +++ b/src/util-thash.c @@ -293,7 +293,7 @@ static int THashInitConfig(THashTableContext *ctx, const char *cnf_prefix) } THashTableContext *THashInit(const char *cnf_prefix, size_t data_size, - int (*DataSet)(void *, void *), void (*DataFree)(void *), uint32_t (*DataHash)(void *), + int (*DataSet)(void *, void *), void (*DataFree)(void *), uint32_t (*DataHash)(uint32_t, void *), bool (*DataCompare)(void *, void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize) { THashTableContext *ctx = SCCalloc(1, sizeof(*ctx)); @@ -459,7 +459,7 @@ static uint32_t THashGetKey(const THashConfig *cnf, void *data) { uint32_t key; - key = cnf->DataHash(data); + key = cnf->DataHash(cnf->hash_rand, data); key %= cnf->hash_size; return key; diff --git a/src/util-thash.h b/src/util-thash.h index 9618d5c064..3e6883503f 100644 --- a/src/util-thash.h +++ b/src/util-thash.h @@ -130,7 +130,7 @@ typedef struct THashDataConfig_ { uint32_t data_size; int (*DataSet)(void *dst, void *src); void (*DataFree)(void *); - uint32_t (*DataHash)(void *); + uint32_t (*DataHash)(uint32_t, void *); bool (*DataCompare)(void *, void *); } THashConfig; @@ -169,7 +169,7 @@ typedef struct THashTableContext_ { THashTableContext *THashInit(const char *cnf_prefix, size_t data_size, int (*DataSet)(void *dst, void *src), void (*DataFree)(void *), - uint32_t (*DataHash)(void *), bool (*DataCompare)(void *, void *), bool reset_memcap, + uint32_t (*DataHash)(uint32_t, void *), bool (*DataCompare)(void *, void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize); void THashShutdown(THashTableContext *ctx);