From: Philippe Antoine Date: Sun, 22 Sep 2024 19:38:50 +0000 (+0200) Subject: util/hash: use randomized hash algorithm X-Git-Tag: suricata-8.0.0-beta1~839 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=26da953f6dad3793d29f27ce7ab6628a2db8f471;p=thirdparty%2Fsuricata.git util/hash: use randomized hash algorithm For datasets and http ranges Ticket: 7209 Prevents abusive hash collisions from known djb2 algorithm --- diff --git a/src/app-layer-htp-range.c b/src/app-layer-htp-range.c index 55587487f5..b1f2b62423 100644 --- a/src/app-layer-htp-range.c +++ b/src/app-layer-htp-range.c @@ -26,7 +26,7 @@ #include "util-misc.h" //ParseSizeStringU64 #include "util-thash.h" //HashTable #include "util-memcmp.h" //SCBufferCmp -#include "util-hash-string.h" //StringHashDjb2 +#include "util-hash-lookup3.h" //hashlittle_safe #include "util-validate.h" //DEBUG_VALIDATE_BUG_ON #include "util-byte.h" //StringParseUint32 @@ -102,10 +102,10 @@ static bool ContainerUrlRangeCompare(void *a, void *b) return false; } -static uint32_t ContainerUrlRangeHash(void *s) +static uint32_t ContainerUrlRangeHash(uint32_t hash_seed, void *s) { HttpRangeContainerFile *cur = s; - uint32_t h = StringHashDjb2(cur->key, cur->len); + uint32_t h = hashlittle_safe(cur->key, cur->len, hash_seed); return h; } diff --git a/src/datasets-ipv4.c b/src/datasets-ipv4.c index f1192a0db5..67f8778fd2 100644 --- a/src/datasets-ipv4.c +++ b/src/datasets-ipv4.c @@ -25,6 +25,7 @@ #include "conf.h" #include "datasets.h" #include "datasets-ipv4.h" +#include "util-hash-lookup3.h" #include "util-thash.h" #include "util-print.h" @@ -45,15 +46,10 @@ bool IPv4Compare(void *a, void *b) return (memcmp(as->ipv4, bs->ipv4, sizeof(as->ipv4)) == 0); } -uint32_t IPv4Hash(void *s) +uint32_t IPv4Hash(uint32_t hash_seed, void *s) { const IPv4Type *str = s; - uint32_t hash = 5381; - - for (int i = 0; i < (int)sizeof(str->ipv4); i++) { - hash = ((hash << 5) + hash) + str->ipv4[i]; /* hash * 33 + c */ - } - return hash; + return hashword((uint32_t *)str->ipv4, 1, hash_seed); } // data stays in hash diff --git a/src/datasets-ipv4.h b/src/datasets-ipv4.h index db7abd6851..4a840e9aa6 100644 --- a/src/datasets-ipv4.h +++ b/src/datasets-ipv4.h @@ -33,7 +33,7 @@ typedef struct IPv4Type { int IPv4Set(void *dst, void *src); bool IPv4Compare(void *a, void *b); -uint32_t IPv4Hash(void *s); +uint32_t IPv4Hash(uint32_t hash_seed, void *s); void IPv4Free(void *s); #endif /* SURICATA_DATASETS_IPV4_H */ diff --git a/src/datasets-ipv6.c b/src/datasets-ipv6.c index f907320f00..ac96374da7 100644 --- a/src/datasets-ipv6.c +++ b/src/datasets-ipv6.c @@ -25,6 +25,7 @@ #include "conf.h" #include "datasets.h" #include "datasets-ipv6.h" +#include "util-hash-lookup3.h" #include "util-thash.h" #include "util-print.h" @@ -45,15 +46,10 @@ bool IPv6Compare(void *a, void *b) return (memcmp(as->ipv6, bs->ipv6, sizeof(as->ipv6)) == 0); } -uint32_t IPv6Hash(void *s) +uint32_t IPv6Hash(uint32_t hash_seed, void *s) { const IPv6Type *str = s; - uint32_t hash = 5381; - - for (int i = 0; i < (int)sizeof(str->ipv6); i++) { - hash = ((hash << 5) + hash) + str->ipv6[i]; /* hash * 33 + c */ - } - return hash; + return hashword((uint32_t *)str->ipv6, 4, hash_seed); } // data stays in hash diff --git a/src/datasets-ipv6.h b/src/datasets-ipv6.h index f3f59a0a96..c75ad194d6 100644 --- a/src/datasets-ipv6.h +++ b/src/datasets-ipv6.h @@ -33,7 +33,7 @@ typedef struct IPv6Type { int IPv6Set(void *dst, void *src); bool IPv6Compare(void *a, void *b); -uint32_t IPv6Hash(void *s); +uint32_t IPv6Hash(uint32_t hash_seed, void *s); void IPv6Free(void *s); #endif /* __DATASETS_IPV4_H__ */ diff --git a/src/datasets-md5.c b/src/datasets-md5.c index 53d828d178..28fd37d830 100644 --- a/src/datasets-md5.c +++ b/src/datasets-md5.c @@ -25,6 +25,8 @@ #include "conf.h" #include "datasets.h" #include "datasets-md5.h" +#include "util-hash-lookup3.h" + #include "util-thash.h" #include "util-print.h" @@ -45,15 +47,10 @@ bool Md5StrCompare(void *a, void *b) return (memcmp(as->md5, bs->md5, sizeof(as->md5)) == 0); } -uint32_t Md5StrHash(void *s) +uint32_t Md5StrHash(uint32_t hash_seed, void *s) { const Md5Type *str = s; - uint32_t hash = 5381; - - for (int i = 0; i < (int)sizeof(str->md5); i++) { - hash = ((hash << 5) + hash) + str->md5[i]; /* hash * 33 + c */ - } - return hash; + return hashword((uint32_t *)str->md5, sizeof(str->md5) / 4, hash_seed); } // data stays in hash diff --git a/src/datasets-md5.h b/src/datasets-md5.h index e6030540df..88c1ff1dfd 100644 --- a/src/datasets-md5.h +++ b/src/datasets-md5.h @@ -33,7 +33,7 @@ typedef struct Md5Type { int Md5StrSet(void *dst, void *src); bool Md5StrCompare(void *a, void *b); -uint32_t Md5StrHash(void *s); +uint32_t Md5StrHash(uint32_t hash_seed, void *s); void Md5StrFree(void *s); #endif /* SURICATA_DATASETS_MD5_H */ diff --git a/src/datasets-sha256.c b/src/datasets-sha256.c index 02f97ae877..240939c084 100644 --- a/src/datasets-sha256.c +++ b/src/datasets-sha256.c @@ -25,8 +25,8 @@ #include "conf.h" #include "datasets.h" #include "datasets-sha256.h" +#include "util-hash-lookup3.h" #include "util-thash.h" -#include "util-print.h" int Sha256StrSet(void *dst, void *src) { @@ -45,15 +45,10 @@ bool Sha256StrCompare(void *a, void *b) return (memcmp(as->sha256, bs->sha256, sizeof(as->sha256)) == 0); } -uint32_t Sha256StrHash(void *s) +uint32_t Sha256StrHash(uint32_t hash_seed, void *s) { Sha256Type *str = s; - uint32_t hash = 5381; - - for (int i = 0; i < (int)sizeof(str->sha256); i++) { - hash = ((hash << 5) + hash) + str->sha256[i]; /* hash * 33 + c */ - } - return hash; + return hashword((uint32_t *)str->sha256, sizeof(str->sha256) / 4, hash_seed); } // data stays in hash diff --git a/src/datasets-sha256.h b/src/datasets-sha256.h index 716fa93dcf..4f99b85a96 100644 --- a/src/datasets-sha256.h +++ b/src/datasets-sha256.h @@ -33,7 +33,7 @@ typedef struct Sha256Type { int Sha256StrSet(void *dst, void *src); bool Sha256StrCompare(void *a, void *b); -uint32_t Sha256StrHash(void *s); +uint32_t Sha256StrHash(uint32_t hash_seed, void *s); void Sha256StrFree(void *s); #endif /* SURICATA_DATASETS_SHA256_H */ diff --git a/src/datasets-string.c b/src/datasets-string.c index 2e60a3ca85..85fe864f52 100644 --- a/src/datasets-string.c +++ b/src/datasets-string.c @@ -27,6 +27,7 @@ #include "datasets-string.h" #include "util-thash.h" #include "util-print.h" +#include "util-hash-lookup3.h" #include "rust.h" #if 0 @@ -84,17 +85,10 @@ bool StringCompare(void *a, void *b) return (memcmp(as->ptr, bs->ptr, as->len) == 0); } -uint32_t StringHash(void *s) +uint32_t StringHash(uint32_t hash_seed, void *s) { - uint32_t hash = 5381; StringType *str = s; - - for (uint32_t i = 0; i < str->len; i++) { - int c = str->ptr[i]; - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - } - - return hash; + return hashlittle_safe(str->ptr, str->len, hash_seed); } uint32_t StringGetLength(void *s) diff --git a/src/datasets-string.h b/src/datasets-string.h index 1d5463cd9c..745754fc49 100644 --- a/src/datasets-string.h +++ b/src/datasets-string.h @@ -34,7 +34,7 @@ typedef struct StringType { int StringSet(void *dst, void *src); bool StringCompare(void *a, void *b); -uint32_t StringHash(void *s); +uint32_t StringHash(uint32_t hash_seed, void *s); uint32_t StringGetLength(void *s); void StringFree(void *s); int StringAsBase64(const void *s, char *out, size_t out_size); diff --git a/src/detect-engine-threshold.c b/src/detect-engine-threshold.c index 0f1e0e9ed5..c9ca8fa4a4 100644 --- a/src/detect-engine-threshold.c +++ b/src/detect-engine-threshold.c @@ -141,10 +141,10 @@ static inline int CompareAddress(const Address *a, const Address *b) return 0; } -static uint32_t ThresholdEntryHash(void *ptr) +static uint32_t ThresholdEntryHash(uint32_t seed, void *ptr) { const ThresholdEntry *e = ptr; - uint32_t hash = hashword(e->key, sizeof(e->key) / sizeof(uint32_t), 0); + uint32_t hash = hashword(e->key, sizeof(e->key) / sizeof(uint32_t), seed); switch (e->key[TRACK]) { case TRACK_BOTH: hash += HashAddress(&e->addr2); diff --git a/src/util-thash.c b/src/util-thash.c index 74c74e245a..3787454a37 100644 --- a/src/util-thash.c +++ b/src/util-thash.c @@ -300,9 +300,10 @@ static int THashInitConfig(THashTableContext *ctx, const char *cnf_prefix) } THashTableContext *THashInit(const char *cnf_prefix, size_t data_size, - int (*DataSet)(void *, void *), void (*DataFree)(void *), uint32_t (*DataHash)(void *), - bool (*DataCompare)(void *, void *), bool (*DataExpired)(void *, SCTime_t), - uint32_t (*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize) + int (*DataSet)(void *, void *), void (*DataFree)(void *), + uint32_t (*DataHash)(uint32_t, void *), bool (*DataCompare)(void *, void *), + bool (*DataExpired)(void *, SCTime_t), uint32_t (*DataSize)(void *), bool reset_memcap, + uint64_t memcap, uint32_t hashsize) { THashTableContext *ctx = SCCalloc(1, sizeof(*ctx)); BUG_ON(!ctx); @@ -531,7 +532,7 @@ static uint32_t THashGetKey(const THashConfig *cnf, void *data) { uint32_t key; - key = cnf->DataHash(data); + key = cnf->DataHash(cnf->hash_rand, data); key %= cnf->hash_size; return key; diff --git a/src/util-thash.h b/src/util-thash.h index 346c528a29..803a5f477c 100644 --- a/src/util-thash.h +++ b/src/util-thash.h @@ -130,7 +130,7 @@ typedef struct THashDataConfig_ { uint32_t data_size; int (*DataSet)(void *dst, void *src); void (*DataFree)(void *); - uint32_t (*DataHash)(void *); + uint32_t (*DataHash)(uint32_t, void *); bool (*DataCompare)(void *, void *); bool (*DataExpired)(void *, SCTime_t ts); uint32_t (*DataSize)(void *); @@ -171,7 +171,7 @@ typedef struct THashTableContext_ { THashTableContext *THashInit(const char *cnf_prefix, size_t data_size, int (*DataSet)(void *dst, void *src), void (*DataFree)(void *), - uint32_t (*DataHash)(void *), bool (*DataCompare)(void *, void *), + uint32_t (*DataHash)(uint32_t, void *), bool (*DataCompare)(void *, void *), bool (*DataExpired)(void *, SCTime_t), uint32_t (*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize);