From: Philippe Antoine
Date: Tue, 18 Mar 2025 09:55:39 +0000 (+0100)
Subject: detect: add configurable limits for datasets
X-Git-Tag: suricata-8.0.0-beta1~262
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a7713db709b8a0be5fc5e5809ab58e9b14a16e85;p=thirdparty%2Fsuricata.git

detect: add configurable limits for datasets

Ticket: 7615

Avoids signatures setting extreme hash sizes, which would lead to very
high memory use.

Defaults to allowing:
- 65536 per dataset
- 16777216 total

To override these built-in defaults:

```yaml
datasets:
  # Limits for per rule dataset instances to avoid rules using too many
  # resources.
  limits:
    # Max value for per dataset `hashsize` setting
    #single-hashsize: 65536
    # Max combined hashsize values for all datasets.
    #total-hashsizes: 16777216
```
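As an illustration (hypothetical rule and dataset name, not part of this
change), a signature asking for an oversized per-dataset hash table would now
fail to load under the default limits, logging the 'single-hashsize' error
added below:

```
alert dns any any -> any any (msg:"dataset hashsize above limit"; dns.query; dataset:isset,big-set, type string, hashsize 1048576; sid:1; rev:1;)
```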
SCLogDebug("Set %s at %p hidden successfully", set->name, set); set = set->next; } @@ -560,6 +605,27 @@ int DatasetsInit(void) uint32_t default_hashsize = 0; GetDefaultMemcap(&default_memcap, &default_hashsize); if (datasets != NULL) { + const char *str = NULL; + if (ConfGet("datasets.limits.total-hashsizes", &str) == 1) { + if (ParseSizeStringU32(str, &dataset_max_total_hashsize) < 0) { + FatalError("failed to parse datasets.limits.total-hashsizes value: %s", str); + } + } + if (ConfGet("datasets.limits.single-hashsize", &str) == 1) { + if (ParseSizeStringU32(str, &dataset_max_one_hashsize) < 0) { + FatalError("failed to parse datasets.limits.single-hashsize value: %s", str); + } + } + if (dataset_max_total_hashsize > 0 && + dataset_max_total_hashsize < dataset_max_one_hashsize) { + FatalError("total-hashsizes (%u) cannot be smaller than single-hashsize (%u)", + dataset_max_total_hashsize, dataset_max_one_hashsize); + } + if (dataset_max_total_hashsize > 0 && dataset_max_one_hashsize == 0) { + // the total limit also applies for single limit + dataset_max_one_hashsize = dataset_max_total_hashsize; + } + int list_pos = 0; ConfNode *iter = NULL; TAILQ_FOREACH(iter, &datasets->head, next) { diff --git a/src/tests/fuzz/confyaml.c b/src/tests/fuzz/confyaml.c index 1945528599..05995ea56f 100644 --- a/src/tests/fuzz/confyaml.c +++ b/src/tests/fuzz/confyaml.c @@ -112,4 +112,8 @@ app-layer:\n\ enabled: yes\n\ detect:\n\ inspection-recursion-limit: 0\n\ +datasets:\n\ + maximums:\n\ + single_hashsize: 65536\n\ + total_hashsizes: 16777216\n\ "; diff --git a/src/util-thash.c b/src/util-thash.c index d840ae26d2..a511049e07 100644 --- a/src/util-thash.c +++ b/src/util-thash.c @@ -324,16 +324,11 @@ THashTableContext *THashInit(const char *cnf_prefix, uint32_t data_size, ctx->config.hash_size = hashsize > 0 ? hashsize : THASH_DEFAULT_HASHSIZE; /* Reset memcap in case of loading from file to the highest possible value unless defined by the rule keyword */ -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // limit memcap size to default when fuzzing - SC_ATOMIC_SET(ctx->config.memcap, THASH_DEFAULT_MEMCAP); -#else if (memcap > 0) { SC_ATOMIC_SET(ctx->config.memcap, memcap); } else { SC_ATOMIC_SET(ctx->config.memcap, reset_memcap ? UINT64_MAX : THASH_DEFAULT_MEMCAP); } -#endif ctx->config.prealloc = THASH_DEFAULT_PREALLOC; SC_ATOMIC_INIT(ctx->counter); diff --git a/suricata.yaml.in b/suricata.yaml.in index 5117b09928..a30a57aaec 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -1235,6 +1235,14 @@ datasets: #memcap: 100 MiB #hashsize: 2048 + # Limits for per rule dataset instances to avoid rules using too many + # resources. + limits: + # Max value for per dataset `hashsize` setting + #single-hashsize: 65536 + # Max combined hashsize values for all datasets. + #total-hashsizes: 16777216 + rules: # Set to true to allow absolute filenames and filenames that use # ".." components to reference parent directories in rules that specify