From: Shivani Bhardwaj Date: Thu, 10 Sep 2020 11:45:00 +0000 (+0530) Subject: datasets: allow memcap, hashsize be set via yaml or rule X-Git-Tag: suricata-6.0.0-rc1~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5ac94fc4076676d77bbc8b31c5515db7b3545401;p=thirdparty%2Fsuricata.git datasets: allow memcap, hashsize be set via yaml or rule It is now possible to set the memcap and hashsize via suricata.yaml and rules. Rule example: alert http any any -> any any (http.user_agent; dataset:isset,ua-seen,type string,load datasets.csv,memcap 100mb,hashsize 2048; sid:1;) suricata.yaml example: datasets: ua-seen: type: string load: datasets.csv memcap: 20mb hashsize: 2048 --- diff --git a/src/datasets.c b/src/datasets.c index 736b38f92e..b237d87037 100644 --- a/src/datasets.c +++ b/src/datasets.c @@ -33,6 +33,7 @@ #include "util-crypt.h" // encode base64 #include "util-base64.h" // decode base64 #include "util-byte.h" +#include "util-misc.h" SCMutex sets_lock = SCMUTEX_INITIALIZER; static Dataset *sets = NULL; @@ -216,6 +217,7 @@ static int DatasetLoadMd5(Dataset *set) (uint32_t)strlen(line), line); } } + THashConsolidateMemcap(set->hash); fclose(fp); SCLogConfig("dataset: %s loaded %u records", set->name, cnt); @@ -281,6 +283,7 @@ static int DatasetLoadSha256(Dataset *set) cnt++; } } + THashConsolidateMemcap(set->hash); fclose(fp); SCLogConfig("dataset: %s loaded %u records", set->name, cnt); @@ -356,6 +359,7 @@ static int DatasetLoadString(Dataset *set) SCLogDebug("line with rep %s, %s", line, r); } } + THashConsolidateMemcap(set->hash); fclose(fp); SCLogConfig("dataset: %s loaded %u records", set->name, cnt); @@ -416,8 +420,8 @@ Dataset *DatasetFind(const char *name, enum DatasetTypes type) return set; } -Dataset *DatasetGet(const char *name, enum DatasetTypes type, - const char *save, const char *load) +Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, + uint64_t memcap, uint32_t hashsize) { if 
(strlen(name) > DATASET_NAME_MAX_LEN) { return NULL; @@ -489,24 +493,24 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type, switch (type) { case DATASET_TYPE_MD5: - set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, - Md5StrFree, Md5StrHash, Md5StrCompare, load != NULL ? 1 : 0); + set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash, + Md5StrCompare, load != NULL ? 1 : 0, memcap, hashsize); if (set->hash == NULL) goto out_err; if (DatasetLoadMd5(set) < 0) goto out_err; break; case DATASET_TYPE_STRING: - set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, - StringFree, StringHash, StringCompare, load != NULL ? 1 : 0); + set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash, + StringCompare, load != NULL ? 1 : 0, memcap, hashsize); if (set->hash == NULL) goto out_err; if (DatasetLoadString(set) < 0) goto out_err; break; case DATASET_TYPE_SHA256: - set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, - Sha256StrFree, Sha256StrHash, Sha256StrCompare, load != NULL ? 1 : 0); + set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree, + Sha256StrHash, Sha256StrCompare, load != NULL ? 
1 : 0, memcap, hashsize); if (set->hash == NULL) goto out_err; if (DatasetLoadSha256(set) < 0) @@ -609,6 +613,8 @@ int DatasetsInit(void) char save[PATH_MAX] = ""; char load[PATH_MAX] = ""; + uint64_t memcap = 0; + uint32_t hashsize = 0; const char *set_name = iter->name; if (strlen(set_name) > DATASET_NAME_MAX_LEN) { @@ -636,13 +642,34 @@ int DatasetsInit(void) } } + ConfNode *set_memcap = ConfNodeLookupChild(iter, "memcap"); + if (set_memcap) { + if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) { + SCLogWarning(SC_ERR_INVALID_VALUE, + "memcap value cannot be" + " deduced: %s, resetting to default", + set_memcap->val); + memcap = 0; + } + } + ConfNode *set_hashsize = ConfNodeLookupChild(iter, "hashsize"); + if (set_hashsize) { + if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) { + SCLogWarning(SC_ERR_INVALID_VALUE, + "hashsize value cannot be" + " deduced: %s, resetting to default", + set_hashsize->val); + hashsize = 0; + } + } char conf_str[1024]; snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name); SCLogDebug("(%d) set %s type %s. 
Conf %s", n, set_name, set_type->val, conf_str); if (strcmp(set_type->val, "md5") == 0) { - Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load); + Dataset *dset = + DatasetGet(set_name, DATASET_TYPE_MD5, save, load, memcap, hashsize); if (dset == NULL) FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name); SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val); @@ -650,7 +677,8 @@ int DatasetsInit(void) n++; } else if (strcmp(set_type->val, "sha256") == 0) { - Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load); + Dataset *dset = + DatasetGet(set_name, DATASET_TYPE_SHA256, save, load, memcap, hashsize); if (dset == NULL) FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name); SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val); @@ -658,7 +686,8 @@ int DatasetsInit(void) n++; } else if (strcmp(set_type->val, "string") == 0) { - Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load); + Dataset *dset = + DatasetGet(set_name, DATASET_TYPE_STRING, save, load, memcap, hashsize); if (dset == NULL) FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name); SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val); diff --git a/src/datasets.h b/src/datasets.h index 71003b28c2..bd22cb13c7 100644 --- a/src/datasets.h +++ b/src/datasets.h @@ -51,8 +51,8 @@ typedef struct Dataset { enum DatasetTypes DatasetGetTypeFromString(const char *s); Dataset *DatasetFind(const char *name, enum DatasetTypes type); -Dataset *DatasetGet(const char *name, enum DatasetTypes type, - const char *save, const char *load); +Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, + uint64_t memcap, uint32_t hashsize); int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len); int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len); DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t 
*data, const uint32_t data_len, diff --git a/src/detect-datarep.c b/src/detect-datarep.c index d4802e0cfb..72c85b9c3c 100644 --- a/src/detect-datarep.c +++ b/src/detect-datarep.c @@ -38,6 +38,7 @@ #include "util-byte.h" #include "util-debug.h" #include "util-print.h" +#include "util-misc.h" #define PARSE_REGEX "([a-z]+)(?:,\\s*([\\-_A-z0-9\\s\\.]+)){1,4}" static DetectParseRegex parse_regex; @@ -91,12 +92,9 @@ int DetectDatarepBufferMatch(DetectEngineThreadCtx *det_ctx, return 0; } -static int DetectDatarepParse(const char *str, - char *cmd, int cmd_len, - char *name, int name_len, - enum DatasetTypes *type, - char *load, size_t load_size, - uint16_t *rep_value) +static int DetectDatarepParse(const char *str, char *cmd, int cmd_len, char *name, int name_len, + enum DatasetTypes *type, char *load, size_t load_size, uint16_t *rep_value, + uint64_t *memcap, uint32_t *hashsize) { bool cmd_set = false; bool name_set = false; @@ -169,6 +167,24 @@ static int DetectDatarepParse(const char *str, SCLogDebug("load %s", val); strlcpy(load, val, load_size); } + if (strcmp(key, "memcap") == 0) { + if (ParseSizeStringU64(val, memcap) < 0) { + SCLogWarning(SC_ERR_INVALID_VALUE, + "invalid value for memcap: %s," + " resetting to default", + val); + *memcap = 0; + } + } + if (strcmp(key, "hashsize") == 0) { + if (ParseSizeStringU32(val, hashsize) < 0) { + SCLogWarning(SC_ERR_INVALID_VALUE, + "invalid value for hashsize: %s," + " resetting to default", + val); + *hashsize = 0; + } + } } SCLogDebug("key: %s, value: %s", key, val); @@ -279,6 +295,8 @@ static int DetectDatarepSetup (DetectEngineCtx *de_ctx, Signature *s, const char enum DatasetTypes type = DATASET_TYPE_NOTSET; char load[PATH_MAX]; uint16_t value = 0; + uint64_t memcap = 0; + uint32_t hashsize = 0; if (DetectBufferGetActiveList(de_ctx, s) == -1) { SCLogError(SC_ERR_INVALID_SIGNATURE, @@ -293,8 +311,8 @@ static int DetectDatarepSetup (DetectEngineCtx *de_ctx, Signature *s, const char SCReturnInt(-1); } - if 
(!DetectDatarepParse(rawstr, cmd_str, sizeof(cmd_str), name, - sizeof(name), &type, load, sizeof(load), &value)) { + if (!DetectDatarepParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load, + sizeof(load), &value, &memcap, &hashsize)) { return -1; } @@ -316,7 +334,7 @@ static int DetectDatarepSetup (DetectEngineCtx *de_ctx, Signature *s, const char return -1; } - Dataset *set = DatasetGet(name, type, /* no save */ NULL, load); + Dataset *set = DatasetGet(name, type, /* no save */ NULL, load, memcap, hashsize); if (set == NULL) { SCLogError(SC_ERR_UNKNOWN_VALUE, "failed to set up datarep set '%s'.", name); diff --git a/src/detect-dataset.c b/src/detect-dataset.c index ff8d13f00d..e3de8c00cb 100644 --- a/src/detect-dataset.c +++ b/src/detect-dataset.c @@ -37,6 +37,7 @@ #include "util-debug.h" #include "util-print.h" +#include "util-misc.h" #define PARSE_REGEX "([a-z]+)(?:,\\s*([\\-_A-z0-9\\s\\.]+)){1,4}" static DetectParseRegex parse_regex; @@ -99,12 +100,9 @@ int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx, return 0; } -static int DetectDatasetParse(const char *str, - char *cmd, int cmd_len, - char *name, int name_len, - enum DatasetTypes *type, - char *load, size_t load_size, - char *save, size_t save_size) +static int DetectDatasetParse(const char *str, char *cmd, int cmd_len, char *name, int name_len, + enum DatasetTypes *type, char *load, size_t load_size, char *save, size_t save_size, + uint64_t *memcap, uint32_t *hashsize) { bool cmd_set = false; bool name_set = false; @@ -195,6 +193,24 @@ static int DetectDatasetParse(const char *str, strlcpy(save, val, save_size); state_set = true; } + if (strcmp(key, "memcap") == 0) { + if (ParseSizeStringU64(val, memcap) < 0) { + SCLogWarning(SC_ERR_INVALID_VALUE, + "invalid value for memcap: %s," + " resetting to default", + val); + *memcap = 0; + } + } + if (strcmp(key, "hashsize") == 0) { + if (ParseSizeStringU32(val, hashsize) < 0) { + SCLogWarning(SC_ERR_INVALID_VALUE, + "invalid value 
for hashsize: %s," + " resetting to default", + val); + *hashsize = 0; + } + } } SCLogDebug("key: %s, value: %s", key, val); @@ -314,6 +330,8 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst DetectDatasetData *cd = NULL; SigMatch *sm = NULL; uint8_t cmd = 0; + uint64_t memcap = 0; + uint32_t hashsize = 0; char cmd_str[16] = "", name[DATASET_NAME_MAX_LEN + 1] = ""; enum DatasetTypes type = DATASET_TYPE_NOTSET; char load[PATH_MAX] = ""; @@ -332,8 +350,8 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst SCReturnInt(-1); } - if (!DetectDatasetParse(rawstr, cmd_str, sizeof(cmd_str), name, - sizeof(name), &type, load, sizeof(load), save, sizeof(save))) { + if (!DetectDatasetParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load, + sizeof(load), save, sizeof(save), &memcap, &hashsize)) { return -1; } @@ -371,7 +389,7 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst } SCLogDebug("name '%s' load '%s' save '%s'", name, load, save); - Dataset *set = DatasetGet(name, type, save, load); + Dataset *set = DatasetGet(name, type, save, load, memcap, hashsize); if (set == NULL) { SCLogError(SC_ERR_INVALID_SIGNATURE, "failed to set up dataset '%s'.", name); diff --git a/src/util-thash.c b/src/util-thash.c index 4c5cadbdc2..6ba12b32cc 100644 --- a/src/util-thash.c +++ b/src/util-thash.c @@ -290,12 +290,9 @@ static void THashInitConfig(THashTableContext *ctx, const char *cnf_prefix) return; } -THashTableContext* THashInit(const char *cnf_prefix, size_t data_size, - int (*DataSet)(void *, void *), - void (*DataFree)(void *), - uint32_t (*DataHash)(void *), - bool (*DataCompare)(void *, void *), - bool reset_memcap) +THashTableContext *THashInit(const char *cnf_prefix, size_t data_size, + int (*DataSet)(void *, void *), void (*DataFree)(void *), uint32_t (*DataHash)(void *), + bool (*DataCompare)(void *, void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize) { 
THashTableContext *ctx = SCCalloc(1, sizeof(*ctx)); BUG_ON(!ctx); @@ -308,8 +305,14 @@ THashTableContext* THashInit(const char *cnf_prefix, size_t data_size, /* set defaults */ ctx->config.hash_rand = (uint32_t)RandomGet(); - ctx->config.hash_size = THASH_DEFAULT_HASHSIZE; - ctx->config.memcap = reset_memcap ? UINT64_MAX : THASH_DEFAULT_MEMCAP; + ctx->config.hash_size = hashsize > 0 ? hashsize : THASH_DEFAULT_HASHSIZE; + /* Reset memcap in case of loading from file to the highest possible value + unless defined by the rule keyword */ + if (memcap > 0) { + ctx->config.memcap = memcap; + } else { + ctx->config.memcap = reset_memcap ? UINT64_MAX : THASH_DEFAULT_MEMCAP; + } ctx->config.prealloc = THASH_DEFAULT_PREALLOC; SC_ATOMIC_INIT(ctx->counter); @@ -321,6 +324,14 @@ THashTableContext* THashInit(const char *cnf_prefix, size_t data_size, return ctx; } +/* \brief Set memcap to the larger of the current memuse and THASH_DEFAULT_MEMCAP + * (memcap is uint64_t; it is cast for the debug print so the format matches on every ABI) */ +void THashConsolidateMemcap(THashTableContext *ctx) +{ + ctx->config.memcap = MAX(SC_ATOMIC_GET(ctx->memuse), THASH_DEFAULT_MEMCAP); + SCLogDebug("memcap after load set to: %llu", (unsigned long long)ctx->config.memcap); +} + /** \brief shutdown the flow engine * \warning Not thread safe */ void THashShutdown(THashTableContext *ctx) diff --git a/src/util-thash.h b/src/util-thash.h index 17edb88928..fe6544a0d7 100644 --- a/src/util-thash.h +++ b/src/util-thash.h @@ -185,12 +185,10 @@ typedef struct THashTableContext_ { } \ } while (0) -THashTableContext* THashInit(const char *cnf_prefix, size_t data_size, - int (*DataSet)(void *dst, void *src), - void (*DataFree)(void *), - uint32_t (*DataHash)(void *), - bool (*DataCompare)(void *, void *), - bool reset_memcap); +THashTableContext *THashInit(const char *cnf_prefix, size_t data_size, + int (*DataSet)(void *dst, void *src), void (*DataFree)(void *), + uint32_t (*DataHash)(void *), bool (*DataCompare)(void *, void *), bool reset_memcap, + uint64_t memcap, uint32_t hashsize); void THashShutdown(THashTableContext *ctx); @@ -215,5 +213,6 @@
THashDataQueue *THashDataQueueNew(void); void THashCleanup(THashTableContext *ctx); int THashWalk(THashTableContext *, THashFormatFunc, THashOutputFunc, void *); int THashRemoveFromHash (THashTableContext *ctx, void *data); +void THashConsolidateMemcap(THashTableContext *ctx); #endif /* __THASH_H__ */