From: Eric Leblond Date: Sun, 2 Mar 2025 16:34:38 +0000 (+0100) Subject: datajson: introduce feature X-Git-Tag: suricata-8.0.0-rc1~70 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dd94dc6cc6e6498a98d5723564b8b48e6ec6e3a4;p=thirdparty%2Fsuricata.git datajson: introduce feature This patch introduces a new option to the dataset keyword. Where a regular dataset allows matching against sets, a dataset with json format allows the same but also adds JSON data to the alert event. This data comes from the set definition itself. For example, an ipv4 set will look like: [{"ip": "10.16.1.11", "test": "success","context":3}] The syntax is a JSON array but it can also be a JSON object with an array inside. The idea is to directly use data coming from the API of a threat intel management software. The syntax of the keyword is the following: dataset:isset,src_ip,type ip,load src.lst,format json, \ enrichment_key src_ip, value_key ip; Compared to dataset, it just has a supplementary option key that is used to indicate in which subobject the JSON value should be added. The information is added in the event under the alert.extra subobject: "alert": { "extra": { "src_ip": { "ip": "10.6.1.11", "test": "success", "context": 3 }, The main interest of the feature is to be able to contextualize a match. For example, if you have an IOC source, you can do [ {"buffer": "value1", "actor":"APT28","Country":"FR"}, {"buffer": "value2", "actor":"APT32","Country":"NL"} ] This way, a single dataset is able to add context to the event, where this was not possible before and multiple signatures had to be used. The format introduced in datajson is an evolution of the historical datarep format. This has some limitations. For example, if a user fetches IOCs from a threat intel server there is a good chance that the format will be JSON or XML. Suricata has no support for the second but can support the first one. 
Keeping the key value may seem redundant but it is useful to have it directly accessible in the extra data to be able to query it independently of the signature (where it can be multiple metadata or even be a transformed metadata). In some cases, when interacting with data (mostly coming from threat intel servers), the JSON array containing the data to use is not at the root of the object and it is necessary to access a subobject. This patch implements this by supporting dotted key paths of the form level1.level2. This is done via the `array_key` option that contains the path to the data. Ticket: #7372 --- diff --git a/rust/suricatasc/src/unix/commands.rs b/rust/suricatasc/src/unix/commands.rs index 707e934c8f..6b784e0079 100644 --- a/rust/suricatasc/src/unix/commands.rs +++ b/rust/suricatasc/src/unix/commands.rs @@ -71,12 +71,11 @@ impl<'a> CommandParser<'a> { } pub fn parse(&self, input: &str) -> Result { - let parts: Vec<&str> = input.split(' ').map(|s| s.trim()).collect(); + let mut parts: Vec<&str> = input.split(' ').map(|s| s.trim()).collect(); if parts.is_empty() { return Err(CommandParseError::Other("No command provided".to_string())); } let command = parts[0]; - let args = &parts[1..]; let spec = self .commands @@ -91,6 +90,13 @@ impl<'a> CommandParser<'a> { // Calculate the number of required arguments for better error reporting. let required = spec.iter().filter(|e| e.required).count(); + let optional = spec.iter().filter(|e| !e.required).count(); + // Handle the case where the command has only required arguments and allow + // last one to contain spaces. 
+ if optional == 0 { + parts = input.splitn(required + 1, ' ').collect(); + } + let args = &parts[1..]; let mut json_args = HashMap::new(); @@ -386,6 +392,28 @@ fn command_defs() -> Result>, serde_json::Error> { "type": "string", }, ], + "dataset-add-json": [ + { + "name": "setname", + "required": true, + "type": "string", + }, + { + "name": "settype", + "required": true, + "type": "string", + }, + { + "name": "datavalue", + "required": true, + "type": "string", + }, + { + "name": "datajson", + "required": true, + "type": "string", + }, + ], "get-flow-stats-by-id": [ { "name": "flow_id", diff --git a/src/Makefile.am b/src/Makefile.am index c70254f3f3..8365f98451 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -54,6 +54,7 @@ noinst_HEADERS = \ conf-yaml-loader.h \ conf.h \ counters.h \ + datajson.h \ datasets-ipv4.h \ datasets-ipv6.h \ datasets-md5.h \ @@ -653,6 +654,7 @@ libsuricata_c_a_SOURCES = \ conf-yaml-loader.c \ conf.c \ counters.c \ + datajson.c \ datasets-ipv4.c \ datasets-ipv6.c \ datasets-md5.c \ diff --git a/src/datajson.c b/src/datajson.c new file mode 100644 index 0000000000..98cb838eae --- /dev/null +++ b/src/datajson.c @@ -0,0 +1,985 @@ +/* Copyright (C) 2025 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Eric Leblond + */ + +#include "suricata-common.h" +#include "suricata.h" +#include "rust.h" +#include "datasets.h" +#include "datajson.h" +#include "datasets-ipv4.h" +#include "datasets-ipv6.h" +#include "datasets-md5.h" +#include "datasets-sha256.h" +#include "datasets-string.h" +#include "util-byte.h" +#include "util-ip.h" +#include "util-debug.h" + +static int DatajsonAdd( + Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json); + +static inline void DatajsonUnlockData(THashData *d) +{ + (void)THashDecrUsecnt(d); + THashDataUnlock(d); +} + +/* return true if number is a float or an integer */ +static bool IsFloat(const char *in, size_t ins) +{ + char *endptr; + float val = strtof(in, &endptr); + const char *end_ins = in + ins - 1; + if (val != 0 && (endptr == end_ins)) { + return true; + } + /* if value is 0 then we need to check if some parsing has been done */ + if (val == 0 && (endptr == in)) { + return false; + } + return true; +} + +static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out) +{ + json_error_t jerror; + json_t *msg = json_loads(in, 0, &jerror); + if (msg == NULL) { + /* JANSSON does not see an integer, float or a string as valid JSON. + So we need to exclude them from failure. */ + if (!IsFloat(in, ins) && !((in[0] == '"') && (in[ins - 1] == '"'))) { + SCLogWarning("dataset: Invalid json: %s: '%s'\n", jerror.text, in); + return -1; + } + } else { + json_decref(msg); + } + rep_out->len = ins; + rep_out->value = SCStrndup(in, ins); + if (rep_out->value == NULL) { + return -1; + } + return 0; +} + +static json_t *GetSubObjectByKey(json_t *json, const char *key) +{ + if (!json || !key || !json_is_object(json)) { + return NULL; + } + + const char *current_key = key; + json_t *current = json; + while (current_key) { + const char *dot = strchr(current_key, '.'); + + size_t key_len = dot ? 
(size_t)(dot - current_key) : strlen(current_key); + char key_buffer[key_len + 1]; + strlcpy(key_buffer, current_key, key_len + 1); + + if (json_is_object(current) == false) { + return NULL; + } + current = json_object_get(current, key_buffer); + if (current == NULL) { + return NULL; + } + current_key = dot ? dot + 1 : NULL; + } + return current; +} + +static int ParseJsonFile(const char *file, json_t **array, char *key) +{ + json_t *json; + json_error_t error; + /* assume we have one single JSON element in FILE */ + json = json_load_file(file, 0, &error); + if (json == NULL) { + FatalErrorOnInit("can't load JSON, error on line %d: %s", error.line, error.text); + return -1; + } + + if (key == NULL || strlen(key) == 0) { + *array = json; + } else { + *array = GetSubObjectByKey(json, key); + if (*array == NULL) { + SCLogError("dataset: %s failed to get key '%s'", file, key); + json_decref(json); + return -1; + } + json_incref(*array); + json_decref(json); + } + if (!json_is_array(*array)) { + FatalErrorOnInit("not an array"); + json_decref(*array); + return -1; + } + return 0; +} + +/** + * \retval 1 data was added to the hash + * \retval 0 data was not added to the hash as it is already there + * \retval -1 failed to add data to the hash + */ +static int DatajsonAddString( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json }; + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatajsonUnlockData(res.data); + return res.is_new ? 
1 : 0; + } + return -1; +} + +static int DatajsonAddMd5( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != SC_MD5_LEN) + return -2; + + Md5Type lookup = { .json = *json }; + memcpy(lookup.md5, data, SC_MD5_LEN); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatajsonUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + +static int DatajsonAddSha256( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != SC_SHA256_LEN) + return -2; + + Sha256Type lookup = { .json = *json }; + memcpy(lookup.sha256, data, SC_SHA256_LEN); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatajsonUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + +static int DatajsonAddIPv4( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len < SC_IPV4_LEN) + return -2; + + IPv4Type lookup = { .json = *json }; + memcpy(lookup.ipv4, data, SC_IPV4_LEN); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatajsonUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + +static int DatajsonAddIPv6( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != SC_IPV6_LEN) + return -2; + + IPv6Type lookup = { .json = *json }; + memcpy(lookup.ipv6, data, SC_IPV6_LEN); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatajsonUnlockData(res.data); + return res.is_new ? 
1 : 0; + } + return -1; +} + +static int DatajsonAdd( + Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json) +{ + if (set == NULL) + return -1; + + switch (set->type) { + case DATASET_TYPE_STRING: + return DatajsonAddString(set, data, data_len, json); + case DATASET_TYPE_MD5: + return DatajsonAddMd5(set, data, data_len, json); + case DATASET_TYPE_SHA256: + return DatajsonAddSha256(set, data, data_len, json); + case DATASET_TYPE_IPV4: + return DatajsonAddIPv4(set, data, data_len, json); + case DATASET_TYPE_IPV6: + return DatajsonAddIPv6(set, data, data_len, json); + default: + break; + } + return -1; +} + +static int DatajsonLoadString(Dataset *set, char *json_key, char *array_key) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + + uint32_t cnt = 0; + json_t *json; + bool found = false; + SCLogDebug("dataset: array_key '%s' %p", array_key, array_key); + if (ParseJsonFile(set->load, &json, array_key) == -1) { + SCLogError("dataset: %s failed to parse from '%s'", set->name, set->load); + return -1; + } + + int add_ret; + size_t index; + json_t *value; + json_array_foreach (json, index, value) { + json_t *key = GetSubObjectByKey(value, json_key); + if (key == NULL) { + /* ignore error as it can be a working mode where some entries + are not in the same format */ + continue; + } + + found = true; + + const char *val = json_string_value(key); + + DataJsonType elt = { .value = NULL, .len = 0 }; + elt.value = json_dumps(value, JSON_COMPACT); + elt.len = strlen(elt.value); + + add_ret = DatajsonAdd(set, (const uint8_t *)val, strlen(val), &elt); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } + if (add_ret == 0) { + SCFree(elt.value); + } else { + cnt++; + } + } + json_decref(json); + + if (found == false) { + FatalErrorOnInit( + "No valid entries for key '%s' found in the file '%s'", json_key, set->load); + return 
-1; + } + THashConsolidateMemcap(set->hash); + + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + +static uint32_t DatajsonLoadMd5FromJSON(Dataset *set, char *array_key, char *json_key) +{ + int add_ret; + uint32_t cnt = 0; + json_t *json; + bool found = false; + + if (ParseJsonFile(set->load, &json, array_key) == -1) + return -1; + + size_t index; + json_t *value; + json_array_foreach (json, index, value) { + json_t *key = GetSubObjectByKey(value, json_key); + if (key == NULL) { + /* ignore error as it can be a working mode where some entries + are not in the same format */ + continue; + } + + found = true; + + const char *hash_string = json_string_value(key); + if (strlen(hash_string) != SC_MD5_HEX_LEN) { + FatalErrorOnInit("Not correct length for a hash"); + continue; + } + + uint8_t hash[SC_MD5_LEN]; + if (HexToRaw((const uint8_t *)hash_string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) { + FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load); + continue; + } + + DataJsonType elt = { .value = NULL, .len = 0 }; + elt.value = json_dumps(value, JSON_COMPACT); + elt.len = strlen(elt.value); + + add_ret = DatajsonAdd(set, (const uint8_t *)hash, SC_MD5_LEN, &elt); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } + if (add_ret == 0) { + SCFree(elt.value); + } else { + cnt++; + } + } + json_decref(json); + + if (found == false) { + FatalErrorOnInit( + "No valid entries for key '%s' found in the file '%s'", json_key, set->load); + return -1; + } + + return cnt; +} + +static int DatajsonLoadMd5(Dataset *set, char *json_key, char *array_key) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + + uint32_t cnt = DatajsonLoadMd5FromJSON(set, array_key, json_key); + THashConsolidateMemcap(set->hash); + + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + +static uint32_t 
DatajsonLoadSHA256FromJSON(Dataset *set, char *array_key, char *json_key) +{ + int add_ret; + uint32_t cnt = 0; + json_t *json; + bool found = false; + + if (ParseJsonFile(set->load, &json, array_key) == -1) + return -1; + + size_t index; + json_t *value; + json_array_foreach (json, index, value) { + json_t *key = GetSubObjectByKey(value, json_key); + if (key == NULL) { + /* ignore error as it can be a working mode where some entries + are not in the same format */ + continue; + } + + found = true; + + const char *hash_string = json_string_value(key); + if (strlen(hash_string) != SC_SHA256_HEX_LEN) { + FatalErrorOnInit("Not correct length for a hash"); + continue; + } + + uint8_t hash[SC_SHA256_LEN]; + if (HexToRaw((const uint8_t *)hash_string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) { + FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load); + continue; + } + + DataJsonType elt = { .value = NULL, .len = 0 }; + elt.value = json_dumps(value, JSON_COMPACT); + elt.len = strlen(elt.value); + + add_ret = DatajsonAdd(set, (const uint8_t *)hash, SC_SHA256_LEN, &elt); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } + if (add_ret == 0) { + SCFree(elt.value); + } else { + cnt++; + } + } + json_decref(json); + + if (found == false) { + FatalErrorOnInit( + "No valid entries for key '%s' found in the file '%s'", json_key, set->load); + return -1; + } + return cnt; +} + +static int DatajsonLoadSha256(Dataset *set, char *json_key, char *array_key) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + + uint32_t cnt = DatajsonLoadSHA256FromJSON(set, array_key, json_key); + THashConsolidateMemcap(set->hash); + + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + +static uint32_t DatajsonLoadIPv4FromJSON(Dataset *set, char *array_key, char *json_key) +{ + uint32_t cnt = 0; + int add_ret; + json_t *json; + bool 
found = false; + + if (ParseJsonFile(set->load, &json, array_key) == -1) + return -1; + + size_t index; + json_t *value; + json_array_foreach (json, index, value) { + json_t *key = GetSubObjectByKey(value, json_key); + if (key == NULL) { + /* ignore error as it can be a working mode where some entries + are not in the same format */ + continue; + } + + found = true; + + const char *ip_string = json_string_value(key); + struct in_addr in; + if (inet_pton(AF_INET, ip_string, &in) != 1) { + FatalErrorOnInit( + "datajson IPv4 parse failed %s/%s: %s", set->name, set->load, ip_string); + continue; + } + DataJsonType elt = { .value = NULL, .len = 0 }; + elt.value = json_dumps(value, JSON_COMPACT); + elt.len = strlen(elt.value); + + add_ret = DatajsonAdd(set, (const uint8_t *)&in.s_addr, SC_IPV4_LEN, &elt); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } + if (add_ret == 0) { + SCFree(elt.value); + } else { + cnt++; + } + } + json_decref(json); + + if (found == false) { + FatalErrorOnInit( + "No valid entries for key '%s' found in the file '%s'", json_key, set->load); + return 0; + } + + return cnt; +} + +static int DatajsonLoadIPv4(Dataset *set, char *json_key, char *array_key) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + uint32_t cnt = DatajsonLoadIPv4FromJSON(set, array_key, json_key); + THashConsolidateMemcap(set->hash); + + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + +static uint32_t DatajsonLoadIPv6FromJSON(Dataset *set, char *array_key, char *json_key) +{ + uint32_t cnt = 0; + int add_ret; + json_t *json; + bool found = false; + + if (ParseJsonFile(set->load, &json, array_key) == -1) + return -1; + + size_t index; + json_t *value; + json_array_foreach (json, index, value) { + json_t *key = GetSubObjectByKey(value, json_key); + if (key == NULL) { + /* ignore error as it can be a working mode 
where some entries + are not in the same format */ + continue; + } + + found = true; + + const char *ip_string = json_string_value(key); + struct in6_addr in6; + int ret = DatasetParseIpv6String(set, ip_string, &in6); + if (ret < 0) { + FatalErrorOnInit("unable to parse IP address"); + continue; + } + DataJsonType elt = { .value = NULL, .len = 0 }; + elt.value = json_dumps(value, JSON_COMPACT); + elt.len = strlen(elt.value); + + add_ret = DatajsonAdd(set, (const uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &elt); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } + if (add_ret == 0) { + SCFree(elt.value); + } else { + cnt++; + } + } + json_decref(json); + + if (found == false) { + FatalErrorOnInit( + "No valid entries for key '%s' found in the file '%s'", json_key, set->load); + return 0; + } + return cnt; +} + +static int DatajsonLoadIPv6(Dataset *set, char *json_key, char *array_key) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + + uint32_t cnt = DatajsonLoadIPv6FromJSON(set, array_key, json_key); + + THashConsolidateMemcap(set->hash); + + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + +Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, + uint32_t hashsize, char *json_key_value, char *json_array_key) +{ + uint64_t default_memcap = 0; + uint32_t default_hashsize = 0; + if (strlen(name) > DATASET_NAME_MAX_LEN) { + SCLogError("dataset name too long"); + return NULL; + } + + DatasetLock(); + Dataset *set = DatasetSearchByName(name); + if (set) { + if (type != DATASET_TYPE_NOTSET && set->type != type) { + SCLogError("dataset %s already " + "exists and is of type %u", + set->name, set->type); + DatasetUnlock(); + return NULL; + } + + if (load == NULL || strlen(load) == 0) { + // OK, rule keyword doesn't have to set state/load, + // even when yaml set has set it. 
+ } else { + if ((load == NULL && strlen(set->load) > 0) || + (load != NULL && strcmp(set->load, load) != 0)) { + SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load); + DatasetUnlock(); + return NULL; + } + } + + DatasetUnlock(); + return set; + } + + if (type == DATASET_TYPE_NOTSET) { + SCLogError("dataset %s not defined", name); + goto out_err; + } + + set = DatasetAlloc(name); + if (set == NULL) { + SCLogError("dataset %s allocation failed", name); + goto out_err; + } + + strlcpy(set->name, name, sizeof(set->name)); + set->type = type; + if (load && strlen(load)) { + strlcpy(set->load, load, sizeof(set->load)); + SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load); + } + + static const char conf_format_str[] = "datasets.%s.hash"; + char cnf_name[DATASET_NAME_MAX_LEN + (sizeof(conf_format_str) / sizeof(char))]; + int p_ret = snprintf(cnf_name, sizeof(cnf_name), conf_format_str, name); + if (p_ret == 0) { + SCLogError("Can't build configuration variable for set: '%s'", name); + goto out_err; + } + + DatasetGetDefaultMemcap(&default_memcap, &default_hashsize); + switch (type) { + case DATASET_TYPE_MD5: + set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrJsonSet, Md5StrJsonFree, + Md5StrHash, Md5StrCompare, NULL, Md5StrJsonGetLength, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatajsonLoadMd5(set, json_key_value, json_array_key) < 0) + goto out_err; + break; + case DATASET_TYPE_STRING: + set->hash = THashInit(cnf_name, sizeof(StringType), StringJsonSet, StringJsonFree, + StringHash, StringCompare, NULL, StringJsonGetLength, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? 
hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatajsonLoadString(set, json_key_value, json_array_key) < 0) { + SCLogError("dataset %s loading failed", name); + goto out_err; + } + break; + case DATASET_TYPE_SHA256: + set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrJsonSet, Sha256StrJsonFree, + Sha256StrHash, Sha256StrCompare, NULL, Sha256StrJsonGetLength, + load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatajsonLoadSha256(set, json_key_value, json_array_key) < 0) + goto out_err; + break; + case DATASET_TYPE_IPV4: + set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4JsonSet, IPv4JsonFree, IPv4Hash, + IPv4Compare, NULL, IPv4JsonGetLength, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatajsonLoadIPv4(set, json_key_value, json_array_key) < 0) + goto out_err; + break; + case DATASET_TYPE_IPV6: + set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6JsonSet, IPv6JsonFree, IPv6Hash, + IPv6Compare, NULL, IPv6JsonGetLength, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? 
hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatajsonLoadIPv6(set, json_key_value, json_array_key) < 0) + goto out_err; + break; + } + + SCLogDebug( + "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load); + + DatasetAppendSet(set); + + DatasetUnlock(); + return set; +out_err: + if (set) { + if (set->hash) { + THashShutdown(set->hash); + } + SCFree(set); + } + DatasetUnlock(); + return NULL; +} + +static DataJsonResultType DatajsonLookupString( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + StringType lookup = { + .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0 + }; + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + StringType *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatajsonUnlockData(rdata); + return rrep; + } + return rrep; +} + +static DataJsonResultType DatajsonLookupMd5( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != SC_MD5_LEN) + return rrep; + + Md5Type lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.md5, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + Md5Type *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatajsonUnlockData(rdata); + return rrep; + } + return rrep; +} + +static DataJsonResultType DatajsonLookupSha256( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != SC_SHA256_LEN) + return rrep; + + Sha256Type lookup = { .json.value = NULL, .json.len = 0 }; + 
memcpy(lookup.sha256, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + Sha256Type *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatajsonUnlockData(rdata); + return rrep; + } + return rrep; +} + +static DataJsonResultType DatajsonLookupIPv4( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != SC_IPV4_LEN) + return rrep; + + IPv4Type lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv4, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + IPv4Type *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatajsonUnlockData(rdata); + return rrep; + } + return rrep; +} + +static DataJsonResultType DatajsonLookupIPv6( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + /* We can have IPv4 or IPV6 here due to ip.src and ip.dst implementation */ + if (data_len != SC_IPV6_LEN && data_len != SC_IPV4_LEN) + return rrep; + + IPv6Type lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv6, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + IPv6Type *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatajsonUnlockData(rdata); + return rrep; + } + return rrep; +} + +DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = 0 } }; + if (set == NULL) + return rrep; + + switch (set->type) { + case DATASET_TYPE_STRING: + return DatajsonLookupString(set, data, data_len); + case DATASET_TYPE_MD5: + return DatajsonLookupMd5(set, data, data_len); + case DATASET_TYPE_SHA256: + 
return DatajsonLookupSha256(set, data, data_len); + case DATASET_TYPE_IPV4: + return DatajsonLookupIPv4(set, data, data_len); + case DATASET_TYPE_IPV6: + return DatajsonLookupIPv6(set, data, data_len); + default: + break; + } + return rrep; +} + +typedef int (*DatajsonOpFunc)( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json); + +static int DatajsonOpSerialized(Dataset *set, const char *string, const char *json, + DatajsonOpFunc DatajsonOpString, DatajsonOpFunc DatajsonOpMd5, + DatajsonOpFunc DatajsonOpSha256, DatajsonOpFunc DatajsonOpIPv4, + DatajsonOpFunc DatajsonOpIPv6) +{ + int ret; + + if (set == NULL) + return -1; + if (strlen(string) == 0) + return -1; + + DataJsonType jvalue = { .value = NULL, .len = 0 }; + if (json) { + if (ParseJsonLine(json, strlen(json), &jvalue) < 0) { + SCLogNotice("bad json value for dataset %s/%s", set->name, set->load); + return -1; + } + } + + switch (set->type) { + case DATASET_TYPE_STRING: { + uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(string)); + uint8_t decoded[decoded_size]; + uint32_t num_decoded = SCBase64Decode( + (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded); + if (num_decoded == 0) + goto operror; + ret = DatajsonOpString(set, decoded, num_decoded, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_MD5: { + if (strlen(string) != SC_MD5_HEX_LEN) + goto operror; + uint8_t hash[SC_MD5_LEN]; + if (HexToRaw((const uint8_t *)string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) + goto operror; + ret = DatajsonOpMd5(set, hash, SC_MD5_LEN, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_SHA256: { + if (strlen(string) != SC_SHA256_HEX_LEN) + goto operror; + uint8_t hash[SC_SHA256_LEN]; + if (HexToRaw((const uint8_t *)string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) + goto operror; + ret = DatajsonOpSha256(set, hash, SC_SHA256_LEN, &jvalue); + if (ret <= 0) { + 
SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_IPV4: { + struct in_addr in; + if (inet_pton(AF_INET, string, &in) != 1) + goto operror; + ret = DatajsonOpIPv4(set, (uint8_t *)&in.s_addr, SC_IPV4_LEN, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_IPV6: { + struct in6_addr in6; + if (DatasetParseIpv6String(set, string, &in6) != 0) { + SCLogError("Dataset failed to import %s as IPv6", string); + goto operror; + } + ret = DatajsonOpIPv6(set, (uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + } + SCFree(jvalue.value); + return -1; +operror: + SCFree(jvalue.value); + return -2; +} + +/** \brief add serialized data to json set + * \retval int 1 added + * \retval int 0 already in hash + * \retval int -1 API error (not added) + * \retval int -2 DATA error + */ +int DatajsonAddSerialized(Dataset *set, const char *value, const char *json) +{ + return DatajsonOpSerialized(set, value, json, DatajsonAddString, DatajsonAddMd5, + DatajsonAddSha256, DatajsonAddIPv4, DatajsonAddIPv6); +} diff --git a/src/datajson.h b/src/datajson.h new file mode 100644 index 0000000000..449f1d0573 --- /dev/null +++ b/src/datajson.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Eric Leblond + */ + +#ifndef SURICATA_DATAJSON_H +#define SURICATA_DATAJSON_H + +#include +#include "datasets.h" + +#define DATAJSON_JSON_LENGTH 1024 + +typedef struct DataJsonType { + char *value; + size_t len; +} DataJsonType; + +typedef struct DataJsonResultType { + bool found; + DataJsonType json; +} DataJsonResultType; + +/* Common functions */ + +Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, + uint32_t hashsize, char *json_key_value, char *json_array_key); + +DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len); + +int DatajsonAddSerialized(Dataset *set, const char *value, const char *json); + +#endif /* SURICATA_DATAJSON_H*/ diff --git a/src/datasets-ipv4.c b/src/datasets-ipv4.c index 67f8778fd2..92dd49ece7 100644 --- a/src/datasets-ipv4.c +++ b/src/datasets-ipv4.c @@ -38,6 +38,17 @@ int IPv4Set(void *dst, void *src) return 0; } +int IPv4JsonSet(void *dst, void *src) +{ + IPv4Type *src_s = src; + IPv4Type *dst_s = dst; + memcpy(dst_s->ipv4, src_s->ipv4, sizeof(dst_s->ipv4)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + return 0; +} + bool IPv4Compare(void *a, void *b) { const IPv4Type *as = a; @@ -56,3 +67,17 @@ uint32_t IPv4Hash(uint32_t hash_seed, void *s) void IPv4Free(void *s) { } + +void IPv4JsonFree(void *s) +{ + const IPv4Type *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} + +uint32_t IPv4JsonGetLength(void *s) +{ + const IPv4Type *as = s; + return as->json.len; +} diff --git a/src/datasets-ipv4.h b/src/datasets-ipv4.h index 4a840e9aa6..9bb0c16641 100644 --- a/src/datasets-ipv4.h +++ b/src/datasets-ipv4.h @@ -25,15 +25,22 @@ #define SURICATA_DATASETS_IPV4_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct IPv4Type { uint8_t ipv4[4]; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; } IPv4Type; int IPv4Set(void *dst, void 
*src); +int IPv4JsonSet(void *dst, void *src); bool IPv4Compare(void *a, void *b); uint32_t IPv4Hash(uint32_t hash_seed, void *s); void IPv4Free(void *s); +void IPv4JsonFree(void *s); +uint32_t IPv4JsonGetLength(void *s); #endif /* SURICATA_DATASETS_IPV4_H */ diff --git a/src/datasets-ipv6.c b/src/datasets-ipv6.c index ac96374da7..2888b34826 100644 --- a/src/datasets-ipv6.c +++ b/src/datasets-ipv6.c @@ -24,6 +24,7 @@ #include "suricata-common.h" #include "conf.h" #include "datasets.h" +#include "datajson.h" #include "datasets-ipv6.h" #include "util-hash-lookup3.h" #include "util-thash.h" @@ -38,6 +39,17 @@ int IPv6Set(void *dst, void *src) return 0; } +int IPv6JsonSet(void *dst, void *src) +{ + IPv6Type *src_s = src; + IPv6Type *dst_s = dst; + memcpy(dst_s->ipv6, src_s->ipv6, sizeof(dst_s->ipv6)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + return 0; +} + bool IPv6Compare(void *a, void *b) { const IPv6Type *as = a; @@ -56,3 +68,17 @@ uint32_t IPv6Hash(uint32_t hash_seed, void *s) void IPv6Free(void *s) { } + +void IPv6JsonFree(void *s) +{ + const IPv6Type *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} + +uint32_t IPv6JsonGetLength(void *s) +{ + const IPv6Type *as = s; + return as->json.len; +} diff --git a/src/datasets-ipv6.h b/src/datasets-ipv6.h index c75ad194d6..4251c77a28 100644 --- a/src/datasets-ipv6.h +++ b/src/datasets-ipv6.h @@ -25,15 +25,22 @@ #define SURICATA_DATASETS_IPV6_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct IPv6Type { uint8_t ipv6[16]; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; } IPv6Type; int IPv6Set(void *dst, void *src); +int IPv6JsonSet(void *dst, void *src); bool IPv6Compare(void *a, void *b); uint32_t IPv6Hash(uint32_t hash_seed, void *s); void IPv6Free(void *s); +void IPv6JsonFree(void *s); +uint32_t IPv6JsonGetLength(void *s); #endif /* __DATASETS_IPV4_H__ */ diff --git a/src/datasets-md5.c b/src/datasets-md5.c index 
28fd37d830..92bcae3442 100644 --- a/src/datasets-md5.c +++ b/src/datasets-md5.c @@ -24,6 +24,7 @@ #include "suricata-common.h" #include "conf.h" #include "datasets.h" +#include "datajson.h" #include "datasets-md5.h" #include "util-hash-lookup3.h" @@ -39,6 +40,16 @@ int Md5StrSet(void *dst, void *src) return 0; } +int Md5StrJsonSet(void *dst, void *src) +{ + Md5Type *src_s = src; + Md5Type *dst_s = dst; + memcpy(dst_s->md5, src_s->md5, sizeof(dst_s->md5)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + return 0; +} + bool Md5StrCompare(void *a, void *b) { const Md5Type *as = a; @@ -57,3 +68,17 @@ uint32_t Md5StrHash(uint32_t hash_seed, void *s) void Md5StrFree(void *s) { } + +void Md5StrJsonFree(void *s) +{ + const Md5Type *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} + +uint32_t Md5StrJsonGetLength(void *s) +{ + const Md5Type *as = s; + return as->json.len; +} diff --git a/src/datasets-md5.h b/src/datasets-md5.h index 88c1ff1dfd..5fdbd795c7 100644 --- a/src/datasets-md5.h +++ b/src/datasets-md5.h @@ -25,15 +25,22 @@ #define SURICATA_DATASETS_MD5_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct Md5Type { uint8_t md5[16]; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; } Md5Type; int Md5StrSet(void *dst, void *src); +int Md5StrJsonSet(void *dst, void *src); bool Md5StrCompare(void *a, void *b); uint32_t Md5StrHash(uint32_t hash_seed, void *s); void Md5StrFree(void *s); +void Md5StrJsonFree(void *s); +uint32_t Md5StrJsonGetLength(void *s); #endif /* SURICATA_DATASETS_MD5_H */ diff --git a/src/datasets-sha256.c b/src/datasets-sha256.c index 240939c084..b7fd48aed1 100644 --- a/src/datasets-sha256.c +++ b/src/datasets-sha256.c @@ -24,6 +24,7 @@ #include "suricata-common.h" #include "conf.h" #include "datasets.h" +#include "datajson.h" #include "datasets-sha256.h" #include "util-hash-lookup3.h" #include "util-thash.h" @@ -37,6 +38,16 @@ int Sha256StrSet(void *dst, void 
*src) return 0; } +int Sha256StrJsonSet(void *dst, void *src) +{ + Sha256Type *src_s = src; + Sha256Type *dst_s = dst; + memcpy(dst_s->sha256, src_s->sha256, sizeof(dst_s->sha256)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + return 0; +} + bool Sha256StrCompare(void *a, void *b) { Sha256Type *as = a; @@ -56,3 +67,17 @@ void Sha256StrFree(void *s) { // no dynamic data } + +void Sha256StrJsonFree(void *s) +{ + const Sha256Type *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} + +uint32_t Sha256StrJsonGetLength(void *s) +{ + const Sha256Type *as = s; + return as->json.len; +} diff --git a/src/datasets-sha256.h b/src/datasets-sha256.h index 4f99b85a96..16c5932b18 100644 --- a/src/datasets-sha256.h +++ b/src/datasets-sha256.h @@ -25,15 +25,22 @@ #define SURICATA_DATASETS_SHA256_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct Sha256Type { uint8_t sha256[32]; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; } Sha256Type; int Sha256StrSet(void *dst, void *src); +int Sha256StrJsonSet(void *dst, void *src); bool Sha256StrCompare(void *a, void *b); uint32_t Sha256StrHash(uint32_t hash_seed, void *s); void Sha256StrFree(void *s); +void Sha256StrJsonFree(void *s); +uint32_t Sha256StrJsonGetLength(void *s); #endif /* SURICATA_DATASETS_SHA256_H */ diff --git a/src/datasets-string.c b/src/datasets-string.c index da6e039bc3..c9c2b3b4be 100644 --- a/src/datasets-string.c +++ b/src/datasets-string.c @@ -73,6 +73,24 @@ int StringSet(void *dst, void *src) return 0; } +int StringJsonSet(void *dst, void *src) +{ + StringType *src_s = src; + StringType *dst_s = dst; + SCLogDebug("dst %p src %p, src_s->ptr %p src_s->len %u", dst, src, src_s->ptr, src_s->len); + + dst_s->len = src_s->len; + dst_s->ptr = SCMalloc(dst_s->len); + BUG_ON(dst_s->ptr == NULL); + memcpy(dst_s->ptr, src_s->ptr, dst_s->len); + + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + 
SCLogDebug("dst %p src %p, dst_s->ptr %p dst_s->len %u", dst, src, dst_s->ptr, dst_s->len); + return 0; +} + bool StringCompare(void *a, void *b) { const StringType *as = a; @@ -102,3 +120,18 @@ void StringFree(void *s) StringType *str = s; SCFree(str->ptr); } + +void StringJsonFree(void *s) +{ + StringType *str = s; + SCFree(str->ptr); + if (str->json.value) { + SCFree(str->json.value); + } +} + +uint32_t StringJsonGetLength(void *s) +{ + StringType *str = s; + return str->json.len + str->len; +} diff --git a/src/datasets-string.h b/src/datasets-string.h index 745754fc49..c3952d8ccf 100644 --- a/src/datasets-string.h +++ b/src/datasets-string.h @@ -25,18 +25,25 @@ #define SURICATA_DATASETS_STRING_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct StringType { uint32_t len; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; uint8_t *ptr; } StringType; int StringSet(void *dst, void *src); +int StringJsonSet(void *dst, void *src); bool StringCompare(void *a, void *b); uint32_t StringHash(uint32_t hash_seed, void *s); uint32_t StringGetLength(void *s); void StringFree(void *s); +void StringJsonFree(void *s); int StringAsBase64(const void *s, char *out, size_t out_size); +uint32_t StringJsonGetLength(void *s); #endif /* SURICATA_DATASETS_STRING_H */ diff --git a/src/datasets.c b/src/datasets.c index 7addb37274..90ba78ee4c 100644 --- a/src/datasets.c +++ b/src/datasets.c @@ -32,7 +32,9 @@ #include "datasets-md5.h" #include "datasets-sha256.h" #include "datasets-reputation.h" +#include "datajson.h" #include "util-conf.h" +#include "util-mem.h" #include "util-thash.h" #include "util-print.h" #include "util-byte.h" @@ -57,7 +59,6 @@ static inline void DatasetUnlockData(THashData *d) THashDataUnlock(d); } static bool DatasetIsStatic(const char *save, const char *load); -static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize); enum DatasetTypes DatasetGetTypeFromString(const char *s) { @@ -74,7 +75,23 @@ enum 
DatasetTypes DatasetGetTypeFromString(const char *s) return DATASET_TYPE_NOTSET; } -static Dataset *DatasetAlloc(const char *name) +void DatasetAppendSet(Dataset *set) +{ + set->next = sets; + sets = set; +} + +void DatasetLock(void) +{ + SCMutexLock(&sets_lock); +} + +void DatasetUnlock(void) +{ + SCMutexUnlock(&sets_lock); +} + +Dataset *DatasetAlloc(const char *name) { Dataset *set = SCCalloc(1, sizeof(*set)); if (set) { @@ -83,7 +100,7 @@ static Dataset *DatasetAlloc(const char *name) return set; } -static Dataset *DatasetSearchByName(const char *name) +Dataset *DatasetSearchByName(const char *name) { Dataset *set = sets; while (set) { @@ -118,7 +135,7 @@ static int DatasetLoadIPv4(Dataset *set) return 0; } -static int ParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6) +int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6) { /* Checking IPv6 case */ char *got_colon = strchr(line, ':'); @@ -249,8 +266,8 @@ enum DatasetGetPathType { TYPE_LOAD, }; -static void DatasetGetPath(const char *in_path, - char *out_path, size_t out_size, enum DatasetGetPathType type) +static void DatasetGetPath( + const char *in_path, char *out_path, size_t out_size, enum DatasetGetPathType type) { char path[PATH_MAX]; struct stat st; @@ -372,7 +389,7 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, } } - GetDefaultMemcap(&default_memcap, &default_hashsize); + DatasetGetDefaultMemcap(&default_memcap, &default_hashsize); if (hashsize == 0) { hashsize = default_hashsize; } @@ -547,7 +564,7 @@ void DatasetPostReloadCleanup(void) * despite 2048 commented out in the default yaml. 
*/ #define DATASETS_HASHSIZE_DEFAULT 4096 -static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize) +void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize) { const char *str = NULL; if (SCConfGet("datasets.defaults.memcap", &str) == 1) { @@ -576,7 +593,7 @@ int DatasetsInit(void) SCConfNode *datasets = SCConfGetNode("datasets"); uint64_t default_memcap = 0; uint32_t default_hashsize = 0; - GetDefaultMemcap(&default_memcap, &default_hashsize); + DatasetGetDefaultMemcap(&default_memcap, &default_hashsize); if (datasets != NULL) { const char *str = NULL; if (SCConfGet("datasets.limits.total-hashsizes", &str) == 1) { @@ -1378,7 +1395,7 @@ static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc D } case DATASET_TYPE_IPV6: { struct in6_addr in6; - if (ParseIpv6String(set, string, &in6) != 0) { + if (DatasetParseIpv6String(set, string, &in6) != 0) { SCLogError("Dataset failed to import %s as IPv6", string); return -2; } diff --git a/src/datasets.h b/src/datasets.h index 1abfa889ba..60787ae9fe 100644 --- a/src/datasets.h +++ b/src/datasets.h @@ -28,6 +28,11 @@ void DatasetsSave(void); void DatasetReload(void); void DatasetPostReloadCleanup(void); +typedef enum { + DATASET_FORMAT_CSV = 0, + DATASET_FORMAT_JSON, +} DatasetFormats; + enum DatasetTypes { #define DATASET_TYPE_NOTSET 0 DATASET_TYPE_STRING = 1, @@ -53,6 +58,11 @@ typedef struct Dataset { } Dataset; enum DatasetTypes DatasetGetTypeFromString(const char *s); +void DatasetAppendSet(Dataset *set); +Dataset *DatasetAlloc(const char *name); +void DatasetLock(void); +void DatasetUnlock(void); +Dataset *DatasetSearchByName(const char *name); Dataset *DatasetFind(const char *name, enum DatasetTypes type); Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t memcap, uint32_t hashsize); @@ -62,6 +72,9 @@ int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len); DataRepResultType DatasetLookupwRep(Dataset 
*set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep); +void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize); +int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6); + int DatasetAddSerialized(Dataset *set, const char *string); int DatasetRemoveSerialized(Dataset *set, const char *string); int DatasetLookupSerialized(Dataset *set, const char *string); diff --git a/src/decode.c b/src/decode.c index d55d282242..164985930c 100644 --- a/src/decode.c +++ b/src/decode.c @@ -145,9 +145,38 @@ PacketAlert *PacketAlertCreate(void) return pa_array; } +void PacketAlertRecycle(PacketAlert *pa_array) +{ + if (pa_array != NULL) { + for (int i = 0; i < packet_alert_max; i++) { + if (pa_array[i].json_info.next != NULL) { + struct ExtraDataJsonList *current_json = pa_array[i].json_info.next; + while (current_json) { + struct ExtraDataJsonList *next_json = current_json->next; + SCFree(current_json); + current_json = next_json; + } + } + pa_array[i].json_info.json_string = NULL; + pa_array[i].json_info.next = NULL; + } + } +} + void PacketAlertFree(PacketAlert *pa) { if (pa != NULL) { + for (int i = 0; i < packet_alert_max; i++) { + /* first item is not allocated so start at second one */ + if (pa[i].json_info.next != NULL) { + struct ExtraDataJsonList *allocated_json = pa[i].json_info.next; + while (allocated_json) { + struct ExtraDataJsonList *next_json = allocated_json->next; + SCFree(allocated_json); + allocated_json = next_json; + } + } + } SCFree(pa); } } diff --git a/src/decode.h b/src/decode.h index 5a65805d4f..dccd3f48fc 100644 --- a/src/decode.h +++ b/src/decode.h @@ -238,6 +238,11 @@ typedef uint16_t Port; #define PKT_IS_TOSERVER(p) (((p)->flowflags & FLOW_PKT_TOSERVER)) #define PKT_IS_TOCLIENT(p) (((p)->flowflags & FLOW_PKT_TOCLIENT)) +struct ExtraDataJsonList { + char *json_string; + struct ExtraDataJsonList *next; +}; + /* structure to store the sids/gids/etc the detection engine * found in this packet */ 
typedef struct PacketAlert_ { @@ -247,6 +252,7 @@ typedef struct PacketAlert_ { const struct Signature_ *s; uint64_t tx_id; /* Used for sorting */ int64_t frame_id; + struct ExtraDataJsonList json_info; } PacketAlert; /** @@ -288,6 +294,7 @@ typedef struct PacketAlerts_ { } PacketAlerts; PacketAlert *PacketAlertCreate(void); +void PacketAlertRecycle(PacketAlert *pa_array); void PacketAlertFree(PacketAlert *pa); diff --git a/src/detect-dataset.c b/src/detect-dataset.c index 5d9a932bda..18a96f6380 100644 --- a/src/detect-dataset.c +++ b/src/detect-dataset.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2020 Open Information Security Foundation +/* Copyright (C) 2018-2025 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -28,6 +28,7 @@ #include "detect.h" #include "threads.h" #include "datasets.h" +#include "datajson.h" #include "detect-dataset.h" #include "detect-parse.h" @@ -60,6 +61,51 @@ void DetectDatasetRegister (void) sigmatch_table[DETECT_DATASET].Free = DetectDatasetFree; } +/* + 1 match + 0 no match + -1 can't match + */ +static int DetectDatajsonBufferMatch(DetectEngineThreadCtx *det_ctx, const DetectDatasetData *sd, + const uint8_t *data, const uint32_t data_len) +{ + if (data == NULL || data_len == 0) + return 0; + + switch (sd->cmd) { + case DETECT_DATASET_CMD_ISSET: { + // PrintRawDataFp(stdout, data, data_len); + DataJsonResultType r = DatajsonLookup(sd->set, data, data_len); + SCLogDebug("r found: %d, len: %zu", r.found, r.json.len); + if (!r.found) + return 0; + if (r.json.len > 0) { + /* we need to add 3 on length check for the added quotes and colon when + building the json string */ + if ((det_ctx->json_content_len < SIG_JSON_CONTENT_ARRAY_LEN) && + (r.json.len + strlen(sd->json_key) + 3 < SIG_JSON_CONTENT_ITEM_LEN)) { + snprintf(det_ctx->json_content[det_ctx->json_content_len].json_content, + SIG_JSON_CONTENT_ITEM_LEN, 
"\"%s\":%s", sd->json_key, r.json.value); + det_ctx->json_content[det_ctx->json_content_len].id = sd->id; + det_ctx->json_content_len++; + } + } + return 1; + } + case DETECT_DATASET_CMD_ISNOTSET: { + // PrintRawDataFp(stdout, data, data_len); + DataJsonResultType r = DatajsonLookup(sd->set, data, data_len); + SCLogDebug("r found: %d, len: %zu", r.found, r.json.len); + if (r.found) + return 0; + return 1; + } + default: + DEBUG_VALIDATE_BUG_ON("unknown dataset with json command"); + } + return 0; +} + /* 1 match 0 no match @@ -72,6 +118,10 @@ int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx, if (data == NULL || data_len == 0) return 0; + if (sd->format == DATASET_FORMAT_JSON) { + return DetectDatajsonBufferMatch(det_ctx, sd, data, data_len); + } + switch (sd->cmd) { case DETECT_DATASET_CMD_ISSET: { //PrintRawDataFp(stdout, data, data_len); @@ -110,18 +160,22 @@ int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx, static int DetectDatasetParse(const char *str, char *cmd, int cmd_len, char *name, int name_len, enum DatasetTypes *type, char *load, size_t load_size, char *save, size_t save_size, - uint64_t *memcap, uint32_t *hashsize) + uint64_t *memcap, uint32_t *hashsize, DatasetFormats *format, char *value_key, + size_t value_key_size, char *array_key, size_t array_key_size, char *enrichment_key, + size_t enrichment_key_size) { bool cmd_set = false; bool name_set = false; bool load_set = false; bool save_set = false; bool state_set = false; + bool format_set = false; char copy[strlen(str)+1]; strlcpy(copy, str, sizeof(copy)); char *xsaveptr = NULL; char *key = strtok_r(copy, ",", &xsaveptr); + while (key != NULL) { while (*key != '\0' && isblank(*key)) { key++; @@ -203,7 +257,41 @@ static int DetectDatasetParse(const char *str, char *cmd, int cmd_len, char *nam strlcpy(load, val, load_size); strlcpy(save, val, save_size); state_set = true; + } else if (strcmp(key, "format") == 0) { + if (format_set) { + SCLogWarning("'format' can only appear 
once"); + return -1; + } + SCLogDebug("format %s", val); + if (strcmp(val, "csv") == 0) { + *format = DATASET_FORMAT_CSV; + } else if (strcmp(val, "json") == 0) { + *format = DATASET_FORMAT_JSON; + } else { + SCLogWarning("unknown format %s", val); + return -1; + } + format_set = true; + } else if (strcmp(key, "value_key") == 0) { + if (strlen(val) >= value_key_size) { /* strlcpy() keeps one byte for the NUL */ + SCLogWarning("'key' value too long (limit is %zu)", value_key_size - 1); + return -1; + } + strlcpy(value_key, val, value_key_size); + } else if (strcmp(key, "array_key") == 0) { + if (strlen(val) >= array_key_size) { + SCLogWarning("'key' value too long (limit is %zu)", array_key_size - 1); + return -1; + } + strlcpy(array_key, val, array_key_size); + } else if (strcmp(key, "enrichment_key") == 0) { + if (strlen(val) >= enrichment_key_size) { + SCLogWarning("'key' value too long (limit is %zu)", enrichment_key_size - 1); + return -1; + } + strlcpy(enrichment_key, val, enrichment_key_size); } + if (strcmp(key, "memcap") == 0) { if (ParseSizeStringU64(val, memcap) < 0) { SCLogWarning("invalid value for memcap: %s," @@ -361,6 +449,10 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst enum DatasetTypes type = DATASET_TYPE_NOTSET; char load[PATH_MAX] = ""; char save[PATH_MAX] = ""; + DatasetFormats format = DATASET_FORMAT_CSV; + char value_key[SIG_JSON_CONTENT_KEY_LEN] = ""; + char array_key[SIG_JSON_CONTENT_KEY_LEN] = ""; + char enrichment_key[SIG_JSON_CONTENT_KEY_LEN] = ""; if (DetectBufferGetActiveList(de_ctx, s) == -1) { SCLogError("datasets are only supported for sticky buffers"); @@ -374,7 +466,9 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst } if (!DetectDatasetParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load, - sizeof(load), save, sizeof(save), &memcap, &hashsize)) { + sizeof(load), save, sizeof(save), &memcap, &hashsize, &format, value_key, + sizeof(value_key), array_key, sizeof(array_key), enrichment_key, +
sizeof(enrichment_key))) { return -1; } @@ -383,14 +477,33 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst } else if (strcmp(cmd_str,"isnotset") == 0) { cmd = DETECT_DATASET_CMD_ISNOTSET; } else if (strcmp(cmd_str,"set") == 0) { + if (format == DATASET_FORMAT_JSON) { + SCLogError("json format is not supported for 'set' command"); + return -1; + } cmd = DETECT_DATASET_CMD_SET; } else if (strcmp(cmd_str,"unset") == 0) { + if (format == DATASET_FORMAT_JSON) { + SCLogError("json format is not supported for 'unset' command"); + return -1; + } cmd = DETECT_DATASET_CMD_UNSET; } else { SCLogError("dataset action \"%s\" is not supported.", cmd_str); return -1; } + if (format == DATASET_FORMAT_JSON) { + if (strlen(save) != 0) { + SCLogError("json format is not supported with 'save' or 'state' option"); + return -1; + } + if (strlen(enrichment_key) == 0) { + SCLogError("json format needs an 'enrichment_key' parameter"); + return -1; + } + } + /* if just 'load' is set, we load data from the same dir as the * rule file. 
If load+save is used, we use data dir */ if (strlen(save) == 0 && strlen(load) != 0) { @@ -411,7 +524,13 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst } SCLogDebug("name '%s' load '%s' save '%s'", name, load, save); - Dataset *set = DatasetGet(name, type, save, load, memcap, hashsize); + Dataset *set = NULL; + + if (format == DATASET_FORMAT_JSON) { + set = DatajsonGet(name, type, load, memcap, hashsize, value_key, array_key); + } else { + set = DatasetGet(name, type, save, load, memcap, hashsize); + } if (set == NULL) { SCLogError("failed to set up dataset '%s'.", name); return -1; @@ -423,6 +542,11 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst cd->set = set; cd->cmd = cmd; + cd->format = format; + if (format == DATASET_FORMAT_JSON) { + strlcpy(cd->json_key, enrichment_key, sizeof(cd->json_key)); + } + cd->id = s; SCLogDebug("cmd %s, name %s", cmd_str, strlen(name) ? name : "(none)"); diff --git a/src/detect-dataset.h b/src/detect-dataset.h index 047a5b11cb..a4db07b219 100644 --- a/src/detect-dataset.h +++ b/src/detect-dataset.h @@ -25,10 +25,15 @@ #define SURICATA_DETECT_DATASET_H #include "datasets.h" +#include "datajson.h" typedef struct DetectDatasetData_ { Dataset *set; uint8_t cmd; + DatasetFormats format; + DataJsonType json; + char json_key[SIG_JSON_CONTENT_KEY_LEN]; + void *id; } DetectDatasetData; int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx, diff --git a/src/detect-engine-alert.c b/src/detect-engine-alert.c index 1378e8e0df..e3d67ae3b5 100644 --- a/src/detect-engine-alert.c +++ b/src/detect-engine-alert.c @@ -297,6 +297,30 @@ static inline PacketAlert PacketAlertSet( /* Set tx_id if the frame has it */ pa.tx_id = tx_id; pa.frame_id = (alert_flags & PACKET_ALERT_FLAG_FRAME) ? 
det_ctx->frame_id : 0; + pa.json_info.json_string = NULL; + pa.json_info.next = NULL; + if (det_ctx->json_content_len) { + /* We have some JSON attached in the current detection so let's try + to see if some need to be used for current signature. */ + struct ExtraDataJsonList *current_json = &pa.json_info; + for (size_t i = 0; i < det_ctx->json_content_len; i++) { + if (s == det_ctx->json_content[i].id) { + if (current_json->json_string != NULL) { + struct ExtraDataJsonList *next_json = + SCCalloc(1, sizeof(struct ExtraDataJsonList)); + if (next_json) { + current_json->next = next_json; + current_json = next_json; + current_json->next = NULL; + } else { + /* Allocation error, let's return now */ + return pa; + } + } + current_json->json_string = det_ctx->json_content[i].json_content; + } + } + } return pa; } diff --git a/src/detect-engine-register.h b/src/detect-engine-register.h index 0e5e52242c..ceb86ff98e 100644 --- a/src/detect-engine-register.h +++ b/src/detect-engine-register.h @@ -85,6 +85,7 @@ enum DetectKeywordId { DETECT_BYTE_EXTRACT, DETECT_DATASET, DETECT_DATAREP, + DETECT_DATAJSON, DETECT_BASE64_DECODE, DETECT_BASE64_DATA, DETECT_BSIZE, diff --git a/src/detect.c b/src/detect.c index 906a81200a..e6e297ca73 100644 --- a/src/detect.c +++ b/src/detect.c @@ -941,6 +941,7 @@ static DetectRunScratchpad DetectRunSetup(const DetectEngineCtx *de_ctx, det_ctx->base64_decoded_len = 0; det_ctx->raw_stream_progress = 0; det_ctx->match_array_cnt = 0; + det_ctx->json_content_len = 0; det_ctx->alert_queue_size = 0; p->alerts.drop.action = 0; diff --git a/src/detect.h b/src/detect.h index 7f08f50fb3..74ed556467 100644 --- a/src/detect.h +++ b/src/detect.h @@ -1228,6 +1228,16 @@ typedef struct PostRuleMatchWorkQueue { uint32_t size; /**< allocation size in number of elements. 
*/ } PostRuleMatchWorkQueue; +#define SIG_JSON_CONTENT_ARRAY_LEN 16 +#define SIG_JSON_CONTENT_ITEM_LEN 1024 +#define SIG_JSON_CONTENT_KEY_LEN 32 + +/** structure to store the json content with info on sig that triggered it */ +typedef struct SigJsonContent { + void *id; + char json_content[SIG_JSON_CONTENT_ITEM_LEN]; +} SigJsonContent; + /** * Detection engine thread data. */ @@ -1268,6 +1278,9 @@ typedef struct DetectEngineThreadCtx_ { /* byte_* values */ uint64_t *byte_values; + SigJsonContent json_content[SIG_JSON_CONTENT_ARRAY_LEN]; + size_t json_content_len; + /* counter for the filestore array below -- up here for cache reasons. */ uint16_t filestore_cnt; diff --git a/src/output-json-alert.c b/src/output-json-alert.c index ed2bbb584a..003e3f56c5 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -253,6 +253,15 @@ void AlertJsonHeader(const Packet *p, const PacketAlert *pa, SCJsonBuilder *js, AlertJsonMetadata(pa, js); } + if (pa->json_info.json_string != NULL) { + SCJbOpenObject(js, "extra"); + const struct ExtraDataJsonList *json_info = &pa->json_info; + while (json_info) { + SCJbSetFormatted(js, json_info->json_string); + json_info = json_info->next; + } + SCJbClose(js); + } if (flags & LOG_JSON_RULE) { SCJbSetString(js, "rule", pa->s->sig_str); } diff --git a/src/packet.c b/src/packet.c index 5254c8cde9..1bff85a6fc 100644 --- a/src/packet.c +++ b/src/packet.c @@ -127,6 +127,7 @@ void PacketReinit(Packet *p) p->alerts.discarded = 0; p->alerts.suppressed = 0; p->alerts.drop.action = 0; + PacketAlertRecycle(p->alerts.alerts); p->pcap_cnt = 0; p->tunnel_rtv_cnt = 0; p->tunnel_tpr_cnt = 0; diff --git a/src/runmode-unix-socket.c b/src/runmode-unix-socket.c index e45fb3ae1e..8c28e4de10 100644 --- a/src/runmode-unix-socket.c +++ b/src/runmode-unix-socket.c @@ -55,6 +55,7 @@ #include "conf-yaml-loader.h" #include "datasets.h" +#include "datajson.h" #include "runmode-unix-socket.h" int unix_socket_mode_is_running = 0; @@ -806,6 +807,74 @@ TmEcode 
UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data) } } +/** + * \brief Command to add data to a datajson + * + * \param cmd the content of command Arguments as a json_t object + * \param answer the json_t object that has to be used to answer + * \param data pointer to data defining the context here a PcapCommand:: + */ +TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data) +{ + /* 1 get dataset name */ + json_t *narg = json_object_get(cmd, "setname"); + if (!json_is_string(narg)) { + json_object_set_new(answer, "message", json_string("setname is not a string")); + return TM_ECODE_FAILED; + } + const char *set_name = json_string_value(narg); + + /* 2 get the data type */ + json_t *targ = json_object_get(cmd, "settype"); + if (!json_is_string(targ)) { + json_object_set_new(answer, "message", json_string("settype is not a string")); + return TM_ECODE_FAILED; + } + const char *type = json_string_value(targ); + + /* 3 get value */ + json_t *varg = json_object_get(cmd, "datavalue"); + if (!json_is_string(varg)) { + json_object_set_new(answer, "message", json_string("datavalue is not string")); + return TM_ECODE_FAILED; + } + const char *value = json_string_value(varg); + + /* 4 get json: validate jarg itself, json_string_value() returns NULL for non-strings */ + json_t *jarg = json_object_get(cmd, "datajson"); + if (!json_is_string(jarg)) { + json_object_set_new(answer, "message", json_string("datajson is not string")); + return TM_ECODE_FAILED; + } + const char *json = json_string_value(jarg); + + SCLogDebug("datajson-add: %s type %s value %s json %s", set_name, type, value, json); + + enum DatasetTypes t = DatasetGetTypeFromString(type); + if (t == DATASET_TYPE_NOTSET) { + json_object_set_new(answer, "message", json_string("unknown settype")); + return TM_ECODE_FAILED; + } + + Dataset *set = DatasetFind(set_name, t); + if (set == NULL) { + json_object_set_new(answer, "message", json_string("set not found or wrong type")); + return TM_ECODE_FAILED; + } + + int r = DatajsonAddSerialized(set, value, json); + if
(r == 1) { + json_object_set_new(answer, "message", json_string("data added")); + return TM_ECODE_OK; + } else if (r == 0) { + json_object_set_new(answer, "message", json_string("data already in set")); + return TM_ECODE_OK; + } else { + json_object_set_new(answer, "message", json_string("failed to add data")); + return TM_ECODE_FAILED; + } +} + static bool JsonU32Value(json_t *jarg, uint32_t *ret) { int64_t r = json_integer_value(jarg); diff --git a/src/runmode-unix-socket.h b/src/runmode-unix-socket.h index 8ea432f57d..dfd76b985d 100644 --- a/src/runmode-unix-socket.h +++ b/src/runmode-unix-socket.h @@ -38,6 +38,7 @@ TmEcode UnixSocketDatasetRemove(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketDatasetDump(json_t *cmd, json_t *answer, void *data); TmEcode UnixSocketDatasetClear(json_t *cmd, json_t *answer, void *data); TmEcode UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data); +TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data); TmEcode UnixSocketRegisterTenantHandler(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketUnregisterTenantHandler(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketRegisterTenant(json_t *cmd, json_t* answer, void *data); diff --git a/src/unix-manager.c b/src/unix-manager.c index daa05a2b92..067a89fb94 100644 --- a/src/unix-manager.c +++ b/src/unix-manager.c @@ -1111,6 +1111,8 @@ int UnixManagerInit(void) UnixManagerRegisterCommand("dataset-add", UnixSocketDatasetAdd, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand("dataset-remove", UnixSocketDatasetRemove, &command, UNIX_CMD_TAKE_ARGS); + UnixManagerRegisterCommand( + "dataset-add-json", UnixSocketDatajsonAdd, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand( "get-flow-stats-by-id", UnixSocketGetFlowStatsById, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand("dataset-dump", UnixSocketDatasetDump, NULL, 0);