From: Lukas Sismis Date: Sun, 21 Nov 2021 22:18:36 +0000 (+0100) Subject: dpdk: initial support with workers runmode X-Git-Tag: suricata-7.0.0-beta1~1109 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a7faed12450b85e9108868861723741fc93716fa;p=thirdparty%2Fsuricata.git dpdk: initial support with workers runmode Register a new runmode - DPDK. This enables a new flag on Suricata start (--dpdk). With the flag given, DPDK runmode is enabled. Runmode loads the configuration and then initializes EAL. If successful, it configures the physical NICs according to the configuration file. After that, worker threads are initialized and then are in continuous receive loop. --- diff --git a/src/Makefile.am b/src/Makefile.am index 0f648db56d..bfdead9fb8 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -424,6 +424,7 @@ noinst_HEADERS = \ respond-reject.h \ respond-reject-libnet11.h \ runmode-af-packet.h \ + runmode-dpdk.h \ runmode-erf-dag.h \ runmode-erf-file.h \ runmode-ipfw.h \ @@ -441,6 +442,7 @@ noinst_HEADERS = \ rust-context.h \ rust.h \ source-af-packet.h \ + source-dpdk.h \ source-erf-dag.h \ source-erf-file.h \ source-ipfw.h \ @@ -505,6 +507,7 @@ noinst_HEADERS = \ util-decode-mime.h \ util-detect.h \ util-device.h \ + util-dpdk.h \ util-ebpf.h \ util-enum.h \ util-error.h \ @@ -999,6 +1002,7 @@ libsuricata_c_a_SOURCES = \ respond-reject.c \ respond-reject-libnet11.c \ runmode-af-packet.c \ + runmode-dpdk.c \ runmode-erf-dag.c \ runmode-erf-file.c \ runmode-ipfw.c \ @@ -1015,6 +1019,7 @@ libsuricata_c_a_SOURCES = \ runmode-windivert.c \ rust-context.c \ source-af-packet.c \ + source-dpdk.c \ source-erf-dag.c \ source-erf-file.c \ source-ipfw.c \ @@ -1067,6 +1072,7 @@ libsuricata_c_a_SOURCES = \ util-decode-mime.c \ util-detect.c \ util-device.c \ + util-dpdk.c \ util-ebpf.c \ util-enum.c \ util-error.c \ diff --git a/src/conf.c b/src/conf.c index 633486a6f8..af469660e7 100644 --- a/src/conf.c +++ b/src/conf.c @@ -976,6 +976,61 @@ int 
ConfNodeIsSequence(const ConfNode *node) return node->is_seq == 0 ? 0 : 1; } +/** + * @brief Finds an interface from the list of interfaces. + * @param ifaces_node_name - name of the node which holds a list of intefaces + * @param iface - interfaces name + * @return NULL on failure otherwise a valid pointer + */ +ConfNode *ConfSetIfaceNode(const char *ifaces_node_name, const char *iface) +{ + ConfNode *if_node; + ConfNode *ifaces_list_node; + /* Find initial node which holds all interfaces */ + ifaces_list_node = ConfGetNode(ifaces_node_name); + if (ifaces_list_node == NULL) { + SCLogError(SC_ERR_CONF_YAML_ERROR, "unable to find %s config", ifaces_node_name); + return NULL; + } + + if_node = ConfFindDeviceConfig(ifaces_list_node, iface); + if (if_node == NULL) + SCLogNotice("unable to find interface %s in DPDK config", iface); + + return if_node; +} + +/** + * @brief Finds and sets root and default node of the interface. + * @param ifaces_node_name Node which holds list of interfaces + * @param iface Name of the interface e.g. 
eth3 + * @param if_root Node which will hold the interface configuration + * @param if_default Node which is the default configuration in the given list of interfaces + * @return 0 on success, -ENODEV when neither the root interface nor the default interface was found + */ +int ConfSetRootAndDefaultNodes( + const char *ifaces_node_name, const char *iface, ConfNode **if_root, ConfNode **if_default) +{ + const char *default_iface = "default"; + *if_root = ConfSetIfaceNode(ifaces_node_name, iface); + *if_default = ConfSetIfaceNode(ifaces_node_name, default_iface); + + if (*if_root == NULL && *if_default == NULL) { + SCLogError(SC_ERR_CONF_YAML_ERROR, + "unable to find configuration for the interface \"%s\" or the default " + "configuration (\"%s\")", + iface, default_iface); + return (-ENODEV); + } + + /* If there is no setting for current interface use default one as main iface */ + if (*if_root == NULL) { + *if_root = *if_default; + *if_default = NULL; + } + return 0; +} + #ifdef UNITTESTS /** diff --git a/src/conf.h b/src/conf.h index c71dded8ca..f9e5f6e32c 100644 --- a/src/conf.h +++ b/src/conf.h @@ -95,5 +95,8 @@ int ConfGetChildValueIntWithDefault(const ConfNode *base, const ConfNode *dflt, int ConfGetChildValueBoolWithDefault(const ConfNode *base, const ConfNode *dflt, const char *name, int *val); char *ConfLoadCompleteIncludePath(const char *); int ConfNodeIsSequence(const ConfNode *node); +ConfNode *ConfSetIfaceNode(const char *ifaces_node_name, const char *iface); +int ConfSetRootAndDefaultNodes( + const char *ifaces_node_name, const char *iface, ConfNode **if_root, ConfNode **if_default); #endif /* ! 
__CONF_H__ */ diff --git a/src/decode.h b/src/decode.h index a936083514..0673a66b81 100644 --- a/src/decode.h +++ b/src/decode.h @@ -69,6 +69,9 @@ enum PktSrcEnum { #include "source-af-packet.h" #include "source-netmap.h" #include "source-windivert.h" +#ifdef HAVE_DPDK +#include "source-dpdk.h" +#endif #ifdef HAVE_PF_RING_FLOW_OFFLOAD #include "source-pfring.h" #endif @@ -481,6 +484,9 @@ typedef struct Packet_ #ifdef WINDIVERT WinDivertPacketVars windivert_v; #endif /* WINDIVERT */ +#ifdef HAVE_DPDK + DPDKPacketVars dpdk_v; +#endif /* A chunk of memory that a plugin can use for its packet vars. */ uint8_t plugin_v[PLUGIN_VAR_SIZE]; diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c new file mode 100644 index 0000000000..e27b1518bd --- /dev/null +++ b/src/runmode-dpdk.c @@ -0,0 +1,1231 @@ +/* Copyright (C) 2021 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \ingroup dpdk + * + * @{ + */ + +/** + * \file + * + * \author Lukas Sismis + * + * DPDK runmode + * + */ + +#include "suricata-common.h" +#include "runmodes.h" +#include "runmode-dpdk.h" +#include "source-dpdk.h" +#include "util-runmodes.h" +#include "util-byte.h" +#include "util-cpu.h" +#include "util-dpdk.h" + +#ifdef HAVE_DPDK + +#define RSS_HKEY_LEN 40 +// General purpose RSS key for symmetric bidirectional flow distribution +uint8_t rss_hkey[] = { 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, + 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, + 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A }; + +// Calculates the closest multiple of y from x +#define ROUNDUP(x, y) ((((x) + ((y)-1)) / (y)) * (y)) + +/* Maximum DPDK EAL parameters count. */ +#define EAL_ARGS 48 + +struct Arguments { + uint16_t capacity; + char **argv; + uint16_t argc; +}; + +static char *AllocArgument(size_t arg_len); +static char *AllocAndSetArgument(const char *arg); +static char *AllocAndSetOption(const char *arg); + +static void ArgumentsInit(struct Arguments *args, unsigned capacity); +static void ArgumentsCleanup(struct Arguments *args); +static void ArgumentsAdd(struct Arguments *args, char *value); +static void ArgumentsAddOptionAndArgument(struct Arguments *args, const char *opt, const char *arg); +static void InitEal(void); + +static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str); +static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str); +static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues); +static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues); +static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int); +static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str); +static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int); +static 
int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int); +static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int); +static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool); +static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool); +static int ConfigSetChecksumChecks(DPDKIfaceConfig *iconf, int entry_bool); +static int ConfigSetChecksumOffload(DPDKIfaceConfig *iconf, int entry_bool); +static int ConfigSetCopyIface(DPDKIfaceConfig *iconf, const char *entry_str); +static int ConfigSetCopyMode(DPDKIfaceConfig *iconf, const char *entry_str); +static int ConfigSetCopyIfaceSettings(DPDKIfaceConfig *iconf, const char *iface, const char *mode); +static void ConfigInit(DPDKIfaceConfig **iconf); +static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface); +static DPDKIfaceConfig *ConfigParse(const char *iface); + +static void DeviceInitPortConf(const DPDKIfaceConfig *iconf, + const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf); +static int DeviceConfigureQueues(DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info, + const struct rte_eth_conf *port_conf); +static int DeviceValidateOutIfaceConfig(DPDKIfaceConfig *iconf); +static int DeviceConfigureIPS(DPDKIfaceConfig *iconf); +static int DeviceConfigure(DPDKIfaceConfig *iconf); +static void *ParseDpdkConfigAndConfigureDevice(const char *iface); +static void DPDKDerefConfig(void *conf); + +#define DPDK_CONFIG_DEFAULT_THREADS "auto" +#define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE 65535 +#define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE "auto" +#define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS 1024 +#define DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS 1024 +#define DPDK_CONFIG_DEFAULT_MTU 1500 +#define DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE 1 +#define DPDK_CONFIG_DEFAULT_MULTICAST_MODE 1 +#define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION 1 +#define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD 1 +#define DPDK_CONFIG_DEFAULT_COPY_MODE "none" +#define 
/**
 * Searches downwards from max_num for the first value that divides num
 * without a remainder.
 * @param num Number whose divisor is looked for
 * @param max_num Highest candidate that is tried
 * @return the greatest divisor of num in the range [2, max_num], 1 if there is none
 */
static int GreatestDivisorUpTo(uint32_t num, uint32_t max_num)
{
    int candidate = max_num;

    while (candidate >= 2) {
        if (num % candidate == 0) {
            return candidate;
        }
        candidate--;
    }

    return 1;
}
"--" : "-"; + size_t full_len = arg_len + strlen(dash_prefix); + + ptr = AllocArgument(full_len); + strlcpy(ptr, dash_prefix, strlen(dash_prefix) + 1); + strlcat(ptr, arg, full_len + 1); + SCReturnPtr(ptr, "char *"); +} + +static void ArgumentsInit(struct Arguments *args, unsigned capacity) +{ + SCEnter(); + args->argv = SCCalloc(capacity, sizeof(args->argv)); + if (args->argv == NULL) + FatalError(SC_ERR_MEM_ALLOC, "Could not allocate memory for Arguments structure"); + + args->capacity = capacity; + args->argc = 0; + SCReturn; +} + +static void ArgumentsCleanup(struct Arguments *args) +{ + SCEnter(); + for (int i = 0; i < args->argc; i++) { + if (args->argv[i] != NULL) { + SCFree(args->argv[i]); + args->argv[i] = NULL; + } + } + + SCFree(args->argv); + args->argv = NULL; + args->argc = 0; + args->capacity = 0; +} + +static void ArgumentsAdd(struct Arguments *args, char *value) +{ + SCEnter(); + if (args->argc + 1 > args->capacity) + FatalError(SC_ERR_DPDK_EAL_INIT, "No capacity for more arguments (Max: %" PRIu32 ")", + EAL_ARGS); + + args->argv[args->argc++] = value; + SCReturn; +} + +static void ArgumentsAddOptionAndArgument(struct Arguments *args, const char *opt, const char *arg) +{ + SCEnter(); + char *option; + char *argument; + + option = AllocAndSetOption(opt); + ArgumentsAdd(args, option); + + // Empty argument could mean option only (e.g. 
--no-huge) + if (arg == NULL || arg[0] == '\0') + SCReturn; + + argument = AllocAndSetArgument(arg); + ArgumentsAdd(args, argument); + SCReturn; +} + +static void InitEal() +{ + SCEnter(); + int retval; + ConfNode *param; + const ConfNode *eal_params = ConfGetNode("dpdk.eal-params"); + struct Arguments args; + char **eal_argv; + + if (eal_params == NULL) { + FatalError(SC_ERR_DPDK_CONF, "DPDK EAL parameters not found in the config"); + } + + ArgumentsInit(&args, EAL_ARGS); + ArgumentsAdd(&args, AllocAndSetArgument("suricata")); + + TAILQ_FOREACH (param, &eal_params->head, next) { + ArgumentsAddOptionAndArgument(&args, param->name, param->val); + } + + // creating a shallow copy for cleanup because rte_eal_init changes array contents + eal_argv = SCMalloc(args.argc * sizeof(args.argv)); + if (eal_argv == NULL) { + FatalError( + SC_ERR_MEM_ALLOC, "Failed to allocate memory for the array of DPDK EAL arguments"); + } + memcpy(eal_argv, args.argv, args.argc * sizeof(*args.argv)); + + rte_log_set_global_level(RTE_LOG_WARNING); + retval = rte_eal_init(args.argc, eal_argv); + + ArgumentsCleanup(&args); + SCFree(eal_argv); + + if (retval < 0) { // retval binded to the result of rte_eal_init + FatalError( + SC_ERR_DPDK_EAL_INIT, "DPDK EAL initialization error: %s", rte_strerror(-retval)); + } +} + +static void DPDKDerefConfig(void *conf) +{ + SCEnter(); + DPDKIfaceConfig *iconf = (DPDKIfaceConfig *)conf; + + if (SC_ATOMIC_SUB(iconf->ref, 1) == 1) { + if (iconf->pkt_mempool != NULL) { + rte_mempool_free(iconf->pkt_mempool); + } + + SCFree(iconf); + } + SCReturn; +} + +static void ConfigInit(DPDKIfaceConfig **iconf) +{ + SCEnter(); + DPDKIfaceConfig *ptr = NULL; + ptr = SCCalloc(1, sizeof(DPDKIfaceConfig)); + if (ptr == NULL) + FatalError(SC_ERR_DPDK_CONF, "Could not allocate memory for DPDKIfaceConfig"); + + ptr->pkt_mempool = NULL; + ptr->out_port_id = -1; // make sure no port is set + SC_ATOMIC_INIT(ptr->ref); + (void)SC_ATOMIC_ADD(ptr->ref, 1); + ptr->DerefFunc = 
DPDKDerefConfig; + ptr->flags = 0; + + *iconf = ptr; + SCReturn; +} + +static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str) +{ + SCEnter(); + int retval; + + if (entry_str == NULL || entry_str[0] == '\0') + FatalError(SC_ERR_INVALID_VALUE, "Interface name in DPDK config is NULL or empty"); + + retval = rte_eth_dev_get_port_by_name(entry_str, &iconf->port_id); + if (retval < 0) + FatalError(SC_ERR_DPDK_CONF, "Interface \"%s\": %s", entry_str, rte_strerror(-retval)); + + strlcpy(iconf->iface, entry_str, sizeof(iconf->iface)); + SCReturn; +} + +static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) +{ + SCEnter(); + const char *active_runmode = RunmodeGetActive(); + + if (active_runmode && !strcmp("single", active_runmode)) { + iconf->threads = 1; + SCReturnInt(0); + } + + if (entry_str == NULL) { + SCLogError(SC_ERR_INVALID_VALUE, "Number of threads for interface \"%s\" not specified", + iconf->iface); + SCReturnInt(-EINVAL); + } + + if (strcmp(entry_str, "auto") == 0) { + iconf->threads = (int)UtilCpuGetNumProcessorsOnline(); + SCLogPerf("%u cores, so using %u threads", iconf->threads, iconf->threads); + SCReturnInt(0); + } + + if (StringParseInt32(&iconf->threads, 10, 0, entry_str) < 0) { + SCLogError(SC_ERR_INVALID_VALUE, + "Threads entry for interface %s contain non-numerical characters - \"%s\"", + iconf->iface, entry_str); + SCReturnInt(-EINVAL); + } + + if (iconf->threads < 0) { + SCLogError(SC_ERR_INVALID_VALUE, "Interface %s has a negative number of threads", + iconf->iface); + SCReturnInt(-ERANGE); + } + + SCReturnInt(0); +} + +static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues) +{ + SCEnter(); + iconf->nb_rx_queues = nb_queues; + if (iconf->nb_rx_queues < 1) { + SCLogError(SC_ERR_INVALID_VALUE, + "Interface %s requires to have positive number of RX queues", iconf->iface); + SCReturnInt(-ERANGE); + } + + SCReturnInt(0); +} + +static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t 
nb_queues) +{ + SCEnter(); + iconf->nb_tx_queues = nb_queues; + if (iconf->nb_tx_queues < 1) { + SCLogError(SC_ERR_INVALID_VALUE, + "Interface %s requires to have positive number of TX queues", iconf->iface); + SCReturnInt(-ERANGE); + } + + SCReturnInt(0); +} + +static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int) +{ + SCEnter(); + if (entry_int <= 0) { + SCLogError(SC_ERR_INVALID_VALUE, "Interface %s requires to have positive memory pool size", + iconf->iface); + SCReturnInt(-ERANGE); + } + + iconf->mempool_size = entry_int; + SCReturnInt(0); +} + +static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str) +{ + SCEnter(); + if (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "auto") == 0) { + // calculate the mempool size based on the mempool size (it needs to be already filled in) + // It is advised to have mempool cache size lower or equal to: + // RTE_MEMPOOL_CACHE_MAX_SIZE (by default 512) and "mempool-size / 1.5" + // and at the same time "mempool-size modulo cache_size == 0". 
+ if (iconf->mempool_size == 0) { + SCLogError(SC_ERR_INVALID_VALUE, + "Cannot calculate mempool cache size of a mempool with size %d", + iconf->mempool_size); + SCReturnInt(-EINVAL); + } + + uint32_t max_cache_size = MAX(RTE_MEMPOOL_CACHE_MAX_SIZE, iconf->mempool_size / 1.5); + iconf->mempool_cache_size = GreatestDivisorUpTo(iconf->mempool_size, max_cache_size); + SCReturnInt(0); + } + + if (StringParseUint32(&iconf->mempool_cache_size, 10, 0, entry_str) < 0) { + SCLogError(SC_ERR_INVALID_VALUE, + "Mempool cache size entry for interface %s contain non-numerical " + "characters - \"%s\"", + iconf->iface, entry_str); + SCReturnInt(-EINVAL); + } + + if (iconf->mempool_cache_size <= 0 || iconf->mempool_cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) { + SCLogError(SC_ERR_INVALID_VALUE, + "Interface %s requires to have positive memory pool size and be less than %" PRIu32, + iconf->iface, RTE_MEMPOOL_CACHE_MAX_SIZE); + SCReturnInt(-ERANGE); + } + + SCReturnInt(0); +} + +static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int) +{ + SCEnter(); + if (entry_int <= 0) { + SCLogError(SC_ERR_INVALID_VALUE, + "Interface %s requires to have positive number of RX descriptors", iconf->iface); + SCReturnInt(-ERANGE); + } + + iconf->nb_rx_desc = entry_int; + SCReturnInt(0); +} + +static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int) +{ + SCEnter(); + if (entry_int <= 0) { + SCLogError(SC_ERR_INVALID_VALUE, + "Interface %s requires to have positive number of TX descriptors", iconf->iface); + SCReturnInt(-ERANGE); + } + + iconf->nb_tx_desc = entry_int; + SCReturnInt(0); +} + +static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int) +{ + SCEnter(); + if (entry_int < RTE_ETHER_MIN_MTU || entry_int > RTE_ETHER_MAX_JUMBO_FRAME_LEN) { + SCLogError(SC_ERR_INVALID_VALUE, + "Interface %s requires to have size of MTU between %" PRIu32 " and %" PRIu32, + iconf->iface, RTE_ETHER_MIN_MTU, RTE_ETHER_MAX_JUMBO_FRAME_LEN); + SCReturnInt(-ERANGE); + } 
+ + iconf->mtu = entry_int; + SCReturnInt(0); +} + +static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool) +{ + SCEnter(); + if (entry_bool) + iconf->flags |= DPDK_PROMISC; + + SCReturnBool(true); +} + +static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool) +{ + SCEnter(); + if (entry_bool) + iconf->flags |= DPDK_MULTICAST; // enable + + SCReturnBool(true); +} + +static int ConfigSetChecksumChecks(DPDKIfaceConfig *iconf, int entry_bool) +{ + SCEnter(); + if (entry_bool) + iconf->checksum_mode = CHECKSUM_VALIDATION_ENABLE; + + SCReturnInt(0); +} + +static int ConfigSetChecksumOffload(DPDKIfaceConfig *iconf, int entry_bool) +{ + SCEnter(); + if (entry_bool) + iconf->flags |= DPDK_RX_CHECKSUM_OFFLOAD; + + SCReturnInt(0); +} + +static int ConfigSetCopyIface(DPDKIfaceConfig *iconf, const char *entry_str) +{ + SCEnter(); + int retval; + + if (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "none") == 0) { + iconf->out_iface = NULL; + SCReturnInt(0); + } + + retval = rte_eth_dev_get_port_by_name(entry_str, &iconf->out_port_id); + if (retval < 0) { + SCLogWarning(SC_ERR_DPDK_CONF, + "Name of the copy interface (%s) for the interface %s is not valid, changing to %s", + entry_str, iconf->iface, DPDK_CONFIG_DEFAULT_COPY_INTERFACE); + iconf->out_iface = DPDK_CONFIG_DEFAULT_COPY_INTERFACE; + } + + iconf->out_iface = entry_str; + SCReturnInt(0); +} + +static int ConfigSetCopyMode(DPDKIfaceConfig *iconf, const char *entry_str) +{ + SCEnter(); + if (entry_str == NULL) { + SCLogWarning(SC_ERR_INVALID_VALUE, + "Interface %s has no copy mode specified, changing to %s ", iconf->iface, + DPDK_CONFIG_DEFAULT_COPY_MODE); + entry_str = DPDK_CONFIG_DEFAULT_COPY_MODE; + } + + if (strcmp(entry_str, "none") != 0 && strcmp(entry_str, "tap") != 0 && + strcmp(entry_str, "ips") != 0) { + SCLogWarning(SC_ERR_INVALID_VALUE, + "Copy mode \"%s\" is not one of the possible values (none|tap|ips) for interface " + "%s. 
Changing to %s", + entry_str, iconf->iface, DPDK_CONFIG_DEFAULT_COPY_MODE); + entry_str = DPDK_CONFIG_DEFAULT_COPY_MODE; + } + + if (strcmp(entry_str, "none") == 0) { + iconf->copy_mode = DPDK_COPY_MODE_NONE; + } else if (strcmp(entry_str, "tap") == 0) { + iconf->copy_mode = DPDK_COPY_MODE_TAP; + } else if (strcmp(entry_str, "ips") == 0) { + iconf->copy_mode = DPDK_COPY_MODE_IPS; + } + + SCReturnInt(0); +} + +static int ConfigSetCopyIfaceSettings(DPDKIfaceConfig *iconf, const char *iface, const char *mode) +{ + SCEnter(); + int retval; + + retval = ConfigSetCopyIface(iconf, iface); + if (retval < 0) + SCReturnInt(retval); + + retval = ConfigSetCopyMode(iconf, mode); + if (retval < 0) + SCReturnInt(retval); + + if (iconf->copy_mode == DPDK_COPY_MODE_NONE) { + if (iconf->out_iface != NULL) + iconf->out_iface = NULL; + SCReturnInt(0); + } + + if (iconf->out_iface == NULL || strlen(iconf->out_iface) <= 0) { + SCLogError(SC_ERR_DPDK_CONF, "Copy mode enabled but interface not set"); + SCReturnInt(-EINVAL); + } + + if (iconf->copy_mode == DPDK_COPY_MODE_IPS) + SCLogInfo("DPDK IPS mode activated between %s and %s", iconf->iface, iconf->out_iface); + else if (iconf->copy_mode == DPDK_COPY_MODE_TAP) + SCLogInfo("DPDK IPS mode activated between %s and %s", iconf->iface, iconf->out_iface); + + SCReturnInt(0); +} + +static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface) +{ + SCEnter(); + int retval; + ConfNode *if_root; + ConfNode *if_default; + const char *entry_str = NULL; + intmax_t entry_int = 0; + int entry_bool = 0; + const char *copy_iface_str = NULL; + const char *copy_mode_str = NULL; + + ConfigSetIface(iconf, iface); + + retval = ConfSetRootAndDefaultNodes("dpdk.interfaces", iconf->iface, &if_root, &if_default); + if (retval < 0) { + FatalError(SC_ERR_DPDK_CONF, "failed to find DPDK configuration for the interface %s", + iconf->iface); + } + + retval = ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.threads, &entry_str) != 1 + ? 
ConfigSetThreads(iconf, DPDK_CONFIG_DEFAULT_THREADS) + : ConfigSetThreads(iconf, entry_str); + if (retval < 0) + SCReturnInt(retval); + + // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported + retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads); + if (retval < 0) + SCReturnInt(retval); + + // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported + retval = ConfigSetTxQueues(iconf, (uint16_t)iconf->threads); + if (retval < 0) + SCReturnInt(retval); + + retval = ConfGetChildValueIntWithDefault( + if_root, if_default, dpdk_yaml.mempool_size, &entry_int) != 1 + ? ConfigSetMempoolSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE) + : ConfigSetMempoolSize(iconf, entry_int); + if (retval < 0) + SCReturnInt(retval); + + retval = ConfGetChildValueWithDefault( + if_root, if_default, dpdk_yaml.mempool_cache_size, &entry_str) != 1 + ? ConfigSetMempoolCacheSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE) + : ConfigSetMempoolCacheSize(iconf, entry_str); + if (retval < 0) + SCReturnInt(retval); + + retval = ConfGetChildValueIntWithDefault( + if_root, if_default, dpdk_yaml.rx_descriptors, &entry_int) != 1 + ? ConfigSetRxDescriptors(iconf, DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS) + : ConfigSetRxDescriptors(iconf, entry_int); + if (retval < 0) + SCReturnInt(retval); + + retval = ConfGetChildValueIntWithDefault( + if_root, if_default, dpdk_yaml.tx_descriptors, &entry_int) != 1 + ? ConfigSetTxDescriptors(iconf, DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS) + : ConfigSetTxDescriptors(iconf, entry_int); + if (retval < 0) + SCReturnInt(retval); + + retval = ConfGetChildValueIntWithDefault(if_root, if_default, dpdk_yaml.mtu, &entry_int) != 1 + ? ConfigSetMtu(iconf, DPDK_CONFIG_DEFAULT_MTU) + : ConfigSetMtu(iconf, entry_int); + if (retval < 0) + SCReturnInt(retval); + + retval = ConfGetChildValueBoolWithDefault( + if_root, if_default, dpdk_yaml.promisc, &entry_bool) != 1 + ? 
ConfigSetPromiscuousMode(iconf, DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE) + : ConfigSetPromiscuousMode(iconf, entry_bool); + if (retval != true) + SCReturnInt(-EINVAL); + + retval = ConfGetChildValueBoolWithDefault( + if_root, if_default, dpdk_yaml.multicast, &entry_bool) != 1 + ? ConfigSetMulticast(iconf, DPDK_CONFIG_DEFAULT_MULTICAST_MODE) + : ConfigSetMulticast(iconf, entry_bool); + if (retval != true) + SCReturnInt(-EINVAL); + + retval = ConfGetChildValueBoolWithDefault( + if_root, if_default, dpdk_yaml.checksum_checks, &entry_bool) != 1 + ? ConfigSetChecksumChecks(iconf, DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION) + : ConfigSetChecksumChecks(iconf, entry_bool); + if (retval < 0) + SCReturnInt(retval); + + retval = ConfGetChildValueBoolWithDefault( + if_root, if_default, dpdk_yaml.checksum_checks_offload, &entry_bool) != 1 + ? ConfigSetChecksumOffload( + iconf, DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD) + : ConfigSetChecksumOffload(iconf, entry_bool); + if (retval < 0) + SCReturnInt(retval); + + retval = + ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.copy_mode, &copy_mode_str) | + ConfGetChildValueWithDefault( + if_root, if_default, dpdk_yaml.copy_iface, &copy_iface_str); + // if one of copy interface settings fail to load then the default values are set + retval = retval != 1 ? 
ConfigSetCopyIfaceSettings(iconf, DPDK_CONFIG_DEFAULT_COPY_INTERFACE, + DPDK_CONFIG_DEFAULT_COPY_MODE) + : ConfigSetCopyIfaceSettings(iconf, copy_iface_str, copy_mode_str); + if (retval < 0) + SCReturnInt(retval); + + SCReturnInt(0); +} + +static DPDKIfaceConfig *ConfigParse(const char *iface) +{ + SCEnter(); + int retval; + DPDKIfaceConfig *iconf = NULL; + if (iface == NULL) + FatalError(SC_ERR_DPDK_CONF, "DPDK interface is NULL"); + + ConfigInit(&iconf); + retval = ConfigLoad(iconf, iface); + if (retval < 0) { + iconf->DerefFunc(iconf); + SCReturnPtr(NULL, "void *"); + } + + SCReturnPtr(iconf, "DPDKIfaceConfig *"); +} + +static void DeviceSetPMDSpecificRSS(struct rte_eth_rss_conf *rss_conf, const char *driver_name) +{ +} + +static void DeviceInitPortConf(const DPDKIfaceConfig *iconf, + const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf) +{ + *port_conf = (struct rte_eth_conf){ + .rxmode = { + .mq_mode = ETH_MQ_RX_NONE, + .max_rx_pkt_len = iconf->mtu, + .offloads = 0, // turn every offload off to prevent any packet modification + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + .offloads = 0, + }, + }; + + // configure RX offloads + if (dev_info->rx_offload_capa & DEV_RX_OFFLOAD_RSS_HASH) { + if (iconf->nb_rx_queues > 1) { + SCLogConfig("RSS enabled on %s for %d queues", iconf->iface, iconf->nb_rx_queues); + port_conf->rx_adv_conf.rss_conf = (struct rte_eth_rss_conf){ + .rss_key = rss_hkey, + .rss_key_len = RSS_HKEY_LEN, + .rss_hf = ETH_RSS_IP, + }; + + DeviceSetPMDSpecificRSS(&port_conf->rx_adv_conf.rss_conf, dev_info->driver_name); + + uint64_t rss_hf_tmp = + port_conf->rx_adv_conf.rss_conf.rss_hf & dev_info->flow_type_rss_offloads; + if (port_conf->rx_adv_conf.rss_conf.rss_hf != rss_hf_tmp) { + SCLogWarning(SC_WARN_DPDK_CONF, + "Interface %s modified RSS hash function based on hardware support, " + "requested:%#" PRIx64 " configured:%#" PRIx64, + iconf->iface, port_conf->rx_adv_conf.rss_conf.rss_hf, rss_hf_tmp); + 
port_conf->rx_adv_conf.rss_conf.rss_hf = rss_hf_tmp; + } + port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS; + } else { + SCLogConfig("RSS not enabled on %s", iconf->iface); + port_conf->rx_adv_conf.rss_conf.rss_key = NULL; + port_conf->rx_adv_conf.rss_conf.rss_hf = 0; + } + } else { + SCLogConfig("RSS not supported on %s", iconf->iface); + } + + if (iconf->checksum_mode == CHECKSUM_VALIDATION_DISABLE) { + SCLogConfig("Checksum validation disabled on %s", iconf->iface); + } else if (dev_info->rx_offload_capa & DEV_RX_OFFLOAD_CHECKSUM) { + if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE && + iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD) { + SCLogConfig("IP, TCP and UDP checksum validation enabled and offloaded " + "on %s", + iconf->iface); + port_conf->rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM; + } else if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE && + !(iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD)) { + SCLogConfig("Suricata checksum validation enabled (but can be offloaded on %s)", + iconf->iface); + } + } + + if (iconf->mtu > RTE_ETHER_MAX_LEN) { + port_conf->rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + } + + if (dev_info->tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) { + port_conf->txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; + } +} + +static int DeviceConfigureQueues(DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info, + const struct rte_eth_conf *port_conf) +{ + SCEnter(); + int retval; + uint16_t mtu_size; + uint16_t mbuf_size; + struct rte_eth_rxconf rxq_conf; + struct rte_eth_txconf txq_conf; + + char mempool_name[64]; + snprintf(mempool_name, 64, "mempool_%.20s", iconf->iface); + // +4 for VLAN header + mtu_size = iconf->mtu + RTE_ETHER_CRC_LEN + RTE_ETHER_HDR_LEN + 4; + mbuf_size = ROUNDUP(mtu_size, 1024) + RTE_PKTMBUF_HEADROOM; + SCLogInfo("Creating a packet mbuf pool %s of size %d, cache size %d, mbuf size %d", + mempool_name, iconf->mempool_size, iconf->mempool_cache_size, mbuf_size); + + iconf->pkt_mempool = 
rte_pktmbuf_pool_create(mempool_name, iconf->mempool_size, + iconf->mempool_cache_size, 0, mbuf_size, (int)iconf->socket_id); + if (iconf->pkt_mempool == NULL) { + retval = -rte_errno; + SCLogError(SC_ERR_DPDK_INIT, + "Error (err=%d) during rte_pktmbuf_pool_create (mempool: %s) - %s", rte_errno, + mempool_name, rte_strerror(rte_errno)); + SCReturnInt(retval); + } + + for (uint16_t queue_id = 0; queue_id < iconf->nb_rx_queues; queue_id++) { + rxq_conf = dev_info->default_rxconf; + rxq_conf.offloads = port_conf->rxmode.offloads; + rxq_conf.rx_thresh.hthresh = 0; + rxq_conf.rx_thresh.pthresh = 0; + rxq_conf.rx_thresh.wthresh = 0; + rxq_conf.rx_free_thresh = 0; + rxq_conf.rx_drop_en = 0; + SCLogPerf( + "Creating Q %d of P %d using desc RX: %d TX: %d RX htresh: %d RX pthresh %d wtresh " + "%d free_tresh %d drop_en %d Offloads %lu", + queue_id, iconf->port_id, iconf->nb_rx_desc, iconf->nb_tx_desc, + rxq_conf.rx_thresh.hthresh, rxq_conf.rx_thresh.pthresh, rxq_conf.rx_thresh.wthresh, + rxq_conf.rx_free_thresh, rxq_conf.rx_drop_en, rxq_conf.offloads); + + retval = rte_eth_rx_queue_setup(iconf->port_id, queue_id, iconf->nb_rx_desc, + iconf->socket_id, &rxq_conf, iconf->pkt_mempool); + if (retval < 0) { + rte_mempool_free(iconf->pkt_mempool); + SCLogError(SC_ERR_DPDK_INIT, + "Error (err=%d) during initialization of device queue %u of port %u", retval, + queue_id, iconf->port_id); + SCReturnInt(retval); + } + } + + for (uint16_t queue_id = 0; queue_id < iconf->nb_tx_queues; queue_id++) { + txq_conf = dev_info->default_txconf; + txq_conf.offloads = port_conf->txmode.offloads; + SCLogPerf("Creating TX queue %d on port %d", queue_id, iconf->port_id); + retval = rte_eth_tx_queue_setup( + iconf->port_id, queue_id, iconf->nb_tx_desc, iconf->socket_id, &txq_conf); + if (retval < 0) { + rte_mempool_free(iconf->pkt_mempool); + SCLogError(SC_ERR_DPDK_INIT, + "Error (err=%d) during initialization of device queue %u of port %u", retval, + queue_id, iconf->port_id); + SCReturnInt(retval); 
+ } + } + + SCReturnInt(0); +} + +static int DeviceValidateOutIfaceConfig(DPDKIfaceConfig *iconf) +{ + SCEnter(); + int retval; + DPDKIfaceConfig *out_iconf = NULL; + ConfigInit(&out_iconf); + if (out_iconf == NULL) { + FatalError( + SC_ERR_DPDK_CONF, "Copy interface of the interface \"%s\" is NULL", iconf->iface); + } + + retval = ConfigLoad(out_iconf, iconf->out_iface); + if (retval < 0) { + SCLogError(SC_ERR_DPDK_CONF, "Fail to load config of interface %s", iconf->out_iface); + out_iconf->DerefFunc(out_iconf); + SCReturnInt(-EINVAL); + } + + if (iconf->nb_rx_queues != out_iconf->nb_tx_queues) { + // the other direction is validated when the copy interface is configured + SCLogError(SC_ERR_DPDK_CONF, + "Interface %s has configured %d RX queues but copy interface %s has %d TX queues" + " - number of queues must be equal", + iconf->iface, iconf->nb_rx_queues, out_iconf->iface, out_iconf->nb_tx_queues); + out_iconf->DerefFunc(out_iconf); + SCReturnInt(-EINVAL); + } else if (iconf->mtu != out_iconf->mtu) { + SCLogError(SC_ERR_DPDK_CONF, + "Interface %s has configured MTU of %dB but copy interface %s has MTU set to %dB" + " - MTU must be equal", + iconf->iface, iconf->mtu, out_iconf->iface, out_iconf->mtu); + out_iconf->DerefFunc(out_iconf); + SCReturnInt(-EINVAL); + } else if (iconf->copy_mode != out_iconf->copy_mode) { + SCLogError(SC_ERR_DPDK_CONF, "Copy modes of interfaces %s and %s are not equal", + iconf->iface, out_iconf->iface); + out_iconf->DerefFunc(out_iconf); + SCReturnInt(-EINVAL); + } else if (strcmp(iconf->iface, out_iconf->out_iface) != 0) { + // check if the other iface has the current iface set as a copy iface + SCLogError(SC_ERR_DPDK_CONF, "Copy interface of %s is not set to %s", out_iconf->iface, + iconf->iface); + out_iconf->DerefFunc(out_iconf); + SCReturnInt(-EINVAL); + } + + out_iconf->DerefFunc(out_iconf); + SCReturnInt(0); +} + +static int DeviceConfigureIPS(DPDKIfaceConfig *iconf) +{ + SCEnter(); + int retval; + + if (iconf->out_iface != 
NULL) { + retval = rte_eth_dev_get_port_by_name(iconf->out_iface, &iconf->out_port_id); + if (retval != 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) during obtaining port id of %s", retval, + iconf->out_iface); + SCReturnInt(retval); + } + + if (rte_eth_dev_socket_id(iconf->port_id) != rte_eth_dev_socket_id(iconf->out_port_id)) { + SCLogWarning(SC_WARN_DPDK_CONF, "%s and %s are not on the same NUMA node", iconf->iface, + iconf->out_iface); + } + + retval = DeviceValidateOutIfaceConfig(iconf); + if (retval != 0) { + // Error will be written out by the validation function + SCReturnInt(retval); + } + } + SCReturnInt(0); +} + +static int DeviceConfigure(DPDKIfaceConfig *iconf) +{ + SCEnter(); + // configure device + int retval; + struct rte_eth_dev_info dev_info; + struct rte_eth_conf port_conf; + + retval = rte_eth_dev_get_port_by_name(iconf->iface, &(iconf->port_id)); + if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when getting port id of %s Is device enabled?", + retval, iconf->iface); + SCReturnInt(retval); + } + + if (!rte_eth_dev_is_valid_port(iconf->port_id)) { + SCLogError(SC_ERR_DPDK_INIT, "Specified port %d is invalid", iconf->port_id); + SCReturnInt(retval); + } + + retval = rte_eth_dev_socket_id(iconf->port_id); + if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) invalid socket id (port %s)", retval, + iconf->iface); + SCReturnInt(retval); + } else { + iconf->socket_id = retval; + } + + retval = rte_eth_dev_info_get(iconf->port_id, &dev_info); + if (retval != 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) during getting device info (port %s)", retval, + iconf->iface); + SCReturnInt(retval); + } + + if (iconf->nb_rx_queues > dev_info.max_rx_queues) { + SCLogError(SC_ERR_DPDK_INIT, + "Number of configured RX queues of %s is higher than maximum allowed (%" PRIu16 ")", + iconf->iface, dev_info.max_rx_queues); + SCReturnInt(-ERANGE); + } + + if (iconf->nb_tx_queues > dev_info.max_tx_queues) { + 
SCLogError(SC_ERR_DPDK_INIT, + "Number of configured TX queues of %s is higher than maximum allowed (%" PRIu16 ")", + iconf->iface, dev_info.max_tx_queues); + SCReturnInt(-ERANGE); + } + + if (iconf->mtu > dev_info.max_mtu || iconf->mtu < dev_info.min_mtu) { + SCLogError(SC_ERR_DPDK_INIT, + "Loaded MTU of \"%s\" is out of bounds. " + "Min MTU: %" PRIu16 " Max MTU: %" PRIu16, + iconf->iface, dev_info.min_mtu, dev_info.max_mtu); + SCReturnInt(-ERANGE); + } + + // check if jumbo frames are set and are available + if (iconf->mtu > RTE_ETHER_MAX_LEN && + !(dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)) { + SCLogError(SC_ERR_DPDK_CONF, + "Jumbo frames not supported, " + "set MTU of \"%s\" to 1500B", + iconf->iface); + SCReturnInt(-EINVAL); + } + + DeviceInitPortConf(iconf, &dev_info, &port_conf); + if (port_conf.rxmode.offloads & DEV_RX_OFFLOAD_CHECKSUM) { + // Suricata does not need recalc checksums now + iconf->checksum_mode = CHECKSUM_VALIDATION_DISABLE; + } + + retval = rte_eth_dev_configure( + iconf->port_id, iconf->nb_rx_queues, iconf->nb_tx_queues, &port_conf); + if (retval != 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) during configuring the device (port %u)", + retval, iconf->port_id); + SCReturnInt(retval); + } + + retval = rte_eth_dev_adjust_nb_rx_tx_desc( + iconf->port_id, &iconf->nb_rx_desc, &iconf->nb_tx_desc); + if (retval != 0) { + SCLogError(SC_ERR_DPDK_INIT, + "Error (err=%d) during adjustment of device queues descriptors (port %u)", retval, + iconf->port_id); + SCReturnInt(retval); + } + + retval = iconf->flags & DPDK_MULTICAST ? 
rte_eth_allmulticast_enable(iconf->port_id) + : rte_eth_allmulticast_disable(iconf->port_id); + if (retval == -ENOTSUP) { + retval = rte_eth_allmulticast_get(iconf->port_id); + // when multicast is enabled but set to disable or vice versa + if ((retval == 1 && !(iconf->flags & DPDK_MULTICAST)) || + (retval == 0 && (iconf->flags & DPDK_MULTICAST))) { + SCLogError(SC_ERR_DPDK_CONF, + "Allmulticast setting of port (%" PRIu16 + ") can not be configured. Set it to %s", + iconf->port_id, retval == 1 ? "true" : "false"); + } else if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) Unable to get multicast mode on port %u", + retval, iconf->port_id); + SCReturnInt(retval); + } + + if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) Unable to get multicast mode on port %u", + retval, iconf->port_id); + SCReturnInt(retval); + } + } else if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when en/disabling multicast on port %u", + retval, iconf->port_id); + SCReturnInt(retval); + } + + retval = iconf->flags & DPDK_PROMISC ? rte_eth_promiscuous_enable(iconf->port_id) + : rte_eth_promiscuous_disable(iconf->port_id); + if (retval == -ENOTSUP) { + retval = rte_eth_promiscuous_get(iconf->port_id); + if ((retval == 1 && !(iconf->flags & DPDK_PROMISC)) || + (retval == 0 && (iconf->flags & DPDK_PROMISC))) { + SCLogError(SC_ERR_DPDK_CONF, + "Promiscuous setting of port (%" PRIu16 ") can not be configured. Set it to %s", + iconf->port_id, retval == 1 ? 
"true" : "false"); + SCReturnInt(TM_ECODE_FAILED); + } else if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) Unable to get promiscuous mode on port %u", + retval, iconf->port_id); + SCReturnInt(retval); + } + } else if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when enabling promiscuous mode on port %u", + retval, iconf->port_id); + SCReturnInt(TM_ECODE_FAILED); + } + + // set maximum transmission unit + SCLogConfig("Setting MTU of %s to %dB", iconf->iface, iconf->mtu); + retval = rte_eth_dev_set_mtu(iconf->port_id, iconf->mtu); + if (retval == -ENOTSUP) { + SCLogWarning(SC_WARN_DPDK_CONF, + "Changing MTU on port %u is not supported, ignoring the setting...", + iconf->port_id); + // if it is not possible to set the MTU, retrieve it + retval = rte_eth_dev_get_mtu(iconf->port_id, &iconf->mtu); + if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) Unable to retrieve MTU from port %u", + retval, iconf->port_id); + SCReturnInt(retval); + } + } else if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when setting MTU to %u on port %u", retval, + iconf->mtu, iconf->port_id); + SCReturnInt(retval); + } + + retval = DeviceConfigureQueues(iconf, &dev_info, &port_conf); + if (retval < 0) { + SCReturnInt(retval); + } + + retval = DeviceConfigureIPS(iconf); + if (retval < 0) { + SCReturnInt(retval); + } + + SCReturnInt(0); +} + +static void *ParseDpdkConfigAndConfigureDevice(const char *iface) +{ + int retval; + DPDKIfaceConfig *iconf = ConfigParse(iface); + if (iconf == NULL) { + FatalError(SC_ERR_DPDK_CONF, "DPDK configuration could not be parsed"); + } + + if (DeviceConfigure(iconf) != 0) { + iconf->DerefFunc(iconf); + retval = rte_eal_cleanup(); + if (retval != 0) + FatalError(SC_ERR_DPDK_EAL_INIT, "EAL cleanup failed: %s", strerror(-retval)); + + FatalError(SC_ERR_DPDK_CONF, "Device %s fails to configure", iface); + } + + SC_ATOMIC_RESET(iconf->ref); + (void)SC_ATOMIC_ADD(iconf->ref, iconf->threads); + // This 
counter is increased by worker threads that individually pick queue IDs. + SC_ATOMIC_RESET(iconf->queue_id); + return iconf; +} + +/** + * \brief extract information from config file + * + * The returned structure will be freed by the thread init function. + * This is thus necessary to or copy the structure before giving it + * to thread or to reparse the file for each thread (and thus have + * new structure. + * + * After configuration is loaded, DPDK also configures the device according to the settings. + * + * \return a DPDKIfaceConfig corresponding to the interface name + */ + +static int DPDKConfigGetThreadsCount(void *conf) +{ + if (conf == NULL) + FatalError(SC_ERR_DPDK_CONF, "Configuration file is NULL"); + + DPDKIfaceConfig *dpdk_conf = (DPDKIfaceConfig *)conf; + return dpdk_conf->threads; +} + +#endif /* HAVE_DPDK */ + +const char *RunModeDpdkGetDefaultMode(void) +{ + return "workers"; +} + +void RunModeDpdkRegister(void) +{ + RunModeRegisterNewRunMode(RUNMODE_DPDK, "workers", + "Workers DPDK mode, each thread does all" + " tasks from acquisition to logging", + RunModeIdsDpdkWorkers); +} + +/** + * \brief Workers version of the DPDK processing. + * + * Start N threads with each thread doing all the work. 
/**
 * \brief Workers runmode of the DPDK capture method.
 *
 * When built with HAVE_DPDK the runmode is initialized, live time mode is
 * selected, the EAL is initialized and the capture workers are created from
 * the "ReceiveDPDK" and "DecodeDPDK" modules. A failure to create the workers
 * is fatal. Without HAVE_DPDK the function does nothing.
 *
 * \return always 0
 */
int RunModeIdsDpdkWorkers(void)
{
    SCEnter();
#ifdef HAVE_DPDK
    RunModeInitialize();
    TimeModeSetLive();

    InitEal();

    /* device configuration and the thread count are provided via callbacks */
    const int workers_rc = RunModeSetLiveCaptureWorkers(ParseDpdkConfigAndConfigureDevice,
            DPDKConfigGetThreadsCount, "ReceiveDPDK", "DecodeDPDK", thread_name_workers, NULL);
    if (workers_rc != 0) {
        FatalError(SC_ERR_FATAL, "Unable to start runmode");
    }

    SCLogDebug("RunModeIdsDpdkWorkers initialised");

#endif /* HAVE_DPDK */
    SCReturnInt(0);
}
/**
 * \brief Set of strings, one per DPDK interface setting.
 *
 * NOTE(review): presumably each member holds the name of the configuration
 * key used to look up the corresponding setting of an interface - confirm
 * against the code that fills and reads this structure.
 */
typedef struct DPDKIfaceConfigAttributes_ {
    const char *threads;                 /* worker thread count */
    const char *promisc;                 /* promiscuous mode */
    const char *multicast;               /* allmulticast mode */
    const char *checksum_checks;         /* checksum validation */
    const char *checksum_checks_offload; /* checksum validation offload */
    const char *mtu;                     /* maximum transmission unit */
    const char *mempool_size;            /* packet mempool size */
    const char *mempool_cache_size;      /* packet mempool cache size */
    const char *rx_descriptors;          /* RX descriptor count */
    const char *tx_descriptors;          /* TX descriptor count */
    const char *copy_mode;               /* copy mode */
    const char *copy_iface;              /* copy interface */
} DPDKIfaceConfigAttributes;
Aborting"); diff --git a/src/runmodes.h b/src/runmodes.h index 63b75d2bd6..0ea8e48ac1 100644 --- a/src/runmodes.h +++ b/src/runmodes.h @@ -36,6 +36,7 @@ enum RunModes { RUNMODE_DAG, RUNMODE_AFP_DEV, RUNMODE_NETMAP, + RUNMODE_DPDK, RUNMODE_UNITTEST, RUNMODE_NAPATECH, RUNMODE_UNIX_SOCKET, @@ -110,6 +111,7 @@ int RunModeNeedsBypassManager(void); #include "runmode-unix-socket.h" #include "runmode-netmap.h" #include "runmode-windivert.h" +#include "runmode-dpdk.h" extern int threading_set_cpu_affinity; extern float threading_detect_ratio; diff --git a/src/source-dpdk.c b/src/source-dpdk.c new file mode 100644 index 0000000000..53c3344e6b --- /dev/null +++ b/src/source-dpdk.c @@ -0,0 +1,615 @@ +/* Copyright (C) 2021 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
/**
 * \brief Thread init stub used when Suricata is built without DPDK support.
 *
 * It is installed as the ThreadInit callback of the ReceiveDPDK and
 * DecodeDPDK modules in builds without HAVE_DPDK and reports a fatal error
 * that names the thread being created.
 *
 * \param tv thread variables, only tv->name is read
 * \param initdata not used
 * \param data not used
 */
TmEcode NoDPDKSupportExit(ThreadVars *tv, const void *initdata, void **data)
{
    /* NOTE(review): there is no return statement after this call; presumably
     * FatalError() does not return - confirm */
    FatalError(SC_ERR_NO_DPDK,
            "Error creating thread %s: you do not have "
            "support for DPDK enabled, on Linux host please recompile "
            "with --enable-dpdk",
            tv->name);
}
/**
 * \brief Frees a contiguous run of mbufs stored in an array.
 *
 * The mbufs at indices offset, offset + 1, ..., offset + mbuf_cnt - 1 are
 * released with rte_pktmbuf_free(). Nothing is freed when \p mbuf_cnt is 0.
 *
 * \param mbuf_array array of mbuf pointers
 * \param mbuf_cnt number of mbufs to free
 * \param offset index of the first mbuf to free
 */
static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset)
{
    /* mbuf_cnt is a count, not an end index: the receive loop passes the
     * number of mbufs still unprocessed together with the index of the first
     * one, so the loop has to run over [offset, offset + mbuf_cnt) */
    for (uint16_t i = 0; i < mbuf_cnt; i++) {
        rte_pktmbuf_free(mbuf_array[offset + i]);
    }
}
/**
 * Looks up the NUMA node of the CPU the calling thread currently runs on.
 * Only implemented for Linux; on other systems a warning is logged.
 * @return value of numa_node_of_cpu() for the current CPU on Linux, -1 on other systems
 */
static int GetNumaNode(void)
{
#if defined(__linux__)
    const int current_cpu = sched_getcpu();
    return numa_node_of_cpu(current_cpu);
#else
    SCLogWarning(SC_ERR_TM_THREADS_ERROR, "NUMA node retrieval is not supported on this OS.");
    return -1;
#endif
}
+ */ +void TmModuleReceiveDPDKRegister(void) +{ + tmm_modules[TMM_RECEIVEDPDK].name = "ReceiveDPDK"; + tmm_modules[TMM_RECEIVEDPDK].ThreadInit = ReceiveDPDKThreadInit; + tmm_modules[TMM_RECEIVEDPDK].Func = NULL; + tmm_modules[TMM_RECEIVEDPDK].PktAcqLoop = ReceiveDPDKLoop; + tmm_modules[TMM_RECEIVEDPDK].PktAcqBreakLoop = NULL; + tmm_modules[TMM_RECEIVEDPDK].ThreadExitPrintStats = ReceiveDPDKThreadExitStats; + tmm_modules[TMM_RECEIVEDPDK].ThreadDeinit = ReceiveDPDKThreadDeinit; + tmm_modules[TMM_RECEIVEDPDK].cap_flags = SC_CAP_NET_RAW; + tmm_modules[TMM_RECEIVEDPDK].flags = TM_FLAG_RECEIVE_TM; +} + +/** + * \brief Registration Function for DecodeDPDK. + * \todo Unit tests are needed for this module. + */ +void TmModuleDecodeDPDKRegister(void) +{ + tmm_modules[TMM_DECODEDPDK].name = "DecodeDPDK"; + tmm_modules[TMM_DECODEDPDK].ThreadInit = DecodeDPDKThreadInit; + tmm_modules[TMM_DECODEDPDK].Func = DecodeDPDK; + tmm_modules[TMM_DECODEDPDK].ThreadExitPrintStats = NULL; + tmm_modules[TMM_DECODEDPDK].ThreadDeinit = DecodeDPDKThreadDeinit; + tmm_modules[TMM_DECODEDPDK].cap_flags = 0; + tmm_modules[TMM_DECODEDPDK].flags = TM_FLAG_DECODE_TM; +} + +static inline void DPDKDumpCounters(DPDKThreadVars *ptv) +{ + struct rte_eth_stats eth_stats; + int retval = rte_eth_stats_get(ptv->port_id, ð_stats); + if (unlikely(retval != 0)) { + SCLogError(SC_ERR_STAT, "Failed to get stats for port id %d: %s", ptv->port_id, + strerror(-retval)); + return; + } + + uint64_t th_pkts = StatsGetLocalCounterValue(ptv->tv, ptv->capture_dpdk_packets); + StatsAddUI64(ptv->tv, ptv->capture_dpdk_packets, ptv->pkts - th_pkts); + SC_ATOMIC_ADD(ptv->livedev->pkts, ptv->pkts - th_pkts); + + /* Some NICs (e.g. Intel) do not support queue statistics and the drops can be fetched only on + * the port level. Therefore setting it to the first worker to have at least continuous update + * on the dropped packets. 
*/ + if (ptv->queue_id == 0) { + StatsSetUI64(ptv->tv, ptv->capture_dpdk_rx_errs, + eth_stats.imissed + eth_stats.ierrors + eth_stats.rx_nombuf + ptv->pkts); + StatsSetUI64(ptv->tv, ptv->capture_dpdk_tx_errs, eth_stats.oerrors); + SC_ATOMIC_SET(ptv->livedev->drop, eth_stats.imissed + eth_stats.ierrors + + eth_stats.rx_nombuf + eth_stats.oerrors + + ptv->pkts); + } +} + +static void DPDKReleasePacket(Packet *p) +{ + int retval; + /* Need to be in copy mode and need to detect early release + where Ethernet header could not be set (and pseudo packet) + When enabling promiscuous mode on Intel cards, 2 ICMPv6 packets are generated. + These get into the infinite cycle between the NIC and the switch in some cases */ + if ((p->dpdk_v.copy_mode == DPDK_COPY_MODE_TAP || + (p->dpdk_v.copy_mode == DPDK_COPY_MODE_IPS && !PacketTestAction(p, ACTION_DROP))) +#if defined(RTE_LIBRTE_I40E_PMD) || defined(RTE_LIBRTE_IXGBE_PMD) || defined(RTE_LIBRTE_ICE_PMD) + && !(PKT_IS_ICMPV6(p) && p->icmpv6h->type == 143) +#endif + ) { + BUG_ON(PKT_IS_PSEUDOPKT(p)); + retval = + rte_eth_tx_burst(p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id, &p->dpdk_v.mbuf, 1); + // rte_eth_tx_burst can return only 0 (failure) or 1 (success) because we are only + // transmitting burst of size 1 and the function rte_eth_tx_burst returns number of + // successfully sent packets. 
+ if (unlikely(retval < 1)) { + // sometimes a repeated transmit can help to send out the packet + rte_delay_us(DPDK_BURST_TX_WAIT_US); + retval = rte_eth_tx_burst( + p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id, &p->dpdk_v.mbuf, 1); + if (unlikely(retval < 1)) { + SCLogDebug("Unable to transmit the packet on port %u queue %u", + p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id); + rte_pktmbuf_free(p->dpdk_v.mbuf); + p->dpdk_v.mbuf = NULL; + } + } + } else { + rte_pktmbuf_free(p->dpdk_v.mbuf); + p->dpdk_v.mbuf = NULL; + } + + PacketFreeOrRelease(p); +} + +/** + * \brief Main DPDK reading Loop function + */ +static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot) +{ + SCEnter(); + Packet *p; + uint16_t nb_rx; + time_t last_dump = 0; + time_t current_time; + + DPDKThreadVars *ptv = (DPDKThreadVars *)data; + TmSlot *s = (TmSlot *)slot; + + ptv->slot = s->slot_next; + + PacketPoolWait(); + while (1) { + if (unlikely(suricata_ctl_flags != 0)) { + SCLogDebug("Stopping Suricata!"); + DPDKDumpCounters(ptv); + break; + } + + nb_rx = rte_eth_rx_burst(ptv->port_id, ptv->queue_id, ptv->received_mbufs, BURST_SIZE); + if (unlikely(nb_rx == 0)) { + continue; + } + + ptv->pkts += (uint64_t)nb_rx; + for (uint16_t i = 0; i < nb_rx; i++) { + p = PacketGetFromQueueOrAlloc(); + if (unlikely(p == NULL)) { + continue; + } + PKT_SET_SRC(p, PKT_SRC_WIRE); + p->datalink = LINKTYPE_ETHERNET; + if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) { + p->flags |= PKT_IGNORE_CHECKSUM; + } + + DPDKSetTimevalReal(&ptv->machine_start_time, &p->ts); + p->dpdk_v.mbuf = ptv->received_mbufs[i]; + p->ReleasePacket = DPDKReleasePacket; + p->dpdk_v.copy_mode = ptv->copy_mode; + p->dpdk_v.out_port_id = ptv->out_port_id; + p->dpdk_v.out_queue_id = ptv->queue_id; + + PacketSetData(p, rte_pktmbuf_mtod(p->dpdk_v.mbuf, uint8_t *), + rte_pktmbuf_pkt_len(p->dpdk_v.mbuf)); + if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) { + TmqhOutputPacketpool(ptv->tv, p); + 
DPDKFreeMbufArray(ptv->received_mbufs, nb_rx - i - 1, i + 1); + SCReturnInt(EXIT_FAILURE); + } + } + + /* Trigger one dump of stats every second */ + current_time = DPDKGetSeconds(); + if (current_time != last_dump) { + DPDKDumpCounters(ptv); + last_dump = current_time; + } + StatsSyncCountersIfSignalled(tv); + } + + SCReturnInt(TM_ECODE_OK); +} + +/** + * \brief Init function for ReceiveDPDK. + * + * \param tv pointer to ThreadVars + * \param initdata pointer to the interface passed from the user + * \param data pointer gets populated with DPDKThreadVars + * + */ +static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void **data) +{ + SCEnter(); + int retval, thread_numa; + DPDKThreadVars *ptv = NULL; + DPDKIfaceConfig *dpdk_config = (DPDKIfaceConfig *)initdata; + + if (initdata == NULL) { + SCLogError(SC_ERR_INVALID_ARGUMENT, "DPDK configuration is NULL in thread initialization"); + goto fail; + } + + ptv = SCCalloc(1, sizeof(DPDKThreadVars)); + if (unlikely(ptv == NULL)) { + SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate memory"); + goto fail; + } + + ptv->tv = tv; + ptv->pkts = 0; + ptv->bytes = 0; + ptv->livedev = LiveGetDevice(dpdk_config->iface); + DPDKSetTimevalOfMachineStart(&ptv->machine_start_time); + + ptv->capture_dpdk_packets = StatsRegisterCounter("capture.packets", ptv->tv); + ptv->capture_dpdk_rx_errs = StatsRegisterCounter("capture.rx_errors", ptv->tv); + ptv->capture_dpdk_tx_errs = StatsRegisterCounter("capture.tx_errors", ptv->tv); + + ptv->copy_mode = dpdk_config->copy_mode; + ptv->checksum_mode = dpdk_config->checksum_mode; + + ptv->threads = dpdk_config->threads; + ptv->port_id = dpdk_config->port_id; + ptv->out_port_id = dpdk_config->out_port_id; + uint16_t queue_id = SC_ATOMIC_ADD(dpdk_config->queue_id, 1); + ptv->queue_id = queue_id; + // pass the pointer to the mempool and then forget about it. Mempool is freed in thread deinit. 
+ ptv->pkt_mempool = dpdk_config->pkt_mempool; + dpdk_config->pkt_mempool = NULL; + + // the last thread starts the device + if (queue_id == dpdk_config->threads - 1) { + retval = rte_eth_dev_start(ptv->port_id); + if (retval < 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (%s) during device startup of %s", + rte_strerror(-retval), dpdk_config->iface); + goto fail; + } + + struct rte_eth_dev_info dev_info; + retval = rte_eth_dev_info_get(ptv->port_id, &dev_info); + if (retval != 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (%s) when getting device info of %s", + rte_strerror(-retval), dpdk_config->iface); + goto fail; + } + + // some PMDs requires additional actions only after the device has started + DevicePostStartPMDSpecificActions(ptv, dev_info.driver_name); + } + + thread_numa = GetNumaNode(); + if (thread_numa >= 0 && thread_numa != rte_eth_dev_socket_id(ptv->port_id)) { + SCLogWarning(SC_WARN_DPDK_CONF, + "NIC on NUMA %d but thread on NUMA %d. Decreased performance expected", + rte_eth_dev_socket_id(ptv->port_id), thread_numa); + } + + *data = (void *)ptv; + dpdk_config->DerefFunc(dpdk_config); + SCReturnInt(TM_ECODE_OK); + +fail: + if (dpdk_config != NULL) + dpdk_config->DerefFunc(dpdk_config); + if (ptv != NULL) + SCFree(ptv); + SCReturnInt(TM_ECODE_FAILED); +} + +/** + * \brief This function prints stats to the screen at exit. 
+ * \param tv pointer to ThreadVars + * \param data pointer that gets cast into DPDKThreadVars for ptv + */ +static void ReceiveDPDKThreadExitStats(ThreadVars *tv, void *data) +{ + SCEnter(); + int retval; + DPDKThreadVars *ptv = (DPDKThreadVars *)data; + + if (ptv->queue_id == 0) { + struct rte_eth_stats eth_stats; + char port_name[RTE_ETH_NAME_MAX_LEN]; + + retval = rte_eth_dev_get_name_by_port(ptv->port_id, port_name); + if (unlikely(retval != 0)) { + SCLogError(SC_ERR_STAT, "Failed to convert port id %d to the interface name: %s", + ptv->port_id, strerror(-retval)); + SCReturn; + } + retval = rte_eth_stats_get(ptv->port_id, ð_stats); + if (unlikely(retval != 0)) { + SCLogError(SC_ERR_STAT, "Failed to get stats for interface %s: %s", port_name, + strerror(-retval)); + SCReturn; + } + SCLogPerf("Total RX stats of %s: packets %" PRIu64 " bytes: %" PRIu64 " missed: %" PRIu64 + " errors: %" PRIu64 " nombufs: %" PRIu64, + port_name, eth_stats.ipackets, eth_stats.ibytes, eth_stats.imissed, + eth_stats.ierrors, eth_stats.rx_nombuf); + if (ptv->copy_mode == DPDK_COPY_MODE_TAP || ptv->copy_mode == DPDK_COPY_MODE_IPS) + SCLogPerf("Total TX stats of %s: packets %" PRIu64 " bytes: %" PRIu64 + " errors: %" PRIu64, + port_name, eth_stats.opackets, eth_stats.obytes, eth_stats.oerrors); + } + + DPDKDumpCounters(ptv); + SCLogPerf("(%s) received packets %" PRIu64, tv->name, ptv->pkts); +} + +/** + * \brief DeInit function closes dpdk at exit. 
+ * \param tv pointer to ThreadVars + * \param data pointer that gets cast into DPDKThreadVars for ptv + */ +static TmEcode ReceiveDPDKThreadDeinit(ThreadVars *tv, void *data) +{ + SCEnter(); + DPDKThreadVars *ptv = (DPDKThreadVars *)data; + + int retval; + if (ptv->queue_id == 0) { + struct rte_eth_dev_info dev_info; + char iface[RTE_ETH_NAME_MAX_LEN]; + retval = rte_eth_dev_get_name_by_port(ptv->port_id, iface); + if (retval != 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when getting device name (port %d)", + retval, ptv->port_id); + SCReturnInt(TM_ECODE_FAILED); + } + retval = rte_eth_dev_info_get(ptv->port_id, &dev_info); + if (retval != 0) { + SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) during getting device info (port %s)", + retval, iface); + SCReturnInt(TM_ECODE_FAILED); + } + + DevicePreStopPMDSpecificActions(ptv, dev_info.driver_name); + } + + rte_eth_dev_stop(ptv->port_id); + if (ptv->copy_mode == DPDK_COPY_MODE_TAP || ptv->copy_mode == DPDK_COPY_MODE_IPS) { + rte_eth_dev_stop(ptv->out_port_id); + } + + if (ptv->queue_id == 0 && ptv->pkt_mempool != NULL) { + rte_mempool_free(ptv->pkt_mempool); + ptv->pkt_mempool = NULL; + } + + SCFree(ptv); + SCReturnInt(TM_ECODE_OK); +} + +/** + * \brief This function passes off to link type decoders. + * + * DecodeDPDK decodes packets from DPDK and passes + * them off to the proper link type decoder. 
+ * + * \param t pointer to ThreadVars + * \param p pointer to the current packet + * \param data pointer that gets cast into DPDKThreadVars for ptv + */ +static TmEcode DecodeDPDK(ThreadVars *tv, Packet *p, void *data) +{ + SCEnter(); + DecodeThreadVars *dtv = (DecodeThreadVars *)data; + + BUG_ON(PKT_IS_PSEUDOPKT(p)); + + /* update counters */ + DecodeUpdatePacketCounters(tv, dtv, p); + + /* If suri has set vlan during reading, we increase vlan counter */ + if (p->vlan_idx) { + StatsIncr(tv, dtv->counter_vlan); + } + + /* call the decoder */ + DecodeLinkLayer(tv, dtv, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p)); + + PacketDecodeFinalize(tv, dtv, p); + + SCReturnInt(TM_ECODE_OK); +} + +static TmEcode DecodeDPDKThreadInit(ThreadVars *tv, const void *initdata, void **data) +{ + SCEnter(); + DecodeThreadVars *dtv = NULL; + + dtv = DecodeThreadVarsAlloc(tv); + + if (dtv == NULL) + SCReturnInt(TM_ECODE_FAILED); + + DecodeRegisterPerfCounters(dtv, tv); + + *data = (void *)dtv; + + SCReturnInt(TM_ECODE_OK); +} + +static TmEcode DecodeDPDKThreadDeinit(ThreadVars *tv, void *data) +{ + SCEnter(); + if (data != NULL) + DecodeThreadVarsFree(tv, data); + SCReturnInt(TM_ECODE_OK); +} + +#endif /* HAVE_DPDK */ +/* eof */ +/** + * @} + */ diff --git a/src/source-dpdk.h b/src/source-dpdk.h new file mode 100644 index 0000000000..afde13ec73 --- /dev/null +++ b/src/source-dpdk.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2021 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Lukas Sismis
+ */
+
+#ifndef __SOURCE_DPDK_H__
+#define __SOURCE_DPDK_H__
+
+#include "queue.h"
+#include "util-dpdk.h"
+
+/** Copy modes; this is the type of DPDKIfaceConfig::copy_mode. */
+typedef enum { DPDK_COPY_MODE_NONE, DPDK_COPY_MODE_TAP, DPDK_COPY_MODE_IPS } DpdkCopyModeEnum;
+
+/* NOTE(review): the _US suffix suggests a value in microseconds - confirm at the point of use */
+#define DPDK_BURST_TX_WAIT_US 1
+
+/* DPDK Flags - bit values, presumably combined in DPDKIfaceConfig::flags (TODO confirm) */
+// General flags
+#define DPDK_PROMISC (1 << 0) /**< Promiscuous mode */
+#define DPDK_MULTICAST (1 << 1) /**< Enable multicast packets */
+// Offloads
+#define DPDK_RX_CHECKSUM_OFFLOAD (1 << 4) /**< Enable chsum offload */
+
+/**
+ * \brief configuration of one DPDK interface
+ *
+ * All members are compiled in only when HAVE_DPDK is defined.
+ */
+typedef struct DPDKIfaceConfig_ {
+#ifdef HAVE_DPDK
+    char iface[RTE_ETH_NAME_MAX_LEN];
+    uint16_t port_id;
+    uint16_t socket_id;
+    /* number of threads - zero means all available */
+    int threads;
+    /* IPS mode */
+    DpdkCopyModeEnum copy_mode;
+    /* NOTE(review): out_iface / out_port_id presumably identify the peer interface
+     * used by the TAP and IPS copy modes - confirm against the config loader */
+    const char *out_iface;
+    uint16_t out_port_id;
+    /* DPDK flags */
+    uint32_t flags;
+    ChecksumValidationMode checksum_mode;
+    /* set maximum transmission unit of the device in bytes */
+    uint16_t mtu;
+    /* numbers of RX/TX queues and of RX/TX descriptors */
+    uint16_t nb_rx_queues;
+    uint16_t nb_rx_desc;
+    uint16_t nb_tx_queues;
+    uint16_t nb_tx_desc;
+    /* sizing of the packet mempool and of its cache */
+    uint32_t mempool_size;
+    uint32_t mempool_cache_size;
+    struct rte_mempool *pkt_mempool;
+    /* NOTE(review): ref looks like a reference counter paired with DerefFunc below -
+     * the code that uses them is not part of this header, confirm there */
+    SC_ATOMIC_DECLARE(unsigned int, ref);
+    /* threads bind queue id one by one */
+    SC_ATOMIC_DECLARE(uint16_t, queue_id);
+    void (*DerefFunc)(void *);
+
+    /* fixed capacity of 100 entries; what bounds this number is not visible here */
+    struct rte_flow *flow[100];
+#endif
+} DPDKIfaceConfig;
+
+/**
+ * \brief per packet DPDK vars
+ *
+ * This structure is used by the release data system and for IPS
+ */
+typedef struct DPDKPacketVars_ {
+    struct rte_mbuf *mbuf;
+    uint16_t out_port_id;
+    uint16_t out_queue_id;
+    /* presumably holds a DpdkCopyModeEnum value, stored as uint8_t - TODO confirm */
+    uint8_t copy_mode;
+} DPDKPacketVars;
+
+/* registration entry points of the DPDK receive and decode thread modules */
+void TmModuleReceiveDPDKRegister(void);
+void TmModuleDecodeDPDKRegister(void);
+
+#endif /* __SOURCE_DPDK_H__ */
diff --git a/src/suricata.c b/src/suricata.c index dc26a13c40..9411b17ad1 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -92,6 +92,8 @@ #include "source-af-packet.h" #include "source-netmap.h" +#include "source-dpdk.h" + #include "source-windivert.h" #include "source-windivert-prototypes.h" @@ -172,6 +174,8 @@ #include "util-plugin.h" +#include "util-dpdk.h" + #include "rust.h" /* @@ -358,6 +362,10 @@ static void GlobalsDestroy(SCInstance *suri) TmModuleRunDeInit(); ParseSizeDeinit(); +#ifdef HAVE_DPDK + DPDKCleanupEAL(); +#endif + #ifdef HAVE_AF_PACKET AFPPeersListClean(); #endif @@ -601,6 +609,10 @@ static void PrintUsage(const char *progname) #ifdef HAVE_PCAP_SET_BUFF printf("\t--pcap-buffer-size : size of the pcap buffer value from 0 - %i\n",INT_MAX); #endif /* HAVE_SET_PCAP_BUFF */ +#ifdef HAVE_DPDK + printf("\t--dpdk : run in dpdk mode, uses interfaces from " + "suricata.yaml\n"); +#endif #ifdef HAVE_AF_PACKET printf("\t--af-packet[=] : run in af-packet mode, no value select interfaces from suricata.yaml\n"); #endif @@ -895,6 +907,10 @@ void RegisterAllModules(void) TmModuleReceiveWinDivertRegister(); TmModuleVerdictWinDivertRegister(); TmModuleDecodeWinDivertRegister(); + + /* Dpdk */ + TmModuleReceiveDPDKRegister(); + TmModuleDecodeDPDKRegister(); } static TmEcode LoadYamlConfig(SCInstance *suri) @@ -937,6 +953,16 @@ static TmEcode ParseInterfacesList(const int runmode, char *pcap_dev) /* not an error condition if we have a 1.0 config */ LiveBuildDeviceList("pfring"); } +#ifdef HAVE_DPDK + } else if (runmode == RUNMODE_DPDK) { + char iface_selector[] = "dpdk.interfaces"; + int ret = LiveBuildDeviceList(iface_selector); + if (ret == 0) { + SCLogError( + SC_ERR_INITIALIZATION, "No interface found in config for %s", iface_selector); + SCReturnInt(TM_ECODE_FAILED); + } +#endif #ifdef HAVE_AF_PACKET } else if (runmode == RUNMODE_AFP_DEV) { /* iface has been set on command line */ @@ -1115,6 +1141,28 @@ static int ParseCommandLineAfpacket(SCInstance *suri, 
const char *in_arg) #endif } +static int ParseCommandLineDpdk(SCInstance *suri, const char *in_arg) +{ +#ifdef HAVE_DPDK + if (suri->run_mode == RUNMODE_UNKNOWN) { + suri->run_mode = RUNMODE_DPDK; + } else if (suri->run_mode == RUNMODE_DPDK) { + SCLogInfo("Multiple dpdk options have no effect on Suricata"); + } else { + SCLogError(SC_ERR_MULTIPLE_RUN_MODE, "more than one run mode " + "has been specified"); + PrintUsage(suri->progname); + return TM_ECODE_FAILED; + } + return TM_ECODE_OK; +#else + SCLogError(SC_ERR_NO_DPDK, "DPDK not enabled. On Linux " + "host, make sure to pass --enable-dpdk to " + "configure when building."); + return TM_ECODE_FAILED; +#endif +} + static int ParseCommandLinePcapLive(SCInstance *suri, const char *in_arg) { memset(suri->pcap_dev, 0, sizeof(suri->pcap_dev)); @@ -1192,6 +1240,9 @@ static TmEcode ParseCommandLine(int argc, char** argv, SCInstance *suri) {"pfring-int", required_argument, 0, 0}, {"pfring-cluster-id", required_argument, 0, 0}, {"pfring-cluster-type", required_argument, 0, 0}, +#ifdef HAVE_DPDK + {"dpdk", 0, 0, 0}, +#endif {"af-packet", optional_argument, 0, 0}, {"netmap", optional_argument, 0, 0}, {"pcap", optional_argument, 0, 0}, @@ -1304,13 +1355,15 @@ static TmEcode ParseCommandLine(int argc, char** argv, SCInstance *suri) } else if (strcmp((long_opts[option_index]).name , "capture-plugin-args") == 0){ suri->capture_plugin_args = optarg; - } - else if (strcmp((long_opts[option_index]).name , "af-packet") == 0) - { + } else if (strcmp((long_opts[option_index]).name, "dpdk") == 0) { + if (ParseCommandLineDpdk(suri, optarg) != TM_ECODE_OK) { + return TM_ECODE_FAILED; + } + } else if (strcmp((long_opts[option_index]).name, "af-packet") == 0) { if (ParseCommandLineAfpacket(suri, optarg) != TM_ECODE_OK) { return TM_ECODE_FAILED; } - } else if (strcmp((long_opts[option_index]).name , "netmap") == 0){ + } else if (strcmp((long_opts[option_index]).name, "netmap") == 0) { #ifdef HAVE_NETMAP if (suri->run_mode == 
RUNMODE_UNKNOWN) { suri->run_mode = RUNMODE_NETMAP; @@ -1348,14 +1401,14 @@ static TmEcode ParseCommandLine(int argc, char** argv, SCInstance *suri) SCLogError(SC_ERR_NFLOG_NOSUPPORT, "NFLOG not enabled."); return TM_ECODE_FAILED; #endif /* HAVE_NFLOG */ - } else if (strcmp((long_opts[option_index]).name , "pcap") == 0) { + } else if (strcmp((long_opts[option_index]).name, "pcap") == 0) { if (ParseCommandLinePcapLive(suri, optarg) != TM_ECODE_OK) { return TM_ECODE_FAILED; } - } else if(strcmp((long_opts[option_index]).name, "simulate-ips") == 0) { + } else if (strcmp((long_opts[option_index]).name, "simulate-ips") == 0) { SCLogInfo("Setting IPS mode"); EngineModeSetIPS(); - } else if(strcmp((long_opts[option_index]).name, "init-errors-fatal") == 0) { + } else if (strcmp((long_opts[option_index]).name, "init-errors-fatal") == 0) { if (ConfSetFinal("engine.init-failure-fatal", "1") != 1) { fprintf(stderr, "ERROR: Failed to set engine init-failure-fatal.\n"); return TM_ECODE_FAILED; diff --git a/src/tm-modules.c b/src/tm-modules.c index 5d0f59d1af..354770057e 100644 --- a/src/tm-modules.c +++ b/src/tm-modules.c @@ -213,6 +213,8 @@ const char * TmModuleTmmIdToString(TmmId id) CASE_CODE (TMM_DECODEPCAPFILE); CASE_CODE (TMM_RECEIVEPFRING); CASE_CODE (TMM_DECODEPFRING); + CASE_CODE(TMM_RECEIVEDPDK); + CASE_CODE(TMM_DECODEDPDK); CASE_CODE (TMM_RECEIVEPLUGIN); CASE_CODE (TMM_DECODEPLUGIN); CASE_CODE (TMM_RESPONDREJECT); diff --git a/src/tm-threads-common.h b/src/tm-threads-common.h index be8c103c97..af0284e8e9 100644 --- a/src/tm-threads-common.h +++ b/src/tm-threads-common.h @@ -53,6 +53,8 @@ typedef enum { TMM_DECODEERFDAG, TMM_RECEIVEAFP, TMM_DECODEAFP, + TMM_RECEIVEDPDK, + TMM_DECODEDPDK, TMM_RECEIVENETMAP, TMM_DECODENETMAP, TMM_ALERTPCAPINFO, diff --git a/src/util-device.c b/src/util-device.c index fc6336b3cb..fbfd5cab55 100644 --- a/src/util-device.c +++ b/src/util-device.c @@ -20,6 +20,7 @@ #include "util-device.h" #include "util-ioctl.h" #include "util-misc.h" 
+#include "util-dpdk.h" #include "device-storage.h" @@ -359,6 +360,7 @@ int LiveDeviceListClean() } RestoreIfaceOffloading(pd); + DPDKCloseDevice(pd); if (pd->dev) SCFree(pd->dev); diff --git a/src/util-dpdk.c b/src/util-dpdk.c new file mode 100644 index 0000000000..ac9e3d8653 --- /dev/null +++ b/src/util-dpdk.c @@ -0,0 +1,56 @@ +/* Copyright (C) 2021 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Lukas Sismis + */ + +#include "suricata.h" +#include "util-dpdk.h" + +void DPDKCleanupEAL(void) +{ +#ifdef HAVE_DPDK + if (run_mode == RUNMODE_DPDK) { + int retval = rte_eal_cleanup(); + if (retval != 0) + SCLogError(SC_ERR_DPDK_EAL_DEINIT, "EAL cleanup failed: %s", strerror(-retval)); + } +#endif +} + +void DPDKCloseDevice(LiveDevice *ldev) +{ + (void)ldev; // avoid warnings of unused variable +#ifdef HAVE_DPDK + uint16_t port_id; + int retval; + if (run_mode == RUNMODE_DPDK) { + retval = rte_eth_dev_get_port_by_name(ldev->dev, &port_id); + if (retval < 0) { + SCLogError(SC_ERR_DPDK_EAL_DEINIT, "Unable to get port id of \"%s\", error: %s", + ldev->dev, rte_strerror(-retval)); + return; + } + + SCLogInfo("Closing device %s", ldev->dev); + rte_eth_dev_close(port_id); + } +#endif +} diff --git a/src/util-dpdk.h b/src/util-dpdk.h new file mode 100644 index 0000000000..34820db79f --- /dev/null +++ b/src/util-dpdk.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2021 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Lukas Sismis + */ + +#ifndef UTIL_DPDK_H +#define UTIL_DPDK_H + +#ifdef HAVE_DPDK + +#include +#include +#include +#include +#include +#include +#include + +#endif /* HAVE_DPDK */ + +#include "util-device.h" + +void DPDKCleanupEAL(void); +void DPDKCloseDevice(LiveDevice *ldev); + +#endif /* UTIL_DPDK_H */ diff --git a/src/util-error.c b/src/util-error.c index 004158d6da..da03eb6779 100644 --- a/src/util-error.c +++ b/src/util-error.c @@ -379,6 +379,12 @@ const char * SCErrorToString(SCError err) CASE_CODE(SC_ERR_RULE_INVALID_UTF8); CASE_CODE(SC_ERR_HASHING_DISABLED); CASE_CODE(SC_WARN_THRESH_CONFIG); + CASE_CODE(SC_ERR_NO_DPDK); + CASE_CODE(SC_ERR_DPDK_INIT); + CASE_CODE(SC_ERR_DPDK_EAL_INIT); + CASE_CODE(SC_ERR_DPDK_EAL_DEINIT); + CASE_CODE(SC_ERR_DPDK_CONF); + CASE_CODE(SC_WARN_DPDK_CONF); CASE_CODE (SC_ERR_MAX); } diff --git a/src/util-error.h b/src/util-error.h index b854364d4e..5a23567c97 100644 --- a/src/util-error.h +++ b/src/util-error.h @@ -369,6 +369,12 @@ typedef enum { SC_ERR_RULE_INVALID_UTF8, SC_ERR_HASHING_DISABLED, SC_WARN_THRESH_CONFIG, + SC_ERR_NO_DPDK, + SC_ERR_DPDK_INIT, + SC_ERR_DPDK_EAL_INIT, + SC_ERR_DPDK_EAL_DEINIT, + SC_ERR_DPDK_CONF, + SC_WARN_DPDK_CONF, SC_ERR_MAX } SCError; diff --git a/suricata.yaml.in b/suricata.yaml.in index 248ffa36ec..f779904711 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -649,6 +649,63 @@ af-packet: #use-mmap: no #tpacket-v3: yes +dpdk: + eal-params: + proc-type: primary + + # DPDK capture support + # RX queues (and TX queues in IPS mode) are assigned to cores in 1:1 ratio + interfaces: + - interface: 0000:3b:00.0 # PCIe address of the NIC port + # Threading: possible values are either "auto" or number of threads + # - auto takes all cores + # in IPS mode it is required to specify the number of cores and the numbers on both interfaces must match + threads: auto + promisc: true # promiscuous mode - capture all packets + multicast: true # enables also detection on 
multicast packets + checksum-checks: true # if Suricata should validate checksums + checksum-checks-offload: true # if possible offload checksum validation to the NIC (saves Suricata resources) + mtu: 1500 # Set MTU of the device in bytes + + # To approximately calculate required amount of space (in bytes) for interface's mempool: mempool-size * mtu + # Make sure you have enough allocated hugepages. + # The optimum size for the packet memory pool (in terms of memory usage) is power of two minus one: n = (2^q - 1) + mempool-size: 65535 # The number of elements in the mbuf pool + + # Mempool cache size must be lower or equal to: + # - RTE_MEMPOOL_CACHE_MAX_SIZE (by default 512) and + # - "mempool-size / 1.5" + # It is advised to choose cache_size to have "mempool-size modulo cache_size == 0". + # If this is not the case, some elements will always stay in the pool and will never be used. + # The cache can be disabled if the cache_size argument is set to 0, can be useful to avoid losing objects in cache + # If the value is empty or set to "auto", Suricata will attempt to set cache size of the mempool to a value + # that matches the previously mentioned recommendations + mempool-cache-size: 257 + rx-descriptors: 1024 + tx-descriptors: 1024 + # + # IPS mode for Suricata works in 3 modes - none, tap, ips + # - none: IDS mode only - disables IPS functionality (does not further forward packets) + # - tap: forwards all packets and generates alerts (omits DROP action) This is not DPDK TAP + # - ips: the same as tap mode but it also drops packets that are flagged by rules to be dropped + copy-mode: none + copy-iface: none # or PCIe address of the second interface + + - interface: default + threads: auto + promisc: true + multicast: true + checksum-checks: true + checksum-checks-offload: true + mtu: 1500 + mempool-size: 65535 + mempool-cache-size: 257 + rx-descriptors: 1024 + tx-descriptors: 1024 + copy-mode: none + copy-iface: none + + # Cross platform libpcap capture support 
pcap: - interface: eth0