--- /dev/null
+/* Copyright (C) 2021 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \ingroup dpdk
+ *
+ * @{
+ */
+
+/**
+ * \file
+ *
+ * \author Lukas Sismis <lukas.sismis@gmail.com>
+ *
+ * DPDK runmode
+ *
+ */
+
+#include "suricata-common.h"
+#include "runmodes.h"
+#include "runmode-dpdk.h"
+#include "source-dpdk.h"
+#include "util-runmodes.h"
+#include "util-byte.h"
+#include "util-cpu.h"
+#include "util-dpdk.h"
+
+#ifdef HAVE_DPDK
+
+// Length of rss_hkey in bytes; used as rss_key_len when RSS is configured
+#define RSS_HKEY_LEN 40
+// General purpose RSS key for symmetric bidirectional flow distribution
+// (the byte pair 0x6D, 0x5A repeated to fill all RSS_HKEY_LEN bytes)
+uint8_t rss_hkey[] = { 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D,
+ 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D,
+ 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A };
+
+// Rounds x up to the nearest multiple of y (x is returned unchanged when it
+// already is a multiple of y). Integer arithmetic; y is evaluated several times.
+#define ROUNDUP(x, y) ((((x) + ((y)-1)) / (y)) * (y))
+
+/* Maximum DPDK EAL parameters count. */
+#define EAL_ARGS 48
+
+// Growable-by-append (but fixed-capacity) argv-style list of strings
+struct Arguments {
+ uint16_t capacity; // number of slots allocated in argv
+ char **argv; // array of argument strings
+ uint16_t argc; // number of slots of argv currently in use
+};
+
+// Helpers allocating single argument/option strings
+static char *AllocArgument(size_t arg_len);
+static char *AllocAndSetArgument(const char *arg);
+static char *AllocAndSetOption(const char *arg);
+
+// Helpers managing the argument list handed to the DPDK EAL
+static void ArgumentsInit(struct Arguments *args, unsigned capacity);
+static void ArgumentsCleanup(struct Arguments *args);
+static void ArgumentsAdd(struct Arguments *args, char *value);
+static void ArgumentsAddOptionAndArgument(struct Arguments *args, const char *opt, const char *arg);
+static void InitEal(void);
+
+// Setters and loaders of the per-interface configuration (DPDKIfaceConfig)
+static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str);
+static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str);
+static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues);
+static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues);
+static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int);
+static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str);
+static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int);
+static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int);
+static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int);
+static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool);
+static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool);
+static int ConfigSetChecksumChecks(DPDKIfaceConfig *iconf, int entry_bool);
+static int ConfigSetChecksumOffload(DPDKIfaceConfig *iconf, int entry_bool);
+static int ConfigSetCopyIface(DPDKIfaceConfig *iconf, const char *entry_str);
+static int ConfigSetCopyMode(DPDKIfaceConfig *iconf, const char *entry_str);
+static int ConfigSetCopyIfaceSettings(DPDKIfaceConfig *iconf, const char *iface, const char *mode);
+static void ConfigInit(DPDKIfaceConfig **iconf);
+static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface);
+static DPDKIfaceConfig *ConfigParse(const char *iface);
+
+// Device (port) configuration based on a loaded DPDKIfaceConfig
+static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
+ const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf);
+static int DeviceConfigureQueues(DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info,
+ const struct rte_eth_conf *port_conf);
+static int DeviceValidateOutIfaceConfig(DPDKIfaceConfig *iconf);
+static int DeviceConfigureIPS(DPDKIfaceConfig *iconf);
+static int DeviceConfigure(DPDKIfaceConfig *iconf);
+static void *ParseDpdkConfigAndConfigureDevice(const char *iface);
+static void DPDKDerefConfig(void *conf);
+
+// Values used by ConfigLoad when the corresponding key is not found in the
+// configuration of the interface. String defaults are parsed by the same
+// setters as user-provided values.
+#define DPDK_CONFIG_DEFAULT_THREADS "auto"
+#define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE 65535
+#define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE "auto"
+#define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS 1024
+#define DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS 1024
+#define DPDK_CONFIG_DEFAULT_MTU 1500
+#define DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE 1
+#define DPDK_CONFIG_DEFAULT_MULTICAST_MODE 1
+#define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION 1
+#define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD 1
+#define DPDK_CONFIG_DEFAULT_COPY_MODE "none"
+#define DPDK_CONFIG_DEFAULT_COPY_INTERFACE "none"
+
+// Names of the per-interface configuration keys that ConfigLoad looks up
+// under the "dpdk.interfaces" node
+DPDKIfaceConfigAttributes dpdk_yaml = {
+ .threads = "threads",
+ .promisc = "promisc",
+ .multicast = "multicast",
+ .checksum_checks = "checksum-checks",
+ .checksum_checks_offload = "checksum-checks-offload",
+ .mtu = "mtu",
+ .mempool_size = "mempool-size",
+ .mempool_cache_size = "mempool-cache-size",
+ .rx_descriptors = "rx-descriptors",
+ .tx_descriptors = "tx-descriptors",
+ .copy_mode = "copy-mode",
+ .copy_iface = "copy-iface",
+};
+
+/**
+ * Searches downwards from max_num for the largest value (not lower than 2)
+ * that divides num without a remainder.
+ * @param num number to be divided
+ * @param max_num upper bound of the searched divisor
+ * @return the greatest divisor found, or 1 when there is none in <2, max_num>
+ */
+static int GreatestDivisorUpTo(uint32_t num, uint32_t max_num)
+{
+    int candidate = max_num;
+
+    while (candidate >= 2) {
+        if (num % candidate == 0) {
+            return candidate;
+        }
+        candidate--;
+    }
+
+    return 1;
+}
+
+/**
+ * Allocates a zero-filled buffer able to hold a string of the given length.
+ * One extra byte is reserved for the terminating null character.
+ * FatalError is called when the allocation fails.
+ * @param arg_len length of the string without the terminating null character
+ * @return pointer to the allocated buffer
+ */
+static char *AllocArgument(size_t arg_len)
+{
+    SCEnter();
+    const size_t buf_len = arg_len + 1; // null character
+    char *buf = (char *)SCCalloc(buf_len, sizeof(char));
+
+    if (buf == NULL) {
+        FatalError(SC_ERR_MEM_ALLOC, "Could not allocate memory for an argument");
+    }
+
+    SCReturnPtr(buf, "char *");
+}
+
+/**
+ * Creates a newly allocated copy of the given string.
+ * FatalError is called when arg is NULL; allocation is delegated to
+ * AllocArgument.
+ * @param arg String to copy into the newly allocated space
+ * @return pointer to the copy of the string
+ */
+static char *AllocAndSetArgument(const char *arg)
+{
+    SCEnter();
+    if (arg == NULL) {
+        FatalError(SC_ERR_DPDK_CONF, "Passed argument is NULL in DPDK config initialization");
+    }
+
+    const size_t len = strlen(arg);
+    char *copy = AllocArgument(len);
+
+    // len + 1 so the terminating null character fits as well
+    strlcpy(copy, arg, len + 1);
+    SCReturnPtr(copy, "char *");
+}
+
+/**
+ * Creates a newly allocated command line option from the given name by
+ * prepending dashes: "--" for names longer than one character, "-" otherwise.
+ * FatalError is called when arg is NULL.
+ * @param arg option name without the leading dashes
+ * @return pointer to the allocated option string
+ */
+static char *AllocAndSetOption(const char *arg)
+{
+    SCEnter();
+    if (arg == NULL) {
+        FatalError(SC_ERR_DPDK_CONF, "Passed option is NULL in DPDK config initialization");
+    }
+
+    const size_t name_len = strlen(arg);
+    const char *prefix = (name_len > 1) ? "--" : "-";
+    const size_t prefix_len = strlen(prefix);
+    const size_t total_len = name_len + prefix_len;
+
+    char *option = AllocArgument(total_len);
+    strlcpy(option, prefix, prefix_len + 1);
+    strlcat(option, arg, total_len + 1);
+    SCReturnPtr(option, "char *");
+}
+
+/**
+ * Prepares an empty argument list with room for the given number of entries.
+ * FatalError is called when the array cannot be allocated.
+ * @param args argument list to initialize
+ * @param capacity number of slots to allocate
+ */
+static void ArgumentsInit(struct Arguments *args, unsigned capacity)
+{
+    SCEnter();
+    // size the array by its element type (char *), not by the type of argv itself
+    args->argv = SCCalloc(capacity, sizeof(*args->argv));
+    if (args->argv == NULL)
+        FatalError(SC_ERR_MEM_ALLOC, "Could not allocate memory for Arguments structure");
+
+    args->capacity = capacity;
+    args->argc = 0;
+    SCReturn;
+}
+
+/**
+ * Frees every stored argument string and the array itself, then resets the
+ * list to an empty state.
+ * @param args argument list to release
+ */
+static void ArgumentsCleanup(struct Arguments *args)
+{
+    SCEnter();
+    // index uses the same type as argc to avoid a signed/unsigned comparison
+    for (uint16_t i = 0; i < args->argc; i++) {
+        if (args->argv[i] != NULL) {
+            SCFree(args->argv[i]);
+            args->argv[i] = NULL;
+        }
+    }
+
+    SCFree(args->argv);
+    args->argv = NULL;
+    args->argc = 0;
+    args->capacity = 0;
+    SCReturn; // pairs with SCEnter like the other helpers in this file
+}
+
+/**
+ * Appends a string to the argument list. The pointer is stored as is (no copy
+ * is made). FatalError is called when the list is already full.
+ * @param args argument list to extend
+ * @param value string to store in the next free slot
+ */
+static void ArgumentsAdd(struct Arguments *args, char *value)
+{
+    SCEnter();
+    // report the real capacity of this list instead of a compile-time constant
+    if (args->argc + 1 > args->capacity)
+        FatalError(SC_ERR_DPDK_EAL_INIT, "No capacity for more arguments (Max: %" PRIu16 ")",
+                args->capacity);
+
+    args->argv[args->argc++] = value;
+    SCReturn;
+}
+
+/**
+ * Appends an option (prefixed with dashes by AllocAndSetOption) and, when it
+ * is present, its argument to the argument list.
+ * @param args argument list to extend
+ * @param opt option name without the leading dashes
+ * @param arg option argument; NULL or an empty string adds the option only
+ */
+static void ArgumentsAddOptionAndArgument(struct Arguments *args, const char *opt, const char *arg)
+{
+    SCEnter();
+    ArgumentsAdd(args, AllocAndSetOption(opt));
+
+    // Empty argument could mean option only (e.g. --no-huge)
+    const bool has_argument = (arg != NULL && arg[0] != '\0');
+    if (has_argument) {
+        ArgumentsAdd(args, AllocAndSetArgument(arg));
+    }
+
+    SCReturn;
+}
+
+/**
+ * Initializes the DPDK EAL with the parameters found under the
+ * "dpdk.eal-params" configuration node. Every child node becomes one option
+ * (its name) optionally followed by an argument (its value).
+ * FatalError is called when the node is missing, an allocation fails or the
+ * EAL initialization fails.
+ */
+static void InitEal(void)
+{
+    SCEnter();
+    int retval;
+    int eal_errno = 0;
+    ConfNode *param;
+    const ConfNode *eal_params = ConfGetNode("dpdk.eal-params");
+    struct Arguments args;
+    char **eal_argv;
+
+    if (eal_params == NULL) {
+        FatalError(SC_ERR_DPDK_CONF, "DPDK EAL parameters not found in the config");
+    }
+
+    ArgumentsInit(&args, EAL_ARGS);
+    // the first argument stands for the program name
+    ArgumentsAdd(&args, AllocAndSetArgument("suricata"));
+
+    TAILQ_FOREACH (param, &eal_params->head, next) {
+        ArgumentsAddOptionAndArgument(&args, param->name, param->val);
+    }
+
+    // creating a shallow copy for cleanup because rte_eal_init changes array contents
+    eal_argv = SCMalloc(args.argc * sizeof(*args.argv));
+    if (eal_argv == NULL) {
+        FatalError(
+                SC_ERR_MEM_ALLOC, "Failed to allocate memory for the array of DPDK EAL arguments");
+    }
+    memcpy(eal_argv, args.argv, args.argc * sizeof(*args.argv));
+
+    rte_log_set_global_level(RTE_LOG_WARNING);
+    retval = rte_eal_init(args.argc, eal_argv);
+    if (retval < 0) {
+        // rte_eal_init reports the cause through rte_errno, not through its return value;
+        // store it before any other call can overwrite it
+        eal_errno = rte_errno;
+    }
+
+    ArgumentsCleanup(&args);
+    SCFree(eal_argv);
+
+    if (retval < 0) { // retval bound to the result of rte_eal_init
+        FatalError(
+                SC_ERR_DPDK_EAL_INIT, "DPDK EAL initialization error: %s", rte_strerror(eal_errno));
+    }
+    SCReturn;
+}
+
+/**
+ * Drops one reference of the interface configuration. When SC_ATOMIC_SUB
+ * yields 1 (presumably the value before the decrement, i.e. this was the last
+ * reference - confirm against the macro), the packet mempool, if any, and
+ * the configuration itself are freed.
+ * @param conf pointer to a DPDKIfaceConfig
+ */
+static void DPDKDerefConfig(void *conf)
+{
+    SCEnter();
+    DPDKIfaceConfig *iconf = (DPDKIfaceConfig *)conf;
+
+    if (SC_ATOMIC_SUB(iconf->ref, 1) != 1) {
+        SCReturn;
+    }
+
+    if (iconf->pkt_mempool != NULL) {
+        rte_mempool_free(iconf->pkt_mempool);
+    }
+    SCFree(iconf);
+
+    SCReturn;
+}
+
+/**
+ * Allocates a zeroed interface configuration holding a single reference and
+ * stores it to the provided location. FatalError is called when the
+ * allocation fails.
+ * @param iconf output - receives the newly allocated configuration
+ */
+static void ConfigInit(DPDKIfaceConfig **iconf)
+{
+    SCEnter();
+    DPDKIfaceConfig *cfg = SCCalloc(1, sizeof(DPDKIfaceConfig));
+    if (cfg == NULL) {
+        FatalError(SC_ERR_DPDK_CONF, "Could not allocate memory for DPDKIfaceConfig");
+    }
+
+    cfg->flags = 0;
+    cfg->pkt_mempool = NULL;
+    cfg->out_port_id = -1; // make sure no port is set
+    cfg->DerefFunc = DPDKDerefConfig;
+
+    // the caller becomes the owner of the first reference
+    SC_ATOMIC_INIT(cfg->ref);
+    (void)SC_ATOMIC_ADD(cfg->ref, 1);
+
+    *iconf = cfg;
+    SCReturn;
+}
+
+/**
+ * Resolves the DPDK port id of the named interface and stores both the port
+ * id and the name in the configuration. FatalError is called when the name
+ * is NULL/empty or the port lookup fails.
+ * @param iconf interface configuration to fill in
+ * @param entry_str name of the interface
+ */
+static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str)
+{
+    SCEnter();
+    const bool name_missing = (entry_str == NULL || entry_str[0] == '\0');
+    if (name_missing) {
+        FatalError(SC_ERR_INVALID_VALUE, "Interface name in DPDK config is NULL or empty");
+    }
+
+    const int lookup_rc = rte_eth_dev_get_port_by_name(entry_str, &iconf->port_id);
+    if (lookup_rc < 0) {
+        FatalError(SC_ERR_DPDK_CONF, "Interface \"%s\": %s", entry_str, rte_strerror(-lookup_rc));
+    }
+
+    strlcpy(iconf->iface, entry_str, sizeof(iconf->iface));
+    SCReturn;
+}
+
+/**
+ * Sets the number of threads of the interface.
+ * In the "single" runmode the value is always 1. Otherwise "auto" selects the
+ * number of online processors and any other value is parsed as a decimal
+ * number.
+ * @param iconf interface configuration to fill in
+ * @param entry_str configured value ("auto" or a number)
+ * @return 0 on success, -EINVAL on a missing/unparsable value, -ERANGE on a
+ *         negative value
+ */
+static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str)
+{
+    SCEnter();
+    const char *runmode = RunmodeGetActive();
+    const bool single_runmode = (runmode != NULL && strcmp("single", runmode) == 0);
+
+    if (single_runmode) {
+        iconf->threads = 1;
+        SCReturnInt(0);
+    }
+
+    if (entry_str == NULL) {
+        SCLogError(SC_ERR_INVALID_VALUE, "Number of threads for interface \"%s\" not specified",
+                iconf->iface);
+        SCReturnInt(-EINVAL);
+    }
+
+    if (!strcmp(entry_str, "auto")) {
+        iconf->threads = (int)UtilCpuGetNumProcessorsOnline();
+        SCLogPerf("%u cores, so using %u threads", iconf->threads, iconf->threads);
+        SCReturnInt(0);
+    }
+
+    const int parse_rc = StringParseInt32(&iconf->threads, 10, 0, entry_str);
+    if (parse_rc < 0) {
+        SCLogError(SC_ERR_INVALID_VALUE,
+                "Threads entry for interface %s contain non-numerical characters - \"%s\"",
+                iconf->iface, entry_str);
+        SCReturnInt(-EINVAL);
+    }
+
+    if (iconf->threads < 0) {
+        SCLogError(SC_ERR_INVALID_VALUE, "Interface %s has a negative number of threads",
+                iconf->iface);
+        SCReturnInt(-ERANGE);
+    }
+
+    SCReturnInt(0);
+}
+
+/**
+ * Stores the number of RX queues of the interface. The value is stored even
+ * when it is rejected.
+ * @param iconf interface configuration to fill in
+ * @param nb_queues number of RX queues
+ * @return 0 on success, -ERANGE when the number of queues is lower than 1
+ */
+static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
+{
+    SCEnter();
+    iconf->nb_rx_queues = nb_queues;
+    if (iconf->nb_rx_queues >= 1) {
+        SCReturnInt(0);
+    }
+
+    SCLogError(SC_ERR_INVALID_VALUE, "Interface %s requires to have positive number of RX queues",
+            iconf->iface);
+    SCReturnInt(-ERANGE);
+}
+
+/**
+ * Stores the number of TX queues of the interface. The value is stored even
+ * when it is rejected.
+ * @param iconf interface configuration to fill in
+ * @param nb_queues number of TX queues
+ * @return 0 on success, -ERANGE when the number of queues is lower than 1
+ */
+static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
+{
+    SCEnter();
+    iconf->nb_tx_queues = nb_queues;
+    if (iconf->nb_tx_queues >= 1) {
+        SCReturnInt(0);
+    }
+
+    SCLogError(SC_ERR_INVALID_VALUE, "Interface %s requires to have positive number of TX queues",
+            iconf->iface);
+    SCReturnInt(-ERANGE);
+}
+
+/**
+ * Stores the size of the packet memory pool of the interface.
+ * @param iconf interface configuration to fill in
+ * @param entry_int configured memory pool size
+ * @return 0 on success, -ERANGE when the size is not positive
+ */
+static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int)
+{
+    SCEnter();
+    if (entry_int > 0) {
+        iconf->mempool_size = entry_int;
+        SCReturnInt(0);
+    }
+
+    SCLogError(SC_ERR_INVALID_VALUE, "Interface %s requires to have positive memory pool size",
+            iconf->iface);
+    SCReturnInt(-ERANGE);
+}
+
+/**
+ * Sets the per-core cache size of the packet memory pool.
+ * With a NULL/empty value or "auto" the size is derived from the already
+ * configured mempool size; otherwise the value is parsed as a decimal number.
+ * @param iconf interface configuration to fill in (mempool_size must be set)
+ * @param entry_str configured value ("auto" or a number)
+ * @return 0 on success, -EINVAL when the value cannot be calculated or
+ *         parsed, -ERANGE when it is out of the allowed range
+ */
+static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str)
+{
+    SCEnter();
+    if (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "auto") == 0) {
+        // calculate the mempool cache size based on the mempool size (it needs to be already
+        // filled in). It is advised to have mempool cache size lower or equal to:
+        //   RTE_MEMPOOL_CACHE_MAX_SIZE (by default 512) and "mempool-size / 1.5"
+        // and at the same time "mempool-size modulo cache_size == 0".
+        if (iconf->mempool_size == 0) {
+            SCLogError(SC_ERR_INVALID_VALUE,
+                    "Cannot calculate mempool cache size of a mempool with size %d",
+                    iconf->mempool_size);
+            SCReturnInt(-EINVAL);
+        }
+
+        // both limits have to be respected, so the lower one of them is the upper bound
+        uint32_t max_cache_size = (uint32_t)(iconf->mempool_size / 1.5);
+        if (max_cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+            max_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
+        }
+
+        if (max_cache_size == 0) {
+            // the mempool is too small for any cache
+            iconf->mempool_cache_size = 0;
+        } else {
+            iconf->mempool_cache_size = GreatestDivisorUpTo(iconf->mempool_size, max_cache_size);
+        }
+        SCReturnInt(0);
+    }
+
+    if (StringParseUint32(&iconf->mempool_cache_size, 10, 0, entry_str) < 0) {
+        SCLogError(SC_ERR_INVALID_VALUE,
+                "Mempool cache size entry for interface %s contain non-numerical "
+                "characters - \"%s\"",
+                iconf->iface, entry_str);
+        SCReturnInt(-EINVAL);
+    }
+
+    if (iconf->mempool_cache_size == 0 || iconf->mempool_cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+        SCLogError(SC_ERR_INVALID_VALUE,
+                "Interface %s requires to have positive mempool cache size not greater than "
+                "%" PRIu32,
+                iconf->iface, (uint32_t)RTE_MEMPOOL_CACHE_MAX_SIZE);
+        SCReturnInt(-ERANGE);
+    }
+
+    SCReturnInt(0);
+}
+
+/**
+ * Stores the number of RX descriptors of the interface.
+ * @param iconf interface configuration to fill in
+ * @param entry_int configured number of RX descriptors
+ * @return 0 on success, -ERANGE when the number is not positive
+ */
+static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int)
+{
+    SCEnter();
+    if (entry_int > 0) {
+        iconf->nb_rx_desc = entry_int;
+        SCReturnInt(0);
+    }
+
+    SCLogError(SC_ERR_INVALID_VALUE,
+            "Interface %s requires to have positive number of RX descriptors", iconf->iface);
+    SCReturnInt(-ERANGE);
+}
+
+/**
+ * Stores the number of TX descriptors of the interface.
+ * @param iconf interface configuration to fill in
+ * @param entry_int configured number of TX descriptors
+ * @return 0 on success, -ERANGE when the number is not positive
+ */
+static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int)
+{
+    SCEnter();
+    if (entry_int > 0) {
+        iconf->nb_tx_desc = entry_int;
+        SCReturnInt(0);
+    }
+
+    SCLogError(SC_ERR_INVALID_VALUE,
+            "Interface %s requires to have positive number of TX descriptors", iconf->iface);
+    SCReturnInt(-ERANGE);
+}
+
+/**
+ * Stores the MTU of the interface after checking it lies within
+ * <RTE_ETHER_MIN_MTU, RTE_ETHER_MAX_JUMBO_FRAME_LEN>.
+ * @param iconf interface configuration to fill in
+ * @param entry_int configured MTU
+ * @return 0 on success, -ERANGE when the MTU is out of the range
+ */
+static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int)
+{
+    SCEnter();
+    const bool in_range =
+            (entry_int >= RTE_ETHER_MIN_MTU && entry_int <= RTE_ETHER_MAX_JUMBO_FRAME_LEN);
+
+    if (in_range) {
+        iconf->mtu = entry_int;
+        SCReturnInt(0);
+    }
+
+    SCLogError(SC_ERR_INVALID_VALUE,
+            "Interface %s requires to have size of MTU between %" PRIu32 " and %" PRIu32,
+            iconf->iface, RTE_ETHER_MIN_MTU, RTE_ETHER_MAX_JUMBO_FRAME_LEN);
+    SCReturnInt(-ERANGE);
+}
+
+/**
+ * Raises the DPDK_PROMISC flag of the interface when requested. The flag is
+ * never cleared here.
+ * @param iconf interface configuration to fill in
+ * @param entry_bool non-zero to enable the promiscuous mode
+ * @return always true
+ */
+static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool)
+{
+    SCEnter();
+    if (entry_bool != 0) {
+        iconf->flags |= DPDK_PROMISC;
+    }
+
+    SCReturnBool(true);
+}
+
+/**
+ * Raises the DPDK_MULTICAST flag of the interface when requested. The flag is
+ * never cleared here.
+ * @param iconf interface configuration to fill in
+ * @param entry_bool non-zero to enable the multicast
+ * @return always true
+ */
+static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool)
+{
+    SCEnter();
+    if (entry_bool != 0) {
+        iconf->flags |= DPDK_MULTICAST; // enable
+    }
+
+    SCReturnBool(true);
+}
+
+/**
+ * Switches the checksum mode of the interface to CHECKSUM_VALIDATION_ENABLE
+ * when requested; otherwise the mode is left untouched.
+ * @param iconf interface configuration to fill in
+ * @param entry_bool non-zero to enable the checksum validation
+ * @return always 0
+ */
+static int ConfigSetChecksumChecks(DPDKIfaceConfig *iconf, int entry_bool)
+{
+    SCEnter();
+    if (entry_bool != 0) {
+        iconf->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
+    }
+
+    SCReturnInt(0);
+}
+
+/**
+ * Raises the DPDK_RX_CHECKSUM_OFFLOAD flag of the interface when requested.
+ * The flag is never cleared here.
+ * @param iconf interface configuration to fill in
+ * @param entry_bool non-zero to enable the checksum validation offload
+ * @return always 0
+ */
+static int ConfigSetChecksumOffload(DPDKIfaceConfig *iconf, int entry_bool)
+{
+    SCEnter();
+    if (entry_bool != 0) {
+        iconf->flags |= DPDK_RX_CHECKSUM_OFFLOAD;
+    }
+
+    SCReturnInt(0);
+}
+
+/**
+ * Sets the copy (output) interface of the interface.
+ * A NULL/empty value or "none" means no copy interface (out_iface == NULL).
+ * A name which does not resolve to a DPDK port is reported with a warning
+ * and treated the same way as "none".
+ * @param iconf interface configuration to fill in
+ * @param entry_str name of the copy interface; the pointer itself is stored,
+ *        so the string has to outlive the configuration
+ * @return always 0
+ */
+static int ConfigSetCopyIface(DPDKIfaceConfig *iconf, const char *entry_str)
+{
+    SCEnter();
+    int retval;
+
+    if (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "none") == 0) {
+        iconf->out_iface = NULL;
+        SCReturnInt(0);
+    }
+
+    retval = rte_eth_dev_get_port_by_name(entry_str, &iconf->out_port_id);
+    if (retval < 0) {
+        SCLogWarning(SC_ERR_DPDK_CONF,
+                "Name of the copy interface (%s) for the interface %s is not valid, changing to %s",
+                entry_str, iconf->iface, DPDK_CONFIG_DEFAULT_COPY_INTERFACE);
+        // "none" is represented by NULL; return so the invalid name is not stored below
+        iconf->out_iface = NULL;
+        SCReturnInt(0);
+    }
+
+    iconf->out_iface = entry_str;
+    SCReturnInt(0);
+}
+
+/**
+ * Sets the copy mode of the interface. A missing or unknown value is
+ * reported with a warning and replaced by DPDK_CONFIG_DEFAULT_COPY_MODE.
+ * @param iconf interface configuration to fill in
+ * @param entry_str configured copy mode (none|tap|ips)
+ * @return always 0
+ */
+static int ConfigSetCopyMode(DPDKIfaceConfig *iconf, const char *entry_str)
+{
+    SCEnter();
+    const char *mode = entry_str;
+
+    if (mode == NULL) {
+        SCLogWarning(SC_ERR_INVALID_VALUE,
+                "Interface %s has no copy mode specified, changing to %s ", iconf->iface,
+                DPDK_CONFIG_DEFAULT_COPY_MODE);
+        mode = DPDK_CONFIG_DEFAULT_COPY_MODE;
+    }
+
+    const bool known_mode = (strcmp(mode, "none") == 0 || strcmp(mode, "tap") == 0 ||
+                             strcmp(mode, "ips") == 0);
+    if (!known_mode) {
+        SCLogWarning(SC_ERR_INVALID_VALUE,
+                "Copy mode \"%s\" is not one of the possible values (none|tap|ips) for interface "
+                "%s. Changing to %s",
+                mode, iconf->iface, DPDK_CONFIG_DEFAULT_COPY_MODE);
+        mode = DPDK_CONFIG_DEFAULT_COPY_MODE;
+    }
+
+    if (!strcmp(mode, "none")) {
+        iconf->copy_mode = DPDK_COPY_MODE_NONE;
+    } else if (!strcmp(mode, "tap")) {
+        iconf->copy_mode = DPDK_COPY_MODE_TAP;
+    } else if (!strcmp(mode, "ips")) {
+        iconf->copy_mode = DPDK_COPY_MODE_IPS;
+    }
+
+    SCReturnInt(0);
+}
+
+/**
+ * Sets the copy interface and the copy mode of the interface and checks
+ * they are consistent: with the copy mode "none" the copy interface is
+ * dropped, with any other mode the copy interface must be set.
+ * @param iconf interface configuration to fill in
+ * @param iface name of the copy interface
+ * @param mode copy mode (none|tap|ips)
+ * @return 0 on success, a negative value returned by one of the setters,
+ *         -EINVAL when a copy mode is enabled without a copy interface
+ */
+static int ConfigSetCopyIfaceSettings(DPDKIfaceConfig *iconf, const char *iface, const char *mode)
+{
+    SCEnter();
+    int retval;
+
+    retval = ConfigSetCopyIface(iconf, iface);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    retval = ConfigSetCopyMode(iconf, mode);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    if (iconf->copy_mode == DPDK_COPY_MODE_NONE) {
+        // no copying requested, a configured copy interface is ignored
+        iconf->out_iface = NULL;
+        SCReturnInt(0);
+    }
+
+    if (iconf->out_iface == NULL || iconf->out_iface[0] == '\0') {
+        SCLogError(SC_ERR_DPDK_CONF, "Copy mode enabled but interface not set");
+        SCReturnInt(-EINVAL);
+    }
+
+    if (iconf->copy_mode == DPDK_COPY_MODE_IPS)
+        SCLogInfo("DPDK IPS mode activated between %s and %s", iconf->iface, iconf->out_iface);
+    else if (iconf->copy_mode == DPDK_COPY_MODE_TAP)
+        SCLogInfo("DPDK TAP mode activated between %s and %s", iconf->iface, iconf->out_iface);
+
+    SCReturnInt(0);
+}
+
+/**
+ * Loads the configuration of the given interface from the "dpdk.interfaces"
+ * configuration node. Every setting which is not found (the lookup does not
+ * return 1) is set to its DPDK_CONFIG_DEFAULT_* value.
+ * FatalError is called when the interface or its configuration node cannot
+ * be found.
+ * @param iconf interface configuration to fill in
+ * @param iface name of the interface
+ * @return 0 on success, a negative value when any of the settings is invalid
+ */
+static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface)
+{
+    SCEnter();
+    int retval;
+    int copy_mode_found;
+    int copy_iface_found;
+    ConfNode *if_root;
+    ConfNode *if_default;
+    const char *entry_str = NULL;
+    intmax_t entry_int = 0;
+    int entry_bool = 0;
+    const char *copy_iface_str = NULL;
+    const char *copy_mode_str = NULL;
+
+    ConfigSetIface(iconf, iface);
+
+    retval = ConfSetRootAndDefaultNodes("dpdk.interfaces", iconf->iface, &if_root, &if_default);
+    if (retval < 0) {
+        FatalError(SC_ERR_DPDK_CONF, "failed to find DPDK configuration for the interface %s",
+                iconf->iface);
+    }
+
+    retval = ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.threads, &entry_str) != 1
+                     ? ConfigSetThreads(iconf, DPDK_CONFIG_DEFAULT_THREADS)
+                     : ConfigSetThreads(iconf, entry_str);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
+    retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
+    retval = ConfigSetTxQueues(iconf, (uint16_t)iconf->threads);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    retval = ConfGetChildValueIntWithDefault(
+                     if_root, if_default, dpdk_yaml.mempool_size, &entry_int) != 1
+                     ? ConfigSetMempoolSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE)
+                     : ConfigSetMempoolSize(iconf, entry_int);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    // has to follow the mempool size - "auto" derives the cache size from it
+    retval = ConfGetChildValueWithDefault(
+                     if_root, if_default, dpdk_yaml.mempool_cache_size, &entry_str) != 1
+                     ? ConfigSetMempoolCacheSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE)
+                     : ConfigSetMempoolCacheSize(iconf, entry_str);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    retval = ConfGetChildValueIntWithDefault(
+                     if_root, if_default, dpdk_yaml.rx_descriptors, &entry_int) != 1
+                     ? ConfigSetRxDescriptors(iconf, DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS)
+                     : ConfigSetRxDescriptors(iconf, entry_int);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    retval = ConfGetChildValueIntWithDefault(
+                     if_root, if_default, dpdk_yaml.tx_descriptors, &entry_int) != 1
+                     ? ConfigSetTxDescriptors(iconf, DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS)
+                     : ConfigSetTxDescriptors(iconf, entry_int);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    retval = ConfGetChildValueIntWithDefault(if_root, if_default, dpdk_yaml.mtu, &entry_int) != 1
+                     ? ConfigSetMtu(iconf, DPDK_CONFIG_DEFAULT_MTU)
+                     : ConfigSetMtu(iconf, entry_int);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    // the following two setters return bool (true on success), not an error code
+    retval = ConfGetChildValueBoolWithDefault(
+                     if_root, if_default, dpdk_yaml.promisc, &entry_bool) != 1
+                     ? ConfigSetPromiscuousMode(iconf, DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE)
+                     : ConfigSetPromiscuousMode(iconf, entry_bool);
+    if (retval != true)
+        SCReturnInt(-EINVAL);
+
+    retval = ConfGetChildValueBoolWithDefault(
+                     if_root, if_default, dpdk_yaml.multicast, &entry_bool) != 1
+                     ? ConfigSetMulticast(iconf, DPDK_CONFIG_DEFAULT_MULTICAST_MODE)
+                     : ConfigSetMulticast(iconf, entry_bool);
+    if (retval != true)
+        SCReturnInt(-EINVAL);
+
+    retval = ConfGetChildValueBoolWithDefault(
+                     if_root, if_default, dpdk_yaml.checksum_checks, &entry_bool) != 1
+                     ? ConfigSetChecksumChecks(iconf, DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION)
+                     : ConfigSetChecksumChecks(iconf, entry_bool);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    retval = ConfGetChildValueBoolWithDefault(
+                     if_root, if_default, dpdk_yaml.checksum_checks_offload, &entry_bool) != 1
+                     ? ConfigSetChecksumOffload(
+                               iconf, DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD)
+                     : ConfigSetChecksumOffload(iconf, entry_bool);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    copy_mode_found =
+            ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.copy_mode, &copy_mode_str);
+    copy_iface_found = ConfGetChildValueWithDefault(
+            if_root, if_default, dpdk_yaml.copy_iface, &copy_iface_str);
+    // The defaults are used when the combined (OR-ed) lookup result differs from 1 -
+    // presumably only when neither of the two settings is found (TODO confirm the
+    // possible return values). A single missing setting is passed on as NULL and
+    // handled by the individual setters.
+    retval = (copy_mode_found | copy_iface_found) != 1
+                     ? ConfigSetCopyIfaceSettings(iconf, DPDK_CONFIG_DEFAULT_COPY_INTERFACE,
+                               DPDK_CONFIG_DEFAULT_COPY_MODE)
+                     : ConfigSetCopyIfaceSettings(iconf, copy_iface_str, copy_mode_str);
+    if (retval < 0)
+        SCReturnInt(retval);
+
+    SCReturnInt(0);
+}
+
+/**
+ * Allocates the configuration of the given interface and loads it.
+ * FatalError is called when iface is NULL.
+ * @param iface name of the interface
+ * @return the loaded configuration, or NULL when loading fails (the
+ *         reference taken at allocation is dropped in that case)
+ */
+static DPDKIfaceConfig *ConfigParse(const char *iface)
+{
+    SCEnter();
+    DPDKIfaceConfig *iconf = NULL;
+
+    if (iface == NULL) {
+        FatalError(SC_ERR_DPDK_CONF, "DPDK interface is NULL");
+    }
+
+    ConfigInit(&iconf);
+    if (ConfigLoad(iconf, iface) >= 0) {
+        SCReturnPtr(iconf, "DPDKIfaceConfig *");
+    }
+
+    iconf->DerefFunc(iconf);
+    SCReturnPtr(NULL, "void *");
+}
+
+// Called by DeviceInitPortConf right after the generic RSS configuration is
+// filled in. The body is empty, so neither rss_conf nor driver_name is used;
+// presumably a placeholder for driver-specific RSS adjustments.
+static void DeviceSetPMDSpecificRSS(struct rte_eth_rss_conf *rss_conf, const char *driver_name)
+{
+}
+
+// Fills port_conf with the port configuration derived from the interface
+// configuration (iconf) and the capabilities reported by the device
+// (dev_info). Any previous content of port_conf is overwritten.
+static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
+ const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf)
+{
+ // start from a configuration with no multi-queue mode and no offloads
+ *port_conf = (struct rte_eth_conf){
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_NONE,
+ .max_rx_pkt_len = iconf->mtu,
+ .offloads = 0, // turn every offload off to prevent any packet modification
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ .offloads = 0,
+ },
+ };
+
+ // configure RX offloads
+ if (dev_info->rx_offload_capa & DEV_RX_OFFLOAD_RSS_HASH) {
+ // RSS is enabled only when there is more than one RX queue
+ if (iconf->nb_rx_queues > 1) {
+ SCLogConfig("RSS enabled on %s for %d queues", iconf->iface, iconf->nb_rx_queues);
+ port_conf->rx_adv_conf.rss_conf = (struct rte_eth_rss_conf){
+ .rss_key = rss_hkey,
+ .rss_key_len = RSS_HKEY_LEN,
+ .rss_hf = ETH_RSS_IP,
+ };
+
+ DeviceSetPMDSpecificRSS(&port_conf->rx_adv_conf.rss_conf, dev_info->driver_name);
+
+ // keep only the hash functions which the device reports as supported
+ uint64_t rss_hf_tmp =
+ port_conf->rx_adv_conf.rss_conf.rss_hf & dev_info->flow_type_rss_offloads;
+ if (port_conf->rx_adv_conf.rss_conf.rss_hf != rss_hf_tmp) {
+ SCLogWarning(SC_WARN_DPDK_CONF,
+ "Interface %s modified RSS hash function based on hardware support, "
+ "requested:%#" PRIx64 " configured:%#" PRIx64,
+ iconf->iface, port_conf->rx_adv_conf.rss_conf.rss_hf, rss_hf_tmp);
+ port_conf->rx_adv_conf.rss_conf.rss_hf = rss_hf_tmp;
+ }
+ port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS;
+ } else {
+ SCLogConfig("RSS not enabled on %s", iconf->iface);
+ port_conf->rx_adv_conf.rss_conf.rss_key = NULL;
+ port_conf->rx_adv_conf.rss_conf.rss_hf = 0;
+ }
+ } else {
+ SCLogConfig("RSS not supported on %s", iconf->iface);
+ }
+
+ // the checksum offload is requested only when the validation is enabled, the
+ // offload is requested in the configuration and the device supports it
+ if (iconf->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
+ SCLogConfig("Checksum validation disabled on %s", iconf->iface);
+ } else if (dev_info->rx_offload_capa & DEV_RX_OFFLOAD_CHECKSUM) {
+ if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE &&
+ iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD) {
+ SCLogConfig("IP, TCP and UDP checksum validation enabled and offloaded "
+ "on %s",
+ iconf->iface);
+ port_conf->rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM;
+ } else if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE &&
+ !(iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD)) {
+ SCLogConfig("Suricata checksum validation enabled (but can be offloaded on %s)",
+ iconf->iface);
+ }
+ }
+
+ // an MTU above RTE_ETHER_MAX_LEN requests the jumbo frame offload
+ if (iconf->mtu > RTE_ETHER_MAX_LEN) {
+ port_conf->rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+ }
+
+ // the fast free of mbufs on TX is requested whenever the device supports it
+ if (dev_info->tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) {
+ port_conf->txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+ }
+}
+
+/**
+ * Creates the packet mbuf pool of the interface and sets up all of its RX
+ * and TX queues. When a queue setup fails, the mempool is freed and the
+ * pointer to it is reset, so it cannot be freed for the second time when the
+ * configuration is released.
+ * @param iconf interface configuration (receives the created mempool)
+ * @param dev_info device information providing the default queue settings
+ * @param port_conf port configuration providing the queue offloads
+ * @return 0 on success, a negative error code otherwise
+ */
+static int DeviceConfigureQueues(DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info,
+        const struct rte_eth_conf *port_conf)
+{
+    SCEnter();
+    int retval;
+    uint16_t mtu_size;
+    uint16_t mbuf_size;
+    struct rte_eth_rxconf rxq_conf;
+    struct rte_eth_txconf txq_conf;
+
+    char mempool_name[64];
+    snprintf(mempool_name, sizeof(mempool_name), "mempool_%.20s", iconf->iface);
+    // +4 for VLAN header
+    mtu_size = iconf->mtu + RTE_ETHER_CRC_LEN + RTE_ETHER_HDR_LEN + 4;
+    mbuf_size = ROUNDUP(mtu_size, 1024) + RTE_PKTMBUF_HEADROOM;
+    SCLogInfo("Creating a packet mbuf pool %s of size %d, cache size %d, mbuf size %d",
+            mempool_name, iconf->mempool_size, iconf->mempool_cache_size, mbuf_size);
+
+    iconf->pkt_mempool = rte_pktmbuf_pool_create(mempool_name, iconf->mempool_size,
+            iconf->mempool_cache_size, 0, mbuf_size, (int)iconf->socket_id);
+    if (iconf->pkt_mempool == NULL) {
+        retval = -rte_errno;
+        SCLogError(SC_ERR_DPDK_INIT,
+                "Error (err=%d) during rte_pktmbuf_pool_create (mempool: %s) - %s", rte_errno,
+                mempool_name, rte_strerror(rte_errno));
+        SCReturnInt(retval);
+    }
+
+    for (uint16_t queue_id = 0; queue_id < iconf->nb_rx_queues; queue_id++) {
+        // device defaults with the thresholds zeroed and the port offloads applied
+        rxq_conf = dev_info->default_rxconf;
+        rxq_conf.offloads = port_conf->rxmode.offloads;
+        rxq_conf.rx_thresh.hthresh = 0;
+        rxq_conf.rx_thresh.pthresh = 0;
+        rxq_conf.rx_thresh.wthresh = 0;
+        rxq_conf.rx_free_thresh = 0;
+        rxq_conf.rx_drop_en = 0;
+        SCLogPerf(
+                "Creating Q %d of P %d using desc RX: %d TX: %d RX htresh: %d RX pthresh %d wtresh "
+                "%d free_tresh %d drop_en %d Offloads %" PRIu64,
+                queue_id, iconf->port_id, iconf->nb_rx_desc, iconf->nb_tx_desc,
+                rxq_conf.rx_thresh.hthresh, rxq_conf.rx_thresh.pthresh, rxq_conf.rx_thresh.wthresh,
+                rxq_conf.rx_free_thresh, rxq_conf.rx_drop_en, (uint64_t)rxq_conf.offloads);
+
+        retval = rte_eth_rx_queue_setup(iconf->port_id, queue_id, iconf->nb_rx_desc,
+                iconf->socket_id, &rxq_conf, iconf->pkt_mempool);
+        if (retval < 0) {
+            rte_mempool_free(iconf->pkt_mempool);
+            iconf->pkt_mempool = NULL; // prevents a double free in DPDKDerefConfig
+            SCLogError(SC_ERR_DPDK_INIT,
+                    "Error (err=%d) during initialization of device queue %u of port %u", retval,
+                    queue_id, iconf->port_id);
+            SCReturnInt(retval);
+        }
+    }
+
+    for (uint16_t queue_id = 0; queue_id < iconf->nb_tx_queues; queue_id++) {
+        txq_conf = dev_info->default_txconf;
+        txq_conf.offloads = port_conf->txmode.offloads;
+        SCLogPerf("Creating TX queue %d on port %d", queue_id, iconf->port_id);
+        retval = rte_eth_tx_queue_setup(
+                iconf->port_id, queue_id, iconf->nb_tx_desc, iconf->socket_id, &txq_conf);
+        if (retval < 0) {
+            rte_mempool_free(iconf->pkt_mempool);
+            iconf->pkt_mempool = NULL; // prevents a double free in DPDKDerefConfig
+            SCLogError(SC_ERR_DPDK_INIT,
+                    "Error (err=%d) during initialization of device queue %u of port %u", retval,
+                    queue_id, iconf->port_id);
+            SCReturnInt(retval);
+        }
+    }
+
+    SCReturnInt(0);
+}
+
+/**
+ * Loads the configuration of the copy interface into a temporary
+ * configuration and verifies it matches the configuration of the interface:
+ * RX queues of the interface against TX queues of the copy interface, MTU,
+ * copy mode, and the copy interface pointing back to the interface.
+ * The temporary configuration is released before returning.
+ * @param iconf interface configuration with out_iface set
+ * @return 0 when the configurations match, -EINVAL otherwise
+ */
+static int DeviceValidateOutIfaceConfig(DPDKIfaceConfig *iconf)
+{
+    SCEnter();
+    int result = -EINVAL;
+    DPDKIfaceConfig *out_iconf = NULL;
+
+    ConfigInit(&out_iconf);
+    if (out_iconf == NULL) {
+        FatalError(
+                SC_ERR_DPDK_CONF, "Copy interface of the interface \"%s\" is NULL", iconf->iface);
+    }
+
+    if (ConfigLoad(out_iconf, iconf->out_iface) < 0) {
+        SCLogError(SC_ERR_DPDK_CONF, "Fail to load config of interface %s", iconf->out_iface);
+        goto cleanup;
+    }
+
+    if (iconf->nb_rx_queues != out_iconf->nb_tx_queues) {
+        // the other direction is validated when the copy interface is configured
+        SCLogError(SC_ERR_DPDK_CONF,
+                "Interface %s has configured %d RX queues but copy interface %s has %d TX queues"
+                " - number of queues must be equal",
+                iconf->iface, iconf->nb_rx_queues, out_iconf->iface, out_iconf->nb_tx_queues);
+        goto cleanup;
+    }
+
+    if (iconf->mtu != out_iconf->mtu) {
+        SCLogError(SC_ERR_DPDK_CONF,
+                "Interface %s has configured MTU of %dB but copy interface %s has MTU set to %dB"
+                " - MTU must be equal",
+                iconf->iface, iconf->mtu, out_iconf->iface, out_iconf->mtu);
+        goto cleanup;
+    }
+
+    if (iconf->copy_mode != out_iconf->copy_mode) {
+        SCLogError(SC_ERR_DPDK_CONF, "Copy modes of interfaces %s and %s are not equal",
+                iconf->iface, out_iconf->iface);
+        goto cleanup;
+    }
+
+    if (strcmp(iconf->iface, out_iconf->out_iface) != 0) {
+        // check if the other iface has the current iface set as a copy iface
+        SCLogError(SC_ERR_DPDK_CONF, "Copy interface of %s is not set to %s", out_iconf->iface,
+                iconf->iface);
+        goto cleanup;
+    }
+
+    result = 0;
+
+cleanup:
+    out_iconf->DerefFunc(out_iconf);
+    SCReturnInt(result);
+}
+
+/**
+ * Prepares the interface for copying packets to its copy interface: the port
+ * id of the copy interface is resolved, a warning is logged when the two
+ * ports report different socket ids, and the configuration of the copy
+ * interface is validated. Nothing is done when no copy interface is set.
+ * @param iconf interface configuration
+ * @return 0 on success, a non-zero error code otherwise
+ */
+static int DeviceConfigureIPS(DPDKIfaceConfig *iconf)
+{
+    SCEnter();
+    if (iconf->out_iface == NULL) {
+        SCReturnInt(0);
+    }
+
+    int rc = rte_eth_dev_get_port_by_name(iconf->out_iface, &iconf->out_port_id);
+    if (rc != 0) {
+        SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) during obtaining port id of %s", rc,
+                iconf->out_iface);
+        SCReturnInt(rc);
+    }
+
+    if (rte_eth_dev_socket_id(iconf->port_id) != rte_eth_dev_socket_id(iconf->out_port_id)) {
+        SCLogWarning(SC_WARN_DPDK_CONF, "%s and %s are not on the same NUMA node", iconf->iface,
+                iconf->out_iface);
+    }
+
+    rc = DeviceValidateOutIfaceConfig(iconf);
+    if (rc != 0) {
+        // Error will be written out by the validation function
+        SCReturnInt(rc);
+    }
+
+    SCReturnInt(0);
+}
+
+/**
+ * \brief Configure a DPDK port according to the loaded interface settings.
+ *
+ * Resolves the port id and socket id of the interface, checks the requested
+ * queue counts and MTU against the limits reported by the device, configures
+ * the device, applies the allmulticast, promiscuous and MTU settings and
+ * finally sets up the queues and the optional output interface.
+ *
+ * \param iconf interface configuration; port_id and socket_id are filled in,
+ *              checksum_mode, the descriptor counts and mtu may be updated
+ * \return 0 on success, non-zero on failure (negative error codes, or
+ *         TM_ECODE_FAILED for promiscuous mode failures)
+ */
+static int DeviceConfigure(DPDKIfaceConfig *iconf)
+{
+    SCEnter();
+    // configure device
+    int retval;
+    struct rte_eth_dev_info dev_info;
+    struct rte_eth_conf port_conf;
+
+    retval = rte_eth_dev_get_port_by_name(iconf->iface, &(iconf->port_id));
+    if (retval < 0) {
+        SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when getting port id of %s Is device enabled?",
+                retval, iconf->iface);
+        SCReturnInt(retval);
+    }
+
+    if (!rte_eth_dev_is_valid_port(iconf->port_id)) {
+        SCLogError(SC_ERR_DPDK_INIT, "Specified port %d is invalid", iconf->port_id);
+        // retval still holds the success code of the lookup above, so an
+        // explicit error has to be returned here
+        SCReturnInt(-EINVAL);
+    }
+
+    retval = rte_eth_dev_socket_id(iconf->port_id);
+    if (retval < 0) {
+        SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) invalid socket id (port %s)", retval,
+                iconf->iface);
+        SCReturnInt(retval);
+    } else {
+        iconf->socket_id = retval;
+    }
+
+    retval = rte_eth_dev_info_get(iconf->port_id, &dev_info);
+    if (retval != 0) {
+        SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) during getting device info (port %s)", retval,
+                iconf->iface);
+        SCReturnInt(retval);
+    }
+
+    if (iconf->nb_rx_queues > dev_info.max_rx_queues) {
+        SCLogError(SC_ERR_DPDK_INIT,
+                "Number of configured RX queues of %s is higher than maximum allowed (%" PRIu16 ")",
+                iconf->iface, dev_info.max_rx_queues);
+        SCReturnInt(-ERANGE);
+    }
+
+    if (iconf->nb_tx_queues > dev_info.max_tx_queues) {
+        SCLogError(SC_ERR_DPDK_INIT,
+                "Number of configured TX queues of %s is higher than maximum allowed (%" PRIu16 ")",
+                iconf->iface, dev_info.max_tx_queues);
+        SCReturnInt(-ERANGE);
+    }
+
+    if (iconf->mtu > dev_info.max_mtu || iconf->mtu < dev_info.min_mtu) {
+        SCLogError(SC_ERR_DPDK_INIT,
+                "Loaded MTU of \"%s\" is out of bounds. "
+                "Min MTU: %" PRIu16 " Max MTU: %" PRIu16,
+                iconf->iface, dev_info.min_mtu, dev_info.max_mtu);
+        SCReturnInt(-ERANGE);
+    }
+
+    // check if jumbo frames are set and are available
+    if (iconf->mtu > RTE_ETHER_MAX_LEN &&
+            !(dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)) {
+        SCLogError(SC_ERR_DPDK_CONF,
+                "Jumbo frames not supported, "
+                "set MTU of \"%s\" to 1500B",
+                iconf->iface);
+        SCReturnInt(-EINVAL);
+    }
+
+    DeviceInitPortConf(iconf, &dev_info, &port_conf);
+    if (port_conf.rxmode.offloads & DEV_RX_OFFLOAD_CHECKSUM) {
+        // Suricata does not need recalc checksums now
+        iconf->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
+    }
+
+    retval = rte_eth_dev_configure(
+            iconf->port_id, iconf->nb_rx_queues, iconf->nb_tx_queues, &port_conf);
+    if (retval != 0) {
+        SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) during configuring the device (port %u)",
+                retval, iconf->port_id);
+        SCReturnInt(retval);
+    }
+
+    retval = rte_eth_dev_adjust_nb_rx_tx_desc(
+            iconf->port_id, &iconf->nb_rx_desc, &iconf->nb_tx_desc);
+    if (retval != 0) {
+        SCLogError(SC_ERR_DPDK_INIT,
+                "Error (err=%d) during adjustment of device queues descriptors (port %u)", retval,
+                iconf->port_id);
+        SCReturnInt(retval);
+    }
+
+    retval = iconf->flags & DPDK_MULTICAST ? rte_eth_allmulticast_enable(iconf->port_id)
+                                           : rte_eth_allmulticast_disable(iconf->port_id);
+    if (retval == -ENOTSUP) {
+        // the mode can not be changed, find out what the port currently uses
+        retval = rte_eth_allmulticast_get(iconf->port_id);
+        // when multicast is enabled but set to disable or vice versa
+        if ((retval == 1 && !(iconf->flags & DPDK_MULTICAST)) ||
+                (retval == 0 && (iconf->flags & DPDK_MULTICAST))) {
+            // the mismatch is only reported here, configuration continues
+            SCLogError(SC_ERR_DPDK_CONF,
+                    "Allmulticast setting of port (%" PRIu16
+                    ") can not be configured. Set it to %s",
+                    iconf->port_id, retval == 1 ? "true" : "false");
+        } else if (retval < 0) {
+            SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) Unable to get multicast mode on port %u",
+                    retval, iconf->port_id);
+            SCReturnInt(retval);
+        }
+    } else if (retval < 0) {
+        SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when en/disabling multicast on port %u",
+                retval, iconf->port_id);
+        SCReturnInt(retval);
+    }
+
+    retval = iconf->flags & DPDK_PROMISC ? rte_eth_promiscuous_enable(iconf->port_id)
+                                         : rte_eth_promiscuous_disable(iconf->port_id);
+    if (retval == -ENOTSUP) {
+        // the mode can not be changed, find out what the port currently uses
+        retval = rte_eth_promiscuous_get(iconf->port_id);
+        if ((retval == 1 && !(iconf->flags & DPDK_PROMISC)) ||
+                (retval == 0 && (iconf->flags & DPDK_PROMISC))) {
+            SCLogError(SC_ERR_DPDK_CONF,
+                    "Promiscuous setting of port (%" PRIu16 ") can not be configured. Set it to %s",
+                    iconf->port_id, retval == 1 ? "true" : "false");
+            SCReturnInt(TM_ECODE_FAILED);
+        } else if (retval < 0) {
+            SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) Unable to get promiscuous mode on port %u",
+                    retval, iconf->port_id);
+            SCReturnInt(retval);
+        }
+    } else if (retval < 0) {
+        SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when enabling promiscuous mode on port %u",
+                retval, iconf->port_id);
+        SCReturnInt(TM_ECODE_FAILED);
+    }
+
+    // set maximum transmission unit
+    SCLogConfig("Setting MTU of %s to %dB", iconf->iface, iconf->mtu);
+    retval = rte_eth_dev_set_mtu(iconf->port_id, iconf->mtu);
+    if (retval == -ENOTSUP) {
+        SCLogWarning(SC_WARN_DPDK_CONF,
+                "Changing MTU on port %u is not supported, ignoring the setting...",
+                iconf->port_id);
+        // if it is not possible to set the MTU, retrieve it
+        retval = rte_eth_dev_get_mtu(iconf->port_id, &iconf->mtu);
+        if (retval < 0) {
+            SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) Unable to retrieve MTU from port %u",
+                    retval, iconf->port_id);
+            SCReturnInt(retval);
+        }
+    } else if (retval < 0) {
+        SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when setting MTU to %u on port %u", retval,
+                iconf->mtu, iconf->port_id);
+        SCReturnInt(retval);
+    }
+
+    retval = DeviceConfigureQueues(iconf, &dev_info, &port_conf);
+    if (retval < 0) {
+        SCReturnInt(retval);
+    }
+
+    retval = DeviceConfigureIPS(iconf);
+    if (retval < 0) {
+        SCReturnInt(retval);
+    }
+
+    SCReturnInt(0);
+}
+
+/**
+ * \brief Parse the DPDK settings of an interface and configure its device.
+ *
+ * A configuration that can not be parsed or a device that can not be
+ * configured is reported through FatalError(). In the latter case the
+ * configuration is dereferenced and the EAL is cleaned up first.
+ *
+ * On success the reference counter is reset and raised by the number of
+ * threads, and the queue id counter is reset.
+ *
+ * \param iface name of the interface
+ * \return the interface configuration (a DPDKIfaceConfig) as void pointer
+ */
+static void *ParseDpdkConfigAndConfigureDevice(const char *iface)
+{
+    DPDKIfaceConfig *iconf = ConfigParse(iface);
+    if (iconf == NULL) {
+        FatalError(SC_ERR_DPDK_CONF, "DPDK configuration could not be parsed");
+    }
+
+    const int configure_rc = DeviceConfigure(iconf);
+    if (configure_rc != 0) {
+        iconf->DerefFunc(iconf);
+
+        const int cleanup_rc = rte_eal_cleanup();
+        if (cleanup_rc != 0) {
+            FatalError(SC_ERR_DPDK_EAL_INIT, "EAL cleanup failed: %s", strerror(-cleanup_rc));
+        }
+
+        FatalError(SC_ERR_DPDK_CONF, "Device %s fails to configure", iface);
+    }
+
+    /* one reference per thread */
+    SC_ATOMIC_RESET(iconf->ref);
+    (void)SC_ATOMIC_ADD(iconf->ref, iconf->threads);
+
+    /* worker threads increase this counter when they pick their queue id */
+    SC_ATOMIC_RESET(iconf->queue_id);
+
+    return iconf;
+}
+
+/**
+ * \brief extract information from config file
+ *
+ * The returned structure will be freed by the thread init function.
+ * It is thus necessary either to copy the structure before giving it
+ * to a thread or to reparse the file for each thread (and thus have
+ * a new structure).
+ *
+ * After configuration is loaded, DPDK also configures the device according to the settings.
+ *
+ * \return a DPDKIfaceConfig corresponding to the interface name
+ */
+
+/* Returns the thread count stored in the given DPDKIfaceConfig; a NULL
+ * configuration is reported through FatalError(). */
+static int DPDKConfigGetThreadsCount(void *conf)
+{
+    const DPDKIfaceConfig *dpdk_conf = (const DPDKIfaceConfig *)conf;
+
+    if (dpdk_conf == NULL) {
+        FatalError(SC_ERR_DPDK_CONF, "Configuration file is NULL");
+    }
+
+    return dpdk_conf->threads;
+}
+
+#endif /* HAVE_DPDK */
+
+/**
+ * \brief Get the name of the default DPDK runmode.
+ *
+ * \return the constant string "workers"
+ */
+const char *RunModeDpdkGetDefaultMode(void)
+{
+    static const char default_mode[] = "workers";
+
+    return default_mode;
+}
+
+/**
+ * \brief Register the "workers" runmode of the DPDK capture method.
+ */
+void RunModeDpdkRegister(void)
+{
+    const char *description = "Workers DPDK mode, each thread does all"
+                              " tasks from acquisition to logging";
+
+    RunModeRegisterNewRunMode(RUNMODE_DPDK, "workers", description, RunModeIdsDpdkWorkers);
+}
+
+/**
+ * \brief Workers version of the DPDK processing.
+ *
+ * With DPDK support compiled in, the runmode is initialized, live time mode
+ * is selected, the EAL is initialized and the capture workers are set up with
+ * the ReceiveDPDK and DecodeDPDK modules. A failure to set up the workers is
+ * reported through FatalError(). Without DPDK support nothing is done.
+ *
+ * \return 0
+ */
+int RunModeIdsDpdkWorkers(void)
+{
+    SCEnter();
+#ifdef HAVE_DPDK
+    RunModeInitialize();
+    TimeModeSetLive();
+    InitEal();
+
+    const int workers_rc = RunModeSetLiveCaptureWorkers(ParseDpdkConfigAndConfigureDevice,
+            DPDKConfigGetThreadsCount, "ReceiveDPDK", "DecodeDPDK", thread_name_workers, NULL);
+    if (workers_rc != 0) {
+        FatalError(SC_ERR_FATAL, "Unable to start runmode");
+    }
+
+    SCLogDebug("RunModeIdsDpdkWorkers initialised");
+#endif /* HAVE_DPDK */
+    SCReturnInt(0);
+}
+
+/**
+ * @}
+ */
--- /dev/null
+/* Copyright (C) 2021 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \defgroup dpdk DPDK running mode
+ *
+ * @{
+ */
+
+/**
+ * \file
+ *
+ * \author Lukas Sismis <lukas.sismis@gmail.com>
+ *
+ * DPDK capture interface
+ *
+ */
+
+#include "suricata-common.h"
+#include "runmodes.h"
+#include "source-dpdk.h"
+#include "suricata.h"
+#include "threads.h"
+#include "threadvars.h"
+#include "tm-threads.h"
+#include "tmqh-packetpool.h"
+#include "util-privs.h"
+
+#ifndef HAVE_DPDK
+
+/* ThreadInit handler shared by both DPDK modules in builds without DPDK. */
+TmEcode NoDPDKSupportExit(ThreadVars *, const void *, void **);
+
+/**
+ * \brief Registration Function for ReceiveDPDK in builds without DPDK support.
+ *
+ * Only the module name, its flags and a ThreadInit handler that reports the
+ * missing support are set; all other callbacks stay empty.
+ */
+void TmModuleReceiveDPDKRegister(void)
+{
+    tmm_modules[TMM_RECEIVEDPDK].name = "ReceiveDPDK";
+    tmm_modules[TMM_RECEIVEDPDK].flags = TM_FLAG_RECEIVE_TM;
+    tmm_modules[TMM_RECEIVEDPDK].cap_flags = 0;
+
+    /* starting a thread with this module ends in NoDPDKSupportExit() */
+    tmm_modules[TMM_RECEIVEDPDK].ThreadInit = NoDPDKSupportExit;
+
+    tmm_modules[TMM_RECEIVEDPDK].Func = NULL;
+    tmm_modules[TMM_RECEIVEDPDK].ThreadDeinit = NULL;
+    tmm_modules[TMM_RECEIVEDPDK].ThreadExitPrintStats = NULL;
+}
+
+/**
+ * \brief Registration Function for DecodeDPDK in builds without DPDK support.
+ *
+ * Only the module name, its flags and a ThreadInit handler that reports the
+ * missing support are set; all other callbacks stay empty.
+ */
+void TmModuleDecodeDPDKRegister(void)
+{
+    tmm_modules[TMM_DECODEDPDK].name = "DecodeDPDK";
+    tmm_modules[TMM_DECODEDPDK].flags = TM_FLAG_DECODE_TM;
+    tmm_modules[TMM_DECODEDPDK].cap_flags = 0;
+
+    /* starting a thread with this module ends in NoDPDKSupportExit() */
+    tmm_modules[TMM_DECODEDPDK].ThreadInit = NoDPDKSupportExit;
+
+    tmm_modules[TMM_DECODEDPDK].Func = NULL;
+    tmm_modules[TMM_DECODEDPDK].ThreadDeinit = NULL;
+    tmm_modules[TMM_DECODEDPDK].ThreadExitPrintStats = NULL;
+}
+
+/**
+ * \brief Reports through FatalError() that DPDK support is not compiled in.
+ *
+ * \param tv thread that was about to be created; its name is part of the message
+ * \param initdata unused
+ * \param data unused
+ */
+TmEcode NoDPDKSupportExit(ThreadVars *tv, const void *initdata, void **data)
+{
+    const char *thread_name = tv->name;
+
+    FatalError(SC_ERR_NO_DPDK,
+            "Error creating thread %s: you do not have "
+            "support for DPDK enabled, on Linux host please recompile "
+            "with --enable-dpdk",
+            thread_name);
+}
+
+#else /* We have DPDK support */
+
+#include "util-dpdk.h"
+#include <numa.h>
+
+#define BURST_SIZE 32 /* size of received_mbufs and the burst size requested from rte_eth_rx_burst() */
+
+/**
+ * \brief Structure to hold thread specific variables.
+ *
+ * One instance is allocated (zeroed) per thread in ReceiveDPDKThreadInit()
+ * and freed in ReceiveDPDKThreadDeinit().
+ */
+typedef struct DPDKThreadVars_ {
+ /* counters */
+ uint64_t pkts; /* packets received by this thread (sum of the rx burst sizes) */
+ ThreadVars *tv; /* thread these variables belong to */
+ TmSlot *slot; /* slot after the receive slot; received packets are passed to it */
+ LiveDevice *livedev; /* live device looked up by the interface name */
+ ChecksumValidationMode checksum_mode; /* CHECKSUM_VALIDATION_DISABLE marks packets with PKT_IGNORE_CHECKSUM */
+ /* references to packet and drop counters */
+ uint16_t capture_dpdk_packets; /* id of the "capture.packets" counter */
+ uint16_t capture_dpdk_rx_errs; /* id of the "capture.rx_errors" counter */
+ uint16_t capture_dpdk_tx_errs; /* id of the "capture.tx_errors" counter */
+ unsigned int flags; /* NOTE(review): not referenced by the functions of this file section */
+ int threads; /* thread count copied from the interface configuration */
+ /* for IPS */
+ DpdkCopyModeEnum copy_mode; /* copied from the interface configuration; TAP/IPS enable transmission */
+ uint16_t out_port_id; /* port the packets are transmitted on in TAP/IPS copy mode */
+ /* Entry in the peers_list -- NOTE(review): no such field follows, the comment looks stale */
+
+ uint64_t bytes; /* zeroed at init; NOTE(review): no update visible in this file section */
+ uint64_t accepted; /* NOTE(review): not referenced by the functions of this file section */
+ uint64_t dropped; /* NOTE(review): not referenced by the functions of this file section */
+ uint16_t port_id; /* port this thread receives from */
+ uint16_t queue_id; /* RX queue polled by this thread, also used as TX queue id of forwarded packets */
+ struct rte_mempool *pkt_mempool; /* taken over from the interface configuration; freed at deinit by queue 0 */
+ struct rte_mbuf *received_mbufs[BURST_SIZE]; /* mbufs of the most recent rx burst */
+ struct timeval machine_start_time; /* time of day at init minus the TSC based uptime in seconds */
+} DPDKThreadVars;
+
+static TmEcode ReceiveDPDKThreadInit(ThreadVars *, const void *, void **); /* callbacks of the ReceiveDPDK module */
+static void ReceiveDPDKThreadExitStats(ThreadVars *, void *);
+static TmEcode ReceiveDPDKThreadDeinit(ThreadVars *, void *);
+static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot);
+
+static TmEcode DecodeDPDKThreadInit(ThreadVars *, const void *, void **); /* callbacks of the DecodeDPDK module */
+static TmEcode DecodeDPDKThreadDeinit(ThreadVars *tv, void *data);
+static TmEcode DecodeDPDK(ThreadVars *, Packet *, void *);
+
+static uint64_t CyclesToMicroseconds(uint64_t cycles); /* TSC based time helpers */
+static uint64_t CyclesToSeconds(uint64_t cycles);
+static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset);
+static uint64_t DPDKGetSeconds(void); /* used by DPDKSetTimevalOfMachineStart() ahead of its definition */
+
+/**
+ * \brief Free a run of mbufs stored in an array.
+ *
+ * \param mbuf_array array of mbuf pointers
+ * \param mbuf_cnt number of mbufs to free
+ * \param offset index of the first mbuf to free
+ */
+static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset)
+{
+    /* mbuf_cnt is a count and not an end index: the receive loop passes the
+     * number of remaining mbufs together with the index of the first one */
+    for (uint16_t i = 0; i < mbuf_cnt; i++) {
+        rte_pktmbuf_free(mbuf_array[offset + i]);
+    }
+}
+
+/**
+ * \brief Convert a number of TSC cycles to microseconds.
+ *
+ * \param cycles number of TSC cycles
+ * \return the microseconds the cycles correspond to, 0 when the TSC
+ *         frequency is reported as 0
+ */
+static uint64_t CyclesToMicroseconds(const uint64_t cycles)
+{
+    const uint64_t usec_per_sec = 1000000;
+    const uint64_t hz = rte_get_tsc_hz();
+
+    if (hz == 0) {
+        return 0;
+    }
+
+    /* Whole seconds and the sub-second remainder are converted separately.
+     * Dividing by a truncated "ticks per microsecond" value would skew the
+     * result in proportion to the number of cycles (and divide by zero below
+     * 1 MHz), while multiplying all cycles by 1000000 first could overflow. */
+    return (cycles / hz) * usec_per_sec + ((cycles % hz) * usec_per_sec) / hz;
+}
+
+/**
+ * \brief Convert a number of TSC cycles to whole seconds.
+ *
+ * \param cycles number of TSC cycles
+ * \return cycles divided by the TSC frequency
+ */
+static uint64_t CyclesToSeconds(const uint64_t cycles)
+{
+    return cycles / rte_get_tsc_hz();
+}
+
+/**
+ * \brief Add a duration given in TSC cycles to a timeval.
+ *
+ * \param cycles duration in TSC cycles
+ * \param orig_tv timeval the duration is added to
+ * \param new_tv receives the sum, with tv_usec kept below one second
+ */
+static void CyclesAddToTimeval(
+        const uint64_t cycles, struct timeval *orig_tv, struct timeval *new_tv)
+{
+    const uint64_t usec_per_sec = 1000000;
+    const uint64_t total_usec = CyclesToMicroseconds(cycles) + orig_tv->tv_usec;
+    const uint64_t carry_sec = total_usec / usec_per_sec;
+
+    new_tv->tv_sec = orig_tv->tv_sec + carry_sec;
+    new_tv->tv_usec = total_usec % usec_per_sec;
+}
+
+/**
+ * \brief Store the time of day reduced by the TSC based uptime.
+ *
+ * \param tv receives the current time of day with DPDKGetSeconds()
+ *           subtracted from its seconds part
+ */
+static void DPDKSetTimevalOfMachineStart(struct timeval *tv)
+{
+    gettimeofday(tv, NULL);
+
+    const uint64_t uptime_sec = DPDKGetSeconds();
+    tv->tv_sec = tv->tv_sec - uptime_sec;
+}
+
+/**
+ * \brief Compute the current real time from the TSC counter.
+ *
+ * The current TSC counter value is added to the timeval of the machine start.
+ *
+ * \param machine_start_tv timestamp when the machine was started
+ * \param real_tv receives the resulting time
+ */
+static void DPDKSetTimevalReal(struct timeval *machine_start_tv, struct timeval *real_tv)
+{
+    const uint64_t cycles_now = rte_get_tsc_cycles();
+
+    CyclesAddToTimeval(cycles_now, machine_start_tv, real_tv);
+}
+
+/* get number of seconds from the reset of TSC counter (typically from the machine start);
+ * the parameter list is spelled (void) to match the prototype of this function */
+static uint64_t DPDKGetSeconds(void)
+{
+    return CyclesToSeconds(rte_get_tsc_cycles());
+}
+
+static void DevicePostStartPMDSpecificActions(DPDKThreadVars *ptv, const char *driver_name)
+{
+    /* hook called once the device has been started; nothing to do at the moment */
+    (void)ptv;
+    (void)driver_name;
+}
+
+static void DevicePreStopPMDSpecificActions(DPDKThreadVars *ptv, const char *driver_name)
+{
+    /* hook called before the device is stopped; nothing to do at the moment */
+    (void)ptv;
+    (void)driver_name;
+}
+
+/**
+ * Attempts to retrieve NUMA node id on which the caller runs
+ * @return NUMA id on success, -1 otherwise
+ */
+static int GetNumaNode(void)
+{
+    int node = -1;
+
+#if defined(__linux__)
+    /* the CPU id is only needed (and only available) on Linux */
+    const int cpu = sched_getcpu();
+    if (cpu < 0) {
+        /* the current CPU is unknown, do not query the node of an invalid id */
+        return node;
+    }
+    node = numa_node_of_cpu(cpu);
+#else
+    SCLogWarning(SC_ERR_TM_THREADS_ERROR, "NUMA node retrieval is not supported on this OS.");
+#endif
+
+    return node;
+}
+
+/**
+ * \brief Registration Function for ReceiveDPDK.
+ *
+ * Fills the TMM_RECEIVEDPDK entry of the module table with the callbacks of
+ * the DPDK packet acquisition.
+ * \todo Unit tests are needed for this module.
+ */
+void TmModuleReceiveDPDKRegister(void)
+{
+    tmm_modules[TMM_RECEIVEDPDK].name = "ReceiveDPDK";
+    tmm_modules[TMM_RECEIVEDPDK].flags = TM_FLAG_RECEIVE_TM;
+    tmm_modules[TMM_RECEIVEDPDK].cap_flags = SC_CAP_NET_RAW;
+
+    /* thread life cycle */
+    tmm_modules[TMM_RECEIVEDPDK].ThreadInit = ReceiveDPDKThreadInit;
+    tmm_modules[TMM_RECEIVEDPDK].ThreadExitPrintStats = ReceiveDPDKThreadExitStats;
+    tmm_modules[TMM_RECEIVEDPDK].ThreadDeinit = ReceiveDPDKThreadDeinit;
+
+    /* packets are acquired in a loop, there is no per packet function */
+    tmm_modules[TMM_RECEIVEDPDK].PktAcqLoop = ReceiveDPDKLoop;
+    tmm_modules[TMM_RECEIVEDPDK].PktAcqBreakLoop = NULL;
+    tmm_modules[TMM_RECEIVEDPDK].Func = NULL;
+}
+
+/**
+ * \brief Registration Function for DecodeDPDK.
+ *
+ * Fills the TMM_DECODEDPDK entry of the module table with the callbacks of
+ * the DPDK packet decoding.
+ * \todo Unit tests are needed for this module.
+ */
+void TmModuleDecodeDPDKRegister(void)
+{
+    tmm_modules[TMM_DECODEDPDK].name = "DecodeDPDK";
+    tmm_modules[TMM_DECODEDPDK].flags = TM_FLAG_DECODE_TM;
+    tmm_modules[TMM_DECODEDPDK].cap_flags = 0;
+
+    /* thread life cycle */
+    tmm_modules[TMM_DECODEDPDK].ThreadInit = DecodeDPDKThreadInit;
+    tmm_modules[TMM_DECODEDPDK].ThreadExitPrintStats = NULL;
+    tmm_modules[TMM_DECODEDPDK].ThreadDeinit = DecodeDPDKThreadDeinit;
+
+    /* per packet function */
+    tmm_modules[TMM_DECODEDPDK].Func = DecodeDPDK;
+}
+
+/**
+ * \brief Update the capture counters of the thread and of the live device.
+ *
+ * The packet counters are raised by the packets received since the previous
+ * call. The error and drop counters are taken from the port statistics and
+ * are only written by the thread that serves queue 0.
+ *
+ * \param ptv thread variables of the calling receive thread
+ */
+static inline void DPDKDumpCounters(DPDKThreadVars *ptv)
+{
+    struct rte_eth_stats eth_stats;
+    int retval = rte_eth_stats_get(ptv->port_id, &eth_stats);
+    if (unlikely(retval != 0)) {
+        SCLogError(SC_ERR_STAT, "Failed to get stats for port id %d: %s", ptv->port_id,
+                strerror(-retval));
+        return;
+    }
+
+    /* only the difference to the already accounted packets is added */
+    uint64_t th_pkts = StatsGetLocalCounterValue(ptv->tv, ptv->capture_dpdk_packets);
+    StatsAddUI64(ptv->tv, ptv->capture_dpdk_packets, ptv->pkts - th_pkts);
+    SC_ATOMIC_ADD(ptv->livedev->pkts, ptv->pkts - th_pkts);
+
+    /* Some NICs (e.g. Intel) do not support queue statistics and the drops can be fetched only on
+     * the port level. Therefore setting it to the first worker to have at least continuous update
+     * on the dropped packets. */
+    if (ptv->queue_id == 0) {
+        /* successfully received packets (ptv->pkts) are neither errors nor drops
+         * and must not be part of these sums */
+        const uint64_t rx_errs = eth_stats.imissed + eth_stats.ierrors + eth_stats.rx_nombuf;
+
+        StatsSetUI64(ptv->tv, ptv->capture_dpdk_rx_errs, rx_errs);
+        StatsSetUI64(ptv->tv, ptv->capture_dpdk_tx_errs, eth_stats.oerrors);
+        SC_ATOMIC_SET(ptv->livedev->drop, rx_errs + eth_stats.oerrors);
+    }
+}
+
+/**
+ * \brief Release callback of packets received through DPDK.
+ *
+ * In TAP mode, and in IPS mode when the packet is not to be dropped, the mbuf
+ * is transmitted on the output port (one retry after a short delay); in every
+ * other case the mbuf is freed. The packet itself is released afterwards.
+ *
+ * \param p packet to release
+ */
+static void DPDKReleasePacket(Packet *p)
+{
+    /* Need to be in copy mode and need to detect early release
+       where Ethernet header could not be set (and pseudo packet) */
+    int forward = (p->dpdk_v.copy_mode == DPDK_COPY_MODE_TAP) ||
+                  (p->dpdk_v.copy_mode == DPDK_COPY_MODE_IPS && !PacketTestAction(p, ACTION_DROP));
+
+#if defined(RTE_LIBRTE_I40E_PMD) || defined(RTE_LIBRTE_IXGBE_PMD) || defined(RTE_LIBRTE_ICE_PMD)
+    /* When enabling promiscuous mode on Intel cards, 2 ICMPv6 packets are generated.
+       These get into the infinite cycle between the NIC and the switch in some cases */
+    if (forward && PKT_IS_ICMPV6(p) && p->icmpv6h->type == 143) {
+        forward = 0;
+    }
+#endif
+
+    if (!forward) {
+        rte_pktmbuf_free(p->dpdk_v.mbuf);
+        p->dpdk_v.mbuf = NULL;
+        PacketFreeOrRelease(p);
+        return;
+    }
+
+    BUG_ON(PKT_IS_PSEUDOPKT(p));
+
+    // rte_eth_tx_burst can return only 0 (failure) or 1 (success) because we are only
+    // transmitting burst of size 1 and the function rte_eth_tx_burst returns number of
+    // successfully sent packets.
+    int sent = rte_eth_tx_burst(p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id, &p->dpdk_v.mbuf, 1);
+    if (unlikely(sent < 1)) {
+        // sometimes a repeated transmit can help to send out the packet
+        rte_delay_us(DPDK_BURST_TX_WAIT_US);
+        sent = rte_eth_tx_burst(
+                p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id, &p->dpdk_v.mbuf, 1);
+        if (unlikely(sent < 1)) {
+            SCLogDebug("Unable to transmit the packet on port %u queue %u",
+                    p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id);
+            rte_pktmbuf_free(p->dpdk_v.mbuf);
+            p->dpdk_v.mbuf = NULL;
+        }
+    }
+
+    PacketFreeOrRelease(p);
+}
+
+/**
+ * \brief Main DPDK reading Loop function
+ *
+ * Polls the RX queue of the thread in bursts, wraps every received mbuf into
+ * a Packet and passes it to the next slot. The counters are dumped when the
+ * TSC based second changes and once more when the loop is left because
+ * suricata_ctl_flags is set.
+ *
+ * \param tv pointer to ThreadVars
+ * \param data pointer to the DPDKThreadVars of the thread
+ * \param slot pointer to the TmSlot of the receive module
+ * \return TM_ECODE_OK when the loop was stopped, TM_ECODE_FAILED when the
+ *         processing of a packet failed
+ */
+static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
+{
+    SCEnter();
+    Packet *p;
+    uint16_t nb_rx;
+    time_t last_dump = 0;
+    time_t current_time;
+
+    DPDKThreadVars *ptv = (DPDKThreadVars *)data;
+    TmSlot *s = (TmSlot *)slot;
+
+    ptv->slot = s->slot_next;
+
+    PacketPoolWait();
+    while (1) {
+        if (unlikely(suricata_ctl_flags != 0)) {
+            SCLogDebug("Stopping Suricata!");
+            DPDKDumpCounters(ptv);
+            break;
+        }
+
+        nb_rx = rte_eth_rx_burst(ptv->port_id, ptv->queue_id, ptv->received_mbufs, BURST_SIZE);
+        if (unlikely(nb_rx == 0)) {
+            continue;
+        }
+
+        ptv->pkts += (uint64_t)nb_rx;
+        for (uint16_t i = 0; i < nb_rx; i++) {
+            p = PacketGetFromQueueOrAlloc();
+            if (unlikely(p == NULL)) {
+                /* no Packet can carry this mbuf, so nothing would ever release
+                 * it: free it here instead of leaking it */
+                rte_pktmbuf_free(ptv->received_mbufs[i]);
+                continue;
+            }
+            PKT_SET_SRC(p, PKT_SRC_WIRE);
+            p->datalink = LINKTYPE_ETHERNET;
+            if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
+                p->flags |= PKT_IGNORE_CHECKSUM;
+            }
+
+            DPDKSetTimevalReal(&ptv->machine_start_time, &p->ts);
+            p->dpdk_v.mbuf = ptv->received_mbufs[i];
+            p->ReleasePacket = DPDKReleasePacket;
+            p->dpdk_v.copy_mode = ptv->copy_mode;
+            p->dpdk_v.out_port_id = ptv->out_port_id;
+            p->dpdk_v.out_queue_id = ptv->queue_id;
+
+            PacketSetData(p, rte_pktmbuf_mtod(p->dpdk_v.mbuf, uint8_t *),
+                    rte_pktmbuf_pkt_len(p->dpdk_v.mbuf));
+            if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
+                TmqhOutputPacketpool(ptv->tv, p);
+                /* drop the rest of the burst that was not wrapped into a Packet yet */
+                DPDKFreeMbufArray(ptv->received_mbufs, nb_rx - i - 1, i + 1);
+                /* the function returns a TmEcode, not a process exit status */
+                SCReturnInt(TM_ECODE_FAILED);
+            }
+        }
+
+        /* Trigger one dump of stats every second */
+        current_time = DPDKGetSeconds();
+        if (current_time != last_dump) {
+            DPDKDumpCounters(ptv);
+            last_dump = current_time;
+        }
+        StatsSyncCountersIfSignalled(tv);
+    }
+
+    SCReturnInt(TM_ECODE_OK);
+}
+
+/**
+ * \brief Init function for ReceiveDPDK.
+ *
+ * Allocates the thread variables, registers the capture counters, copies the
+ * settings of the interface configuration and picks the next free queue id.
+ * The thread that picks the last queue id starts the device. The reference
+ * to the configuration is dropped on success and on failure.
+ *
+ * \param tv pointer to ThreadVars
+ * \param initdata pointer to the interface passed from the user
+ * \param data pointer gets populated with DPDKThreadVars
+ *
+ * \return TM_ECODE_OK on success, TM_ECODE_FAILED otherwise
+ */
+static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void **data)
+{
+    SCEnter();
+    int retval, thread_numa;
+    DPDKThreadVars *ptv = NULL;
+    DPDKIfaceConfig *dpdk_config = (DPDKIfaceConfig *)initdata;
+
+    if (initdata == NULL) {
+        SCLogError(SC_ERR_INVALID_ARGUMENT, "DPDK configuration is NULL in thread initialization");
+        goto fail;
+    }
+
+    ptv = SCCalloc(1, sizeof(DPDKThreadVars));
+    if (unlikely(ptv == NULL)) {
+        SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate memory");
+        goto fail;
+    }
+
+    ptv->tv = tv;
+    ptv->pkts = 0;
+    ptv->bytes = 0;
+    ptv->livedev = LiveGetDevice(dpdk_config->iface);
+    if (ptv->livedev == NULL) {
+        /* the counters of the live device are updated on every stats dump,
+         * so the thread can not run without it */
+        SCLogError(SC_ERR_DPDK_INIT, "Unable to find live device of %s", dpdk_config->iface);
+        goto fail;
+    }
+    DPDKSetTimevalOfMachineStart(&ptv->machine_start_time);
+
+    ptv->capture_dpdk_packets = StatsRegisterCounter("capture.packets", ptv->tv);
+    ptv->capture_dpdk_rx_errs = StatsRegisterCounter("capture.rx_errors", ptv->tv);
+    ptv->capture_dpdk_tx_errs = StatsRegisterCounter("capture.tx_errors", ptv->tv);
+
+    ptv->copy_mode = dpdk_config->copy_mode;
+    ptv->checksum_mode = dpdk_config->checksum_mode;
+
+    ptv->threads = dpdk_config->threads;
+    ptv->port_id = dpdk_config->port_id;
+    ptv->out_port_id = dpdk_config->out_port_id;
+    uint16_t queue_id = SC_ATOMIC_ADD(dpdk_config->queue_id, 1);
+    ptv->queue_id = queue_id;
+    // pass the pointer to the mempool and then forget about it. Mempool is freed in thread deinit.
+    ptv->pkt_mempool = dpdk_config->pkt_mempool;
+    dpdk_config->pkt_mempool = NULL;
+
+    // the last thread starts the device
+    if (queue_id == dpdk_config->threads - 1) {
+        retval = rte_eth_dev_start(ptv->port_id);
+        if (retval < 0) {
+            SCLogError(SC_ERR_DPDK_INIT, "Error (%s) during device startup of %s",
+                    rte_strerror(-retval), dpdk_config->iface);
+            goto fail;
+        }
+
+        struct rte_eth_dev_info dev_info;
+        retval = rte_eth_dev_info_get(ptv->port_id, &dev_info);
+        if (retval != 0) {
+            SCLogError(SC_ERR_DPDK_INIT, "Error (%s) when getting device info of %s",
+                    rte_strerror(-retval), dpdk_config->iface);
+            goto fail;
+        }
+
+        // some PMDs requires additional actions only after the device has started
+        DevicePostStartPMDSpecificActions(ptv, dev_info.driver_name);
+    }
+
+    thread_numa = GetNumaNode();
+    if (thread_numa >= 0 && thread_numa != rte_eth_dev_socket_id(ptv->port_id)) {
+        SCLogWarning(SC_WARN_DPDK_CONF,
+                "NIC on NUMA %d but thread on NUMA %d. Decreased performance expected",
+                rte_eth_dev_socket_id(ptv->port_id), thread_numa);
+    }
+
+    *data = (void *)ptv;
+    dpdk_config->DerefFunc(dpdk_config);
+    SCReturnInt(TM_ECODE_OK);
+
+fail:
+    if (dpdk_config != NULL)
+        dpdk_config->DerefFunc(dpdk_config);
+    if (ptv != NULL)
+        SCFree(ptv);
+    SCReturnInt(TM_ECODE_FAILED);
+}
+
+/**
+ * \brief This function prints stats to the screen at exit.
+ *
+ * The thread that serves queue 0 additionally logs the totals of the port;
+ * when these can not be obtained the function returns after logging the error.
+ *
+ * \param tv pointer to ThreadVars
+ * \param data pointer that gets cast into DPDKThreadVars for ptv
+ */
+static void ReceiveDPDKThreadExitStats(ThreadVars *tv, void *data)
+{
+    SCEnter();
+    int retval;
+    DPDKThreadVars *ptv = (DPDKThreadVars *)data;
+
+    if (ptv->queue_id == 0) {
+        struct rte_eth_stats eth_stats;
+        char port_name[RTE_ETH_NAME_MAX_LEN];
+
+        retval = rte_eth_dev_get_name_by_port(ptv->port_id, port_name);
+        if (unlikely(retval != 0)) {
+            SCLogError(SC_ERR_STAT, "Failed to convert port id %d to the interface name: %s",
+                    ptv->port_id, strerror(-retval));
+            SCReturn;
+        }
+        retval = rte_eth_stats_get(ptv->port_id, &eth_stats);
+        if (unlikely(retval != 0)) {
+            SCLogError(SC_ERR_STAT, "Failed to get stats for interface %s: %s", port_name,
+                    strerror(-retval));
+            SCReturn;
+        }
+        SCLogPerf("Total RX stats of %s: packets %" PRIu64 " bytes: %" PRIu64 " missed: %" PRIu64
+                  " errors: %" PRIu64 " nombufs: %" PRIu64,
+                port_name, eth_stats.ipackets, eth_stats.ibytes, eth_stats.imissed,
+                eth_stats.ierrors, eth_stats.rx_nombuf);
+        /* TX totals are only of interest when packets are forwarded */
+        if (ptv->copy_mode == DPDK_COPY_MODE_TAP || ptv->copy_mode == DPDK_COPY_MODE_IPS)
+            SCLogPerf("Total TX stats of %s: packets %" PRIu64 " bytes: %" PRIu64
+                      " errors: %" PRIu64,
+                    port_name, eth_stats.opackets, eth_stats.obytes, eth_stats.oerrors);
+    }
+
+    DPDKDumpCounters(ptv);
+    SCLogPerf("(%s) received packets %" PRIu64, tv->name, ptv->pkts);
+}
+
+/**
+ * \brief DeInit function closes dpdk at exit.
+ *
+ * The thread that serves queue 0 runs the PMD specific pre-stop actions and
+ * frees the packet mempool. The port (and the output port in TAP/IPS mode)
+ * is stopped and the thread variables are freed even when the device name or
+ * the device info can not be obtained.
+ *
+ * \param tv pointer to ThreadVars
+ * \param data pointer that gets cast into DPDKThreadVars for ptv
+ *
+ * \return TM_ECODE_OK on success, TM_ECODE_FAILED when the device name or
+ *         the device info could not be obtained
+ */
+static TmEcode ReceiveDPDKThreadDeinit(ThreadVars *tv, void *data)
+{
+    SCEnter();
+    DPDKThreadVars *ptv = (DPDKThreadVars *)data;
+    TmEcode result = TM_ECODE_OK;
+
+    int retval;
+    if (ptv->queue_id == 0) {
+        struct rte_eth_dev_info dev_info;
+        char iface[RTE_ETH_NAME_MAX_LEN];
+        retval = rte_eth_dev_get_name_by_port(ptv->port_id, iface);
+        if (retval != 0) {
+            SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) when getting device name (port %d)",
+                    retval, ptv->port_id);
+            /* remember the failure, but still release everything below */
+            result = TM_ECODE_FAILED;
+        } else {
+            retval = rte_eth_dev_info_get(ptv->port_id, &dev_info);
+            if (retval != 0) {
+                SCLogError(SC_ERR_DPDK_INIT, "Error (err=%d) during getting device info (port %s)",
+                        retval, iface);
+                result = TM_ECODE_FAILED;
+            } else {
+                DevicePreStopPMDSpecificActions(ptv, dev_info.driver_name);
+            }
+        }
+    }
+
+    rte_eth_dev_stop(ptv->port_id);
+    if (ptv->copy_mode == DPDK_COPY_MODE_TAP || ptv->copy_mode == DPDK_COPY_MODE_IPS) {
+        rte_eth_dev_stop(ptv->out_port_id);
+    }
+
+    if (ptv->queue_id == 0 && ptv->pkt_mempool != NULL) {
+        rte_mempool_free(ptv->pkt_mempool);
+        ptv->pkt_mempool = NULL;
+    }
+
+    SCFree(ptv);
+    SCReturnInt(result);
+}
+
+/**
+ * \brief Decode a packet received through DPDK.
+ *
+ * Updates the packet counters, counts the packet as VLAN packet when a VLAN
+ * index is already set and hands the packet data to the link layer decoder
+ * selected by the datalink type of the packet.
+ *
+ * \param tv pointer to ThreadVars
+ * \param p pointer to the current packet
+ * \param data pointer that gets cast into DecodeThreadVars
+ *
+ * \return TM_ECODE_OK
+ */
+static TmEcode DecodeDPDK(ThreadVars *tv, Packet *p, void *data)
+{
+    SCEnter();
+    DecodeThreadVars *decode_vars = (DecodeThreadVars *)data;
+
+    /* pseudo packets must never reach this decoder */
+    BUG_ON(PKT_IS_PSEUDOPKT(p));
+
+    DecodeUpdatePacketCounters(tv, decode_vars, p);
+
+    /* If suri has set vlan during reading, we increase vlan counter */
+    if (p->vlan_idx != 0) {
+        StatsIncr(tv, decode_vars->counter_vlan);
+    }
+
+    DecodeLinkLayer(tv, decode_vars, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
+    PacketDecodeFinalize(tv, decode_vars, p);
+
+    SCReturnInt(TM_ECODE_OK);
+}
+
+/**
+ * \brief Init function for DecodeDPDK.
+ *
+ * \param tv pointer to ThreadVars
+ * \param initdata unused
+ * \param data gets populated with the allocated DecodeThreadVars
+ *
+ * \return TM_ECODE_OK on success, TM_ECODE_FAILED when the allocation failed
+ */
+static TmEcode DecodeDPDKThreadInit(ThreadVars *tv, const void *initdata, void **data)
+{
+    SCEnter();
+
+    DecodeThreadVars *decode_vars = DecodeThreadVarsAlloc(tv);
+    if (decode_vars == NULL) {
+        SCReturnInt(TM_ECODE_FAILED);
+    }
+
+    DecodeRegisterPerfCounters(decode_vars, tv);
+    *data = (void *)decode_vars;
+
+    SCReturnInt(TM_ECODE_OK);
+}
+
+/**
+ * \brief DeInit function for DecodeDPDK, frees the decode thread variables.
+ *
+ * \param tv pointer to ThreadVars
+ * \param data DecodeThreadVars to free, may be NULL
+ *
+ * \return TM_ECODE_OK
+ */
+static TmEcode DecodeDPDKThreadDeinit(ThreadVars *tv, void *data)
+{
+    SCEnter();
+
+    if (data == NULL) {
+        SCReturnInt(TM_ECODE_OK);
+    }
+
+    DecodeThreadVarsFree(tv, data);
+    SCReturnInt(TM_ECODE_OK);
+}
+
+#endif /* HAVE_DPDK */
+/* eof */
+/**
+ * @}
+ */