From: Lukas Sismis Date: Mon, 27 Jan 2025 13:26:23 +0000 (+0100) Subject: dpdk: auto configure Rx/Tx descriptors and mempool size X-Git-Tag: suricata-8.0.0-beta1~174 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2ef2a9e26f12ee3fa0ebd6af0760761022302669;p=thirdparty%2Fsuricata.git dpdk: auto configure Rx/Tx descriptors and mempool size Ticket: 7380 Ticket: 7373 --- diff --git a/doc/userguide/capture-hardware/dpdk.rst b/doc/userguide/capture-hardware/dpdk.rst index 6be7278b8c..68b989c561 100644 --- a/doc/userguide/capture-hardware/dpdk.rst +++ b/doc/userguide/capture-hardware/dpdk.rst @@ -184,3 +184,33 @@ Below is a sample configuration that demonstrates how to enable the interrupt mo - interface: 0000:3b:00.0 interrupt-mode: true threads: 4 + +.. _dpdk-automatic-interface-configuration: + +Automatic interface configuration +--------------------------------- + +A number of interface properties can be manually configured. However, Suricata +can automatically configure the interface properties based on the NIC +capabilities. This can be done by setting the ``mempool-size``, +``mempool-cache-size``, ``rx-descriptors``, and ``tx-descriptors`` interface +node properties to ``auto``. +This will allow Suricata to automatically set the sizes of individual properties +according to the best-effort calculation based on the NIC capabilities. +For example, receive (RX) descriptors are calculated based on the maximal +"power of 2" that is lower than or equal to the number of descriptors supported +by the NIC. The number of TX descriptors depends on the configured ``copy-mode``. +IDS (none) mode uses no TX descriptors and does not create any TX queues by +default. IPS and TAP modes use the same number of TX descriptors as RX +descriptors. +The mempool size and its cache size are then derived from the count of +descriptors. + +Rx (and Tx) descriptors are set to the highest possible value to allow more +buffer room when traffic spikes occur. 
However, it requires more memory. +Individual properties can still be set manually if needed. + +.. note:: Mellanox ConnectX-4 NICs do not support auto-configuration of + ``tx-descriptors`` in the TAP/IPS modes. Instead, it can be set to + a fixed value (e.g. 16384). + diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index 37dff61acc..eb7edbe48c 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -2184,10 +2184,10 @@ The whole DPDK configuration resides in the `dpdk:` node. This node encapsulates checksum-checks: true checksum-checks-offload: true mtu: 1500 - mempool-size: 65535 - mempool-cache-size: 257 - rx-descriptors: 1024 - tx-descriptors: 1024 + mempool-size: auto + mempool-cache-size: auto + rx-descriptors: auto + tx-descriptors: auto copy-mode: none copy-iface: none # or PCIe address of the second interface diff --git a/doc/userguide/upgrade.rst b/doc/userguide/upgrade.rst index b5ffb1a27e..fb42fe328b 100644 --- a/doc/userguide/upgrade.rst +++ b/doc/userguide/upgrade.rst @@ -128,6 +128,9 @@ Major changes can be used to tune this value for TPACKET_V2. Due to the increased block size, memory usage has been increased, but should not be an issue in most cases. +- DPDK interface settings can now be configured automatically by setting + ``mempool-size``, ``mempool-cache-size``, ``rx-descriptors``, and + ``tx-descriptors`` to ``auto``. See :ref:`dpdk-automatic-interface-configuration`. 
Removals ~~~~~~~~ diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index 7454574c00..5bc2b23fb6 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -77,12 +77,12 @@ static void InitEal(void); static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str); static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str); -static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues); -static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues); -static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int); +static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues, uint16_t max_queues); +static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues, uint16_t max_queues); +static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, const char *entry_str); static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str); -static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int); -static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int); +static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, const char *entry_str, uint16_t max_desc); +static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, const char *entry_str, uint16_t max_desc); static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int); static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool); static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool); @@ -107,10 +107,10 @@ static void DPDKDerefConfig(void *conf); #define DPDK_CONFIG_DEFAULT_THREADS "auto" #define DPDK_CONFIG_DEFAULT_INTERRUPT_MODE false -#define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE 65535 +#define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE "auto" #define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE "auto" -#define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS 1024 -#define DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS 1024 +#define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS "auto" +#define 
DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS "auto" #define DPDK_CONFIG_DEFAULT_RSS_HASH_FUNCTIONS RTE_ETH_RSS_IP #define DPDK_CONFIG_DEFAULT_MTU 1500 #define DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE 1 @@ -139,6 +139,10 @@ DPDKIfaceConfigAttributes dpdk_yaml = { .copy_iface = "copy-iface", }; +/** + * \brief Input is a number of which we want to find the greatest divisor up to max_num (inclusive). + * The divisor is returned. + */ static int GreatestDivisorUpTo(uint32_t num, uint32_t max_num) { for (int i = max_num; i >= 2; i--) { @@ -149,6 +153,28 @@ static int GreatestDivisorUpTo(uint32_t num, uint32_t max_num) return 1; } +/** + * \brief Input is a number of which we want to find the greatest power of 2 up to num. The power of + * 2 is returned or 0 if no valid power of 2 is found. + */ +static uint64_t GreatestPowOf2UpTo(uint64_t num) +{ + if (num == 0) { + return 0; // No power of 2 exists for 0 + } + + // Bit manipulation to isolate the highest set bit + num |= (num >> 1); + num |= (num >> 2); + num |= (num >> 4); + num |= (num >> 8); + num |= (num >> 16); + num |= (num >> 32); + num = num - (num >> 1); + + return num; +} + static char *AllocArgument(size_t arg_len) { SCEnter(); @@ -441,42 +467,80 @@ static bool ConfigSetInterruptMode(DPDKIfaceConfig *iconf, bool enable) SCReturnBool(true); } -static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues) +static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues, uint16_t max_queues) { SCEnter(); - iconf->nb_rx_queues = nb_queues; - if (iconf->nb_rx_queues < 1) { - SCLogError("%s: positive number of RX queues is required", iconf->iface); + if (nb_queues == 0) { + SCLogInfo("%s: positive number of RX queues is required", iconf->iface); SCReturnInt(-ERANGE); } + if (nb_queues > max_queues) { + SCLogInfo("%s: number of RX queues cannot exceed %" PRIu16, iconf->iface, max_queues); + SCReturnInt(-ERANGE); + } + + iconf->nb_rx_queues = nb_queues; SCReturnInt(0); } -static int 
ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues) +static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues, uint16_t max_queues) { SCEnter(); - iconf->nb_tx_queues = nb_queues; - if (iconf->nb_tx_queues < 1) { - SCLogError("%s: positive number of TX queues is required", iconf->iface); + if (nb_queues == 0) { + SCLogInfo("%s: positive number of TX queues is required", iconf->iface); SCReturnInt(-ERANGE); } + if (nb_queues > max_queues) { + SCLogInfo("%s: number of TX queues cannot exceed %" PRIu16, iconf->iface, max_queues); + SCReturnInt(-ERANGE); + } + + iconf->nb_tx_queues = nb_queues; SCReturnInt(0); } -static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int) +static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, const char *entry_str) { SCEnter(); - if (entry_int <= 0) { - SCLogError("%s: positive memory pool size is required", iconf->iface); - SCReturnInt(-ERANGE); - } else if (entry_int > UINT32_MAX) { - SCLogError("%s: memory pool size cannot exceed %" PRIu32, iconf->iface, UINT32_MAX); + if (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "auto") == 0) { + // calculate the mempool size based on the number of: + // - RX / TX queues + // - RX / TX descriptors + bool err = false; + if (iconf->nb_rx_queues == 0) { + // in IDS mode, we don't need TX queues + SCLogError("%s: cannot autocalculate mempool size without RX queues", iconf->iface); + err = true; + } + + if (iconf->nb_rx_desc == 0) { + SCLogError( + "%s: cannot autocalculate mempool size without RX descriptors", iconf->iface); + err = true; + } + + if (err) { + SCReturnInt(-EINVAL); + } + + iconf->mempool_size = + iconf->nb_rx_queues * iconf->nb_rx_desc + iconf->nb_tx_queues * iconf->nb_tx_desc; + SCReturnInt(0); + } + + if (StringParseUint32(&iconf->mempool_size, 10, 0, entry_str) < 0) { + SCLogError("%s: mempool size entry contain non-numerical characters - \"%s\"", iconf->iface, + entry_str); + SCReturnInt(-EINVAL); + } + + if 
(iconf->mempool_size == 0) { + SCLogError("%s: mempool size requires a positive integer", iconf->iface); SCReturnInt(-ERANGE); } - iconf->mempool_size = entry_int; SCReturnInt(0); } @@ -514,33 +578,65 @@ static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_s SCReturnInt(0); } -static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int) +static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, const char *entry_str, uint16_t max_desc) { SCEnter(); - if (entry_int <= 0) { + if (entry_str == NULL || entry_str[0] == '\0') { + SCLogInfo("%s: number of RX descriptors not found, going with: %s", iconf->iface, + DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS); + entry_str = DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS; + } + + if (strcmp(entry_str, "auto") == 0) { + iconf->nb_rx_desc = GreatestPowOf2UpTo(max_desc); + SCReturnInt(0); + } + + if (StringParseUint16(&iconf->nb_rx_desc, 10, 0, entry_str) < 0) { + SCLogError("%s: RX descriptors entry contains non-numerical characters - \"%s\"", + iconf->iface, entry_str); + SCReturnInt(-EINVAL); + } + + if (iconf->nb_rx_desc == 0) { SCLogError("%s: positive number of RX descriptors is required", iconf->iface); SCReturnInt(-ERANGE); - } else if (entry_int > UINT16_MAX) { - SCLogError("%s: number of RX descriptors cannot exceed %" PRIu16, iconf->iface, UINT16_MAX); + } else if (iconf->nb_rx_desc > max_desc) { + SCLogError("%s: number of RX descriptors cannot exceed %" PRIu16, iconf->iface, max_desc); SCReturnInt(-ERANGE); } - iconf->nb_rx_desc = entry_int; SCReturnInt(0); } -static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int) +static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, const char *entry_str, uint16_t max_desc) { SCEnter(); - if (entry_int <= 0) { + if (entry_str == NULL || entry_str[0] == '\0') { + SCLogInfo("%s: number of TX descriptors not found, going with: %s", iconf->iface, + DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS); + entry_str = DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS; + 
} + + if (strcmp(entry_str, "auto") == 0) { + iconf->nb_tx_desc = GreatestPowOf2UpTo(max_desc); + SCReturnInt(0); + } + + if (StringParseUint16(&iconf->nb_tx_desc, 10, 0, entry_str) < 0) { + SCLogError("%s: TX descriptors entry contains non-numerical characters - \"%s\"", + iconf->iface, entry_str); + SCReturnInt(-EINVAL); + } + + if (iconf->nb_tx_desc == 0) { SCLogError("%s: positive number of TX descriptors is required", iconf->iface); SCReturnInt(-ERANGE); - } else if (entry_int > UINT16_MAX) { - SCLogError("%s: number of TX descriptors cannot exceed %" PRIu16, iconf->iface, UINT16_MAX); + } else if (iconf->nb_tx_desc > max_desc) { + SCLogError("%s: number of TX descriptors cannot exceed %" PRIu16, iconf->iface, max_desc); SCReturnInt(-ERANGE); } - iconf->nb_tx_desc = entry_int; SCReturnInt(0); } @@ -706,6 +802,12 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface) const char *copy_mode_str = NULL; ConfigSetIface(iconf, iface); + struct rte_eth_dev_info dev_info = { 0 }; + retval = rte_eth_dev_info_get(iconf->port_id, &dev_info); + if (retval < 0) { + SCLogError("%s: getting device info failed: %s", iconf->iface, rte_strerror(-retval)); + SCReturnInt(retval); + } retval = ConfSetRootAndDefaultNodes("dpdk.interfaces", iconf->iface, &if_root, &if_default); if (retval < 0) { @@ -730,40 +832,48 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface) SCReturnInt(-EINVAL); // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported - retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads); - if (retval < 0) + retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads, dev_info.max_rx_queues); + if (retval < 0) { + SCLogError("%s: too many threads configured - reduce thread count to: %" PRIu16, + iconf->iface, dev_info.max_rx_queues); SCReturnInt(retval); + } // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported - retval = ConfigSetTxQueues(iconf, (uint16_t)iconf->threads); - if 
(retval < 0) + retval = ConfigSetTxQueues(iconf, (uint16_t)iconf->threads, dev_info.max_tx_queues); + if (retval < 0) { + SCLogError("%s: too many threads configured - reduce thread count to: %" PRIu16, + iconf->iface, dev_info.max_tx_queues); SCReturnInt(retval); + } - retval = ConfGetChildValueIntWithDefault( - if_root, if_default, dpdk_yaml.mempool_size, &entry_int) != 1 - ? ConfigSetMempoolSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE) - : ConfigSetMempoolSize(iconf, entry_int); + retval = ConfGetChildValueWithDefault( + if_root, if_default, dpdk_yaml.rx_descriptors, &entry_str) != 1 + ? ConfigSetRxDescriptors(iconf, DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS, + dev_info.rx_desc_lim.nb_max) + : ConfigSetRxDescriptors(iconf, entry_str, dev_info.rx_desc_lim.nb_max); if (retval < 0) SCReturnInt(retval); retval = ConfGetChildValueWithDefault( - if_root, if_default, dpdk_yaml.mempool_cache_size, &entry_str) != 1 - ? ConfigSetMempoolCacheSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE) - : ConfigSetMempoolCacheSize(iconf, entry_str); + if_root, if_default, dpdk_yaml.tx_descriptors, &entry_str) != 1 + ? ConfigSetTxDescriptors(iconf, DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS, + dev_info.tx_desc_lim.nb_max) + : ConfigSetTxDescriptors(iconf, entry_str, dev_info.tx_desc_lim.nb_max); if (retval < 0) SCReturnInt(retval); - retval = ConfGetChildValueIntWithDefault( - if_root, if_default, dpdk_yaml.rx_descriptors, &entry_int) != 1 - ? ConfigSetRxDescriptors(iconf, DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS) - : ConfigSetRxDescriptors(iconf, entry_int); + retval = ConfGetChildValueWithDefault( + if_root, if_default, dpdk_yaml.mempool_size, &entry_str) != 1 + ? ConfigSetMempoolSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE) + : ConfigSetMempoolSize(iconf, entry_str); if (retval < 0) SCReturnInt(retval); - retval = ConfGetChildValueIntWithDefault( - if_root, if_default, dpdk_yaml.tx_descriptors, &entry_int) != 1 - ? 
ConfigSetTxDescriptors(iconf, DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS) - : ConfigSetTxDescriptors(iconf, entry_int); + retval = ConfGetChildValueWithDefault( + if_root, if_default, dpdk_yaml.mempool_cache_size, &entry_str) != 1 + ? ConfigSetMempoolCacheSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE) + : ConfigSetMempoolCacheSize(iconf, entry_str); if (retval < 0) SCReturnInt(retval); diff --git a/suricata.yaml.in b/suricata.yaml.in index 4d03c45969..520886543c 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -801,20 +801,10 @@ dpdk: # To approximately calculate required amount of space (in bytes) for interface's mempool: mempool-size * mtu # Make sure you have enough allocated hugepages. - # The optimum size for the packet memory pool (in terms of memory usage) is power of two minus one: n = (2^q - 1) - mempool-size: 65535 # The number of elements in the mbuf pool - - # Mempool cache size must be lower or equal to: - # - RTE_MEMPOOL_CACHE_MAX_SIZE (by default 512) and - # - "mempool-size / 1.5" - # It is advised to choose cache_size to have "mempool-size modulo cache_size == 0". - # If this is not the case, some elements will always stay in the pool and will never be used. - # The cache can be disabled if the cache_size argument is set to 0, can be useful to avoid losing objects in cache - # If the value is empty or set to "auto", Suricata will attempt to set cache size of the mempool to a value - # that matches the previously mentioned recommendations - mempool-cache-size: 257 - rx-descriptors: 1024 - tx-descriptors: 1024 + mempool-size: auto # autocalculated based on Rx/Tx descriptors and threads + mempool-cache-size: auto # autocalculated from the mempool size + rx-descriptors: auto # max number of descriptors + tx-descriptors: auto # max number of descriptors # # IPS mode for Suricata works in 3 modes - none, tap, ips # - none: IDS mode only - disables IPS functionality (does not further forward packets)