From: Lukas Sismis Date: Tue, 4 Feb 2025 10:13:42 +0000 (+0100) Subject: dpdk: check for link up before full startup X-Git-Tag: suricata-8.0.0-beta1~170 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=640d0985c2edd48e253a0661ffa94b807423f4c5;p=thirdparty%2Fsuricata.git dpdk: check for link up before full startup ICE card (Intel E810) was not receiving packets immediately after startup, Suricata workers would act as processing while it was not. This eliminates the problem by only continuing in the initialization if the link is already up. The setting can be turned off manually from the configuration file. Ticket: 7381 --- diff --git a/doc/userguide/capture-hardware/dpdk.rst b/doc/userguide/capture-hardware/dpdk.rst index 68b989c561..757484d048 100644 --- a/doc/userguide/capture-hardware/dpdk.rst +++ b/doc/userguide/capture-hardware/dpdk.rst @@ -214,3 +214,20 @@ Individual properties can still be set manually if needed. ``tx-descriptors`` in the TAP/IPS modes. Instead it can be set to a fixed value (e.g. 16384). +.. _dpdk-link-state-change-timeout: + +Link State Change timeout +------------------------- + +The `linkup-timeout` YAML configuration option allows the user to set a timeout +period to wait until the interface's link is detected. This ensures that +Suricata does not start processing packets until the link is up. This option is +particularly useful for Intel E810 (Ice) NICs, which begin receiving packets +only after a few seconds have passed since the interface started. In such cases, +if this check is disabled, Suricata reports as started but only begins +processing packets after a few seconds. This issue has not been observed with +other cards. + +Setting the value to 0 causes Suricata to skip the link check. +If the interface's link remains down after the timeout period, Suricata warns +the user but continues with the engine initialization. 
diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index 3ff415777c..5b83480e62 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -2183,6 +2183,7 @@ The whole DPDK configuration resides in the `dpdk:` node. This node encapsulates multicast: true checksum-checks: true checksum-checks-offload: true + linkup-timeout: 10 mtu: 1500 mempool-size: auto mempool-cache-size: auto diff --git a/doc/userguide/upgrade.rst b/doc/userguide/upgrade.rst index a88bc2e9ea..08a437e21e 100644 --- a/doc/userguide/upgrade.rst +++ b/doc/userguide/upgrade.rst @@ -134,6 +134,13 @@ Major changes - DPDK interface mempools are now allocated per thread instead of per port. This change improves performance and should not be visible from the user configuration perspective. +- DPDK supports link state check, allowing Suricata to start only when the link + is up. This is especially useful for Intel E810 (ice) NICs as they need + a few seconds before they are ready to receive packets. With this check + disabled, Suricata reports as started but only begins processing packets + after the previously mentioned interval. Other cards were not observed to have + this issue. This feature is disabled by default. + See :ref:`dpdk-link-state-change-timeout`. 
Removals ~~~~~~~~ diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index d9fb0c25ab..e1b071721a 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -119,6 +119,7 @@ static void DPDKDerefConfig(void *conf); #define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION 1 #define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD 1 #define DPDK_CONFIG_DEFAULT_VLAN_STRIP 0 +#define DPDK_CONFIG_DEFAULT_LINKUP_TIMEOUT 0 #define DPDK_CONFIG_DEFAULT_COPY_MODE "none" #define DPDK_CONFIG_DEFAULT_COPY_INTERFACE "none" @@ -132,6 +133,7 @@ DPDKIfaceConfigAttributes dpdk_yaml = { .mtu = "mtu", .vlan_strip_offload = "vlan-strip-offload", .rss_hf = "rss-hash-functions", + .linkup_timeout = "linkup-timeout", .mempool_size = "mempool-size", .mempool_cache_size = "mempool-cache-size", .rx_descriptors = "rx-descriptors", @@ -690,6 +692,19 @@ static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int) SCReturnInt(0); } +static int ConfigSetLinkupTimeout(DPDKIfaceConfig *iconf, intmax_t entry_int) +{ + SCEnter(); + if (entry_int < 0) { + SCLogError("%s: Link-up waiting timeout needs to be a positive number or 0 to disable", + iconf->iface); + SCReturnInt(-ERANGE); + } + + iconf->linkup_timeout = entry_int; + SCReturnInt(0); +} + static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool) { SCEnter(); @@ -946,6 +961,13 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface) ConfigSetVlanStrip(iconf, entry_bool); } + retval = ConfGetChildValueIntWithDefault( + if_root, if_default, dpdk_yaml.linkup_timeout, &entry_int) != 1 + ? 
ConfigSetLinkupTimeout(iconf, DPDK_CONFIG_DEFAULT_LINKUP_TIMEOUT) + : ConfigSetLinkupTimeout(iconf, entry_int); + if (retval < 0) + SCReturnInt(retval); + retval = ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.copy_mode, &copy_mode_str); if (retval != 1) { copy_mode_str = DPDK_CONFIG_DEFAULT_COPY_MODE; diff --git a/src/runmode-dpdk.h b/src/runmode-dpdk.h index 392874446d..b91838915c 100644 --- a/src/runmode-dpdk.h +++ b/src/runmode-dpdk.h @@ -33,6 +33,7 @@ typedef struct DPDKIfaceConfigAttributes_ { const char *mtu; const char *vlan_strip_offload; const char *rss_hf; + const char *linkup_timeout; const char *mempool_size; const char *mempool_cache_size; const char *rx_descriptors; diff --git a/src/source-dpdk.c b/src/source-dpdk.c index 8307b215ec..be93a4120e 100644 --- a/src/source-dpdk.c +++ b/src/source-dpdk.c @@ -638,6 +638,45 @@ static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void goto fail; } + uint32_t timeout = dpdk_config->linkup_timeout * 10; + while (timeout > 0) { + struct rte_eth_link link = { 0 }; + retval = rte_eth_link_get_nowait(ptv->port_id, &link); + if (retval != 0) { + if (retval == -ENOTSUP) { + SCLogInfo("%s: link status not supported, skipping", dpdk_config->iface); + } else { + SCLogInfo("%s: error (%s) when getting link status, skipping", + dpdk_config->iface, rte_strerror(-retval)); + } + break; + } + if (link.link_status) { + char link_status_str[RTE_ETH_LINK_MAX_STR_LEN]; +#if RTE_VERSION >= RTE_VERSION_NUM(20, 11, 0, 0) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + rte_eth_link_to_str(link_status_str, sizeof(link_status_str), &link); +#pragma GCC diagnostic pop +#else + snprintf(link_status_str, sizeof(link_status_str), + "Link Up, speed %u Mbps, %s", // 22 chars + 10 for digits + 11 for duplex + link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
"full-duplex" : "half-duplex"); +#endif + + SCLogInfo("%s: %s", dpdk_config->iface, link_status_str); + break; + } + + rte_delay_ms(100); + timeout--; + } + + if (dpdk_config->linkup_timeout && timeout == 0) { + SCLogWarning("%s: link is down, trying to continue anyway", dpdk_config->iface); + } + // some PMDs requires additional actions only after the device has started DevicePostStartPMDSpecificActions(ptv, dev_info.driver_name); diff --git a/src/source-dpdk.h b/src/source-dpdk.h index 28341c1704..350a5a164a 100644 --- a/src/source-dpdk.h +++ b/src/source-dpdk.h @@ -75,6 +75,7 @@ typedef struct DPDKIfaceConfig_ { uint32_t mempool_size; uint32_t mempool_cache_size; DPDKDeviceResources *pkt_mempools; + uint16_t linkup_timeout; // in seconds how long to wait for link to come up SC_ATOMIC_DECLARE(unsigned int, ref); /* threads bind queue id one by one */ SC_ATOMIC_DECLARE(uint16_t, queue_id); diff --git a/src/util-dpdk-common.h b/src/util-dpdk-common.h index 02125ff7c2..9a2779dc7c 100644 --- a/src/util-dpdk-common.h +++ b/src/util-dpdk-common.h @@ -113,6 +113,10 @@ #define RTE_MBUF_F_RX_L4_CKSUM_BAD PKT_RX_L4_CKSUM_BAD #endif +#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) +#define RTE_ETH_LINK_MAX_STR_LEN 40 +#endif + typedef struct { struct rte_mempool **pkt_mp; uint16_t pkt_mp_cnt; diff --git a/suricata.yaml.in b/suricata.yaml.in index 032a6d1550..e83ec1bfa6 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -798,6 +798,7 @@ dpdk: # rss-hash-functions: 0x0 # advanced configuration option, use only if you use untested NIC card and experience RSS warnings, # For `rss-hash-functions` use hexadecimal 0x01ab format to specify RSS hash function flags - DumpRssFlags can help (you can see output if you use -vvv option during Suri startup) # setting auto to rss_hf sets the default RSS hash functions (based on IP addresses) + # linkup-timeout: 0 # how many seconds to wait before giving up, 0 to disable link state checking # To approximately calculate required amount 
of space (in bytes) for interface's mempool: mempool-size * mtu # Make sure you have enough allocated hugepages. @@ -822,6 +823,7 @@ dpdk: mtu: 1500 vlan-strip-offload: false rss-hash-functions: auto + linkup-timeout: 0 mempool-size: auto mempool-cache-size: auto rx-descriptors: auto