- worker-cpu-set:
cpu: [ 2,4,6,8 ]
...
+
+Interrupt (power-saving) mode
+-----------------------------
+
+DPDK is traditionally known for its polling mode of operation.
+In this mode, CPU cores are continuously querying for packets from
+the Network Interface Card (NIC). While this approach offers benefits like
+reduced latency and improved performance, it might not be the most efficient
+in scenarios with sporadic or low traffic.
+The constant polling can lead to unnecessary CPU consumption.
+To address this, DPDK offers an `interrupt` mode.
+
+The obvious advantage that interrupt mode brings is power efficiency.
+So far in our tests, we haven't observed a decrease in performance. Suricata's
+performance has actually seen a slight improvement.
+Users of the IPS runmode should be aware that interrupts can
+introduce non-deterministic latency. However, the latency should never be
+higher than with other capture methods (e.g. AF_PACKET, AF_XDP).
+
+Interrupt mode in DPDK can be configured on a per-interface basis.
+This allows for a hybrid setup where some workers operate in polling mode,
+while others utilize the interrupt mode.
+The configuration for the interrupt mode can be found and modified in the
+DPDK section of the suricata.yaml file.
+
+Below is a sample configuration that demonstrates how to enable the interrupt mode for a specific interface:
+
+::
+
+ ...
+ dpdk:
+ eal-params:
+ proc-type: primary
+
+ interfaces:
+ - interface: 0000:3b:00.0
+ interrupt-mode: true
+ threads: 4
static void DPDKDerefConfig(void *conf);
#define DPDK_CONFIG_DEFAULT_THREADS "auto"
+#define DPDK_CONFIG_DEFAULT_INTERRUPT_MODE false
#define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE 65535
#define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE "auto"
#define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS 1024
DPDKIfaceConfigAttributes dpdk_yaml = {
.threads = "threads",
+ .irq_mode = "interrupt-mode",
.promisc = "promisc",
.multicast = "multicast",
.checksum_checks = "checksum-checks",
SCReturnInt(0);
}
+/**
+ * \brief Record the interrupt-mode choice in the interface configuration.
+ * \param iconf interface configuration whose flags are updated
+ * \param enable when true, DPDK_IRQ_MODE is OR-ed into iconf->flags;
+ *        when false, the flags are left untouched
+ * \return always true
+ */
+static bool ConfigSetInterruptMode(DPDKIfaceConfig *iconf, bool enable)
+{
+    SCEnter();
+    if (!enable) {
+        // nothing to record, the flag is only ever set here, never cleared
+        SCReturnBool(true);
+    }
+
+    iconf->flags |= DPDK_IRQ_MODE;
+    SCReturnBool(true);
+}
+
static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
{
SCEnter();
if (retval < 0)
SCReturnInt(retval);
+ bool irq_enable;
+ retval = ConfGetChildValueBoolWithDefault(if_root, if_default, dpdk_yaml.irq_mode, &entry_bool);
+ if (retval != 1) {
+ irq_enable = DPDK_CONFIG_DEFAULT_INTERRUPT_MODE;
+ } else {
+ irq_enable = entry_bool ? true : false;
+ }
+ retval = ConfigSetInterruptMode(iconf, irq_enable);
+ if (retval != true)
+ SCReturnInt(-EINVAL);
+
// currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads);
if (retval < 0)
},
};
+ SCLogConfig("%s: interrupt mode is %s", iconf->iface,
+ iconf->flags & DPDK_IRQ_MODE ? "enabled" : "disabled");
+ if (iconf->flags & DPDK_IRQ_MODE)
+ port_conf->intr_conf.rxq = 1;
+
// configure RX offloads
if (dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH) {
if (iconf->nb_rx_queues > 1) {
#define BURST_SIZE 32
static struct timeval machine_start_time = { 0, 0 };
+// interrupt mode constants
+// number of zero-packet polls the receive loop tolerates before it starts idling
+#define MIN_ZERO_POLL_COUNT 10U
+// below this zero-packet poll count the sleep heuristic returns MINIMUM_SLEEP_TIME_US
+#define MIN_ZERO_POLL_COUNT_TO_SLEEP 10U
+// short delay (microseconds) that the receive loop passes to rte_delay_us()
+#define MINIMUM_SLEEP_TIME_US 1U
+// value returned by the sleep heuristic that makes the receive loop wait on the RX interrupt
+#define STANDARD_SLEEP_TIME_US 100U
+// timeout (milliseconds) given to rte_epoll_wait() while waiting for an RX interrupt
+#define MAX_EPOLL_TIMEOUT_MS 500U
+// one lock per port, held while RX interrupts of a queue on that port are switched on or off
+static rte_spinlock_t intr_lock[RTE_MAX_ETHPORTS];
/**
* \brief Structure to hold thread specific variables.
TmSlot *slot;
LiveDevice *livedev;
ChecksumValidationMode checksum_mode;
+ bool intr_enabled;
/* references to packet and drop counters */
uint16_t capture_dpdk_packets;
uint16_t capture_dpdk_rx_errs;
static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset);
static uint64_t DPDKGetSeconds(void);
+/**
+ * \brief Add the RX interrupt of a port's queue to the RTE_EPOLL_PER_THREAD epoll instance.
+ *
+ * The port and queue ids are packed into one 32-bit value (port id in the
+ * upper 16 bits, queue id in the lower 16 bits) and handed over as the
+ * event's user data.
+ *
+ * \param port_id DPDK port id
+ * \param queue_id RX queue id on that port
+ * \return true on success, false (after logging an error) otherwise
+ */
+static bool InterruptsRXEnable(uint16_t port_id, uint16_t queue_id)
+{
+    // widen before shifting: a uint16_t is promoted to signed int, and shifting
+    // a value >= 0x8000 left by 16 bits would overflow it (undefined behaviour)
+    uint32_t event_data = ((uint32_t)port_id << UINT16_WIDTH) | queue_id;
+    int32_t ret = rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
+            RTE_INTR_EVENT_ADD, (void *)((uintptr_t)event_data));
+
+    if (ret != 0) {
+        SCLogError("%s-Q%d: failed to enable interrupt mode: %s", DPDKGetPortNameByPortID(port_id),
+                queue_id, rte_strerror(-ret));
+        return false;
+    }
+    return true;
+}
+
+/**
+ * \brief Choose an idle time from the number of polls that returned no packets.
+ * \param no_pkt_polls_count zero-packet poll count maintained by the caller
+ * \return MINIMUM_SLEEP_TIME_US while the count is below
+ *         MIN_ZERO_POLL_COUNT_TO_SLEEP, STANDARD_SLEEP_TIME_US otherwise
+ */
+static inline uint32_t InterruptsSleepHeuristic(uint32_t no_pkt_polls_count)
+{
+    const bool idle_long_enough = no_pkt_polls_count >= MIN_ZERO_POLL_COUNT_TO_SLEEP;
+
+    return idle_long_enough ? STANDARD_SLEEP_TIME_US : MINIMUM_SLEEP_TIME_US;
+}
+
+/**
+ * \brief Enable or disable the RX interrupt of one queue while holding the port's lock.
+ * \param port_id DPDK port id, also used as the index into intr_lock
+ * \param queue_id RX queue id on that port
+ * \param on true to enable the interrupt, false to disable it
+ *
+ * The return values of the enable/disable calls are not inspected.
+ */
+static inline void InterruptsTurnOnOff(uint16_t port_id, uint16_t queue_id, bool on)
+{
+    rte_spinlock_t *port_lock = &intr_lock[port_id];
+
+    rte_spinlock_lock(port_lock);
+    if (!on) {
+        rte_eth_dev_rx_intr_disable(port_id, queue_id);
+    } else {
+        rte_eth_dev_rx_intr_enable(port_id, queue_id);
+    }
+    rte_spinlock_unlock(port_lock);
+}
+
static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset)
{
for (int i = offset; i < mbuf_cnt; i++) {
rte_eth_stats_reset(ptv->port_id);
rte_eth_xstats_reset(ptv->port_id);
+
+ uint32_t pwd_zero_rx_packet_polls_count = 0;
+ if (ptv->intr_enabled && !InterruptsRXEnable(ptv->port_id, ptv->queue_id))
+ SCReturnInt(TM_ECODE_FAILED);
+
while (1) {
if (unlikely(suricata_ctl_flags != 0)) {
SCLogDebug("Stopping Suricata!");
TmThreadsCaptureHandleTimeout(tv, NULL);
last_timeout_msec = msecs;
}
- continue;
+
+ if (!ptv->intr_enabled)
+ continue;
+
+ pwd_zero_rx_packet_polls_count++;
+ if (pwd_zero_rx_packet_polls_count <= MIN_ZERO_POLL_COUNT)
+ continue;
+
+ uint32_t pwd_idle_hint = InterruptsSleepHeuristic(pwd_zero_rx_packet_polls_count);
+
+ if (pwd_idle_hint < STANDARD_SLEEP_TIME_US) {
+ rte_delay_us(pwd_idle_hint);
+ } else {
+ InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, true);
+ struct rte_epoll_event event;
+ rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, MAX_EPOLL_TIMEOUT_MS);
+ InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, false);
+ continue;
+ }
+ } else if (ptv->intr_enabled && pwd_zero_rx_packet_polls_count) {
+ pwd_zero_rx_packet_polls_count = 0;
}
ptv->pkts += (uint64_t)nb_rx;
ptv->checksum_mode = dpdk_config->checksum_mode;
ptv->threads = dpdk_config->threads;
+ ptv->intr_enabled = (dpdk_config->flags & DPDK_IRQ_MODE) ? true : false;
ptv->port_id = dpdk_config->port_id;
ptv->out_port_id = dpdk_config->out_port_id;
ptv->port_socket_id = dpdk_config->socket_id;
"%s: unable to determine NIC's NUMA node, degraded performance can be expected",
dpdk_config->iface);
}
+ if (ptv->intr_enabled) {
+ rte_spinlock_init(&intr_lock[ptv->port_id]);
+ }
}
*data = (void *)ptv;