return 0;
}
+/* Reads /sys/devices/system/node/node<N>/cpulist for the given NUMA node and parses its contents
+ * into *ret via parse_cpu_set(). If read_virtual_file() fails, its negative return value is passed
+ * through unchanged; otherwise the return value of parse_cpu_set() is returned. */
+int numa_node_get_cpus(size_t node, CPUSet *ret) {
+        _cleanup_free_ char *contents = NULL;
+        char path[STRLEN("/sys/devices/system/node/node//cpulist") + DECIMAL_STR_MAX(size_t) + 1];
+        int r;
+
+        assert(ret);
+
+        /* The buffer is sized for the fixed path plus the longest decimal size_t. */
+        xsprintf(path, "/sys/devices/system/node/node%zu/cpulist", node);
+
+        r = read_virtual_file(path, SIZE_MAX, &contents, /* ret_size= */ NULL);
+        if (r >= 0)
+                r = parse_cpu_set(contents, ret);
+
+        return r;
+}
+
int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *ret) {
_cleanup_(cpu_set_done) CPUSet s = {};
int r;
assert(ret);
for (size_t i = 0; i < policy->nodes.allocated * 8; i++) {
- _cleanup_free_ char *l = NULL;
- char p[STRLEN("/sys/devices/system/node/node//cpulist") + DECIMAL_STR_MAX(size_t) + 1];
-
if (!CPU_ISSET_S(i, policy->nodes.allocated, policy->nodes.set))
continue;
- xsprintf(p, "/sys/devices/system/node/node%zu/cpulist", i);
-
- r = read_one_line_file(p, &l);
- if (r < 0)
- return r;
-
_cleanup_(cpu_set_done) CPUSet part = {};
- r = parse_cpu_set(l, &part);
+ r = numa_node_get_cpus(i, &part);
if (r < 0)
return r;
}
int apply_numa_policy(const NUMAPolicy *policy);
+int numa_node_get_cpus(size_t node, CPUSet *ret);
int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *ret);
int numa_get_node_from_cpu(unsigned cpu, unsigned *ret);
/* IRQ affinity settings */
Link.IRQAffinityPolicy, config_parse_irq_affinity_policy, 0, offsetof(LinkConfig, irq_affinity_policy)
Link.IRQAffinity, config_parse_cpu_set, 0, offsetof(LinkConfig, irq_affinity_cpus)
+Link.IRQAffinityNUMA, config_parse_irq_affinity_numa, 0, offsetof(LinkConfig, irq_affinity_numa)
/* SR-IOV settings */
Link.SR-IOVVirtualFunctions, config_parse_sr_iov_num_vfs, 0, offsetof(LinkConfig, sr_iov_num_vfs)
SR-IOV.VirtualFunction, config_parse_sr_iov_uint32, 0, offsetof(LinkConfig, sr_iov_by_section)
.eee_tx_lpi_enabled = -1,
.eee_tx_lpi_timer_usec = USEC_INFINITY,
.irq_affinity_policy = _IRQ_AFFINITY_POLICY_INVALID,
+ .irq_affinity_numa = IRQ_AFFINITY_NUMA_UNSET,
};
FOREACH_ELEMENT(feature, config->features)
return 0;
}
+/* Looks up the NUMA node a network device belongs to by reading its "device/numa_node" sysfs
+ * attribute. On success the node number is stored in *ret and 0 is returned. A negative return value
+ * from device_get_sysattr_int() is passed through unchanged; a negative attribute value (-1, i.e. no
+ * NUMA node) is reported as -ENOENT. */
+static int link_get_device_numa_node(Link *link, unsigned *ret) {
+        int numa, r;
+
+        assert(link);
+        assert(link->event);
+        assert(link->event->dev);
+        assert(ret);
+
+        r = device_get_sysattr_int(link->event->dev, "device/numa_node", &numa);
+        if (r < 0)
+                return r;
+
+        if (numa < 0)
+                /* -1 is used for "no NUMA node": non-NUMA system, or device not tied to a node */
+                return -ENOENT;
+
+        *ret = (unsigned) numa;
+        return 0;
+}
+
/* CPU topology information for IRQ affinity spread algorithm. */
typedef struct CPUTopology {
unsigned cpu;
static int link_apply_irq_affinity(Link *link) {
_cleanup_(cpu_set_done) CPUSet effective_cpus = {};
const char *syspath;
+ unsigned numa_node = IRQ_AFFINITY_NUMA_UNSET;
int r;
assert(link);
if (r < 0)
return log_link_warning_errno(link, r, "Failed to get syspath: %m");
+        /* Compute the effective CPU set from IRQAffinity= and IRQAffinityNUMA=:
+         *   - both set:              intersection of the two sets
+         *   - only IRQAffinityNUMA=: the CPUs of the selected NUMA node
+         *   - only IRQAffinity=:     a copy of the configured set
+         *   - neither:               effective_cpus stays empty, which means "use all CPUs" below.
+         * Whenever a requested restriction cannot be honoured, a warning is logged and IRQ affinity
+         * configuration is skipped (return 0) rather than falling back to all CPUs. */
+        if (link->config->irq_affinity_numa != IRQ_AFFINITY_NUMA_UNSET) {
+                _cleanup_(cpu_set_done) CPUSet numa_cpus = {};
+
+                /* Resolve "local" to the actual NUMA node */
+                if (link->config->irq_affinity_numa == IRQ_AFFINITY_NUMA_LOCAL) {
+                        r = link_get_device_numa_node(link, &numa_node);
+                        if (r < 0) {
+                                log_link_warning_errno(
+                                                link, r,
+                                                "Failed to determine local NUMA node for device, skipping IRQ affinity configuration: %m");
+                                return 0;
+                        }
+                        log_link_debug(link, "Device is on NUMA node %u.", numa_node);
+                } else
+                        numa_node = link->config->irq_affinity_numa;
+
+                /* Get CPUs for the NUMA node */
+                r = numa_node_get_cpus(numa_node, &numa_cpus);
+                if (r < 0) {
+                        log_link_warning_errno(
+                                        link, r,
+                                        "Failed to get CPUs for NUMA node %u, skipping IRQ affinity configuration: %m",
+                                        numa_node);
+                        return 0;
+                }
+
+                /* A NUMA node may have no CPUs at all. An empty set must not be handed on: an empty
+                 * effective_cpus is interpreted below as "no filtering, use all CPUs", which would
+                 * silently ignore IRQAffinityNUMA=. Treat it like the empty-intersection case. */
+                if (!numa_cpus.set || CPU_COUNT_S(numa_cpus.allocated, numa_cpus.set) == 0) {
+                        log_link_warning(
+                                        link,
+                                        "NUMA node %u has no CPUs, skipping IRQ affinity configuration.",
+                                        numa_node);
+                        return 0;
+                }
+
+                /* If IRQAffinity= is also specified, compute intersection */
+                if (link->config->irq_affinity_cpus.set) {
+                        /* Size the result for the larger of the two sets; every index is bounds-checked
+                         * against each set individually before testing the bit. */
+                        size_t max_allocated = MAX(numa_cpus.allocated, link->config->irq_affinity_cpus.allocated);
+
+                        r = cpu_set_realloc(&effective_cpus, max_allocated * 8);
+                        if (r < 0)
+                                return log_oom();
+
+                        for (size_t i = 0; i < max_allocated * 8; i++) {
+                                bool in_numa = i < numa_cpus.allocated * 8 &&
+                                        CPU_ISSET_S(i, numa_cpus.allocated, numa_cpus.set);
+                                bool in_affinity = i < link->config->irq_affinity_cpus.allocated * 8 &&
+                                        CPU_ISSET_S(i, link->config->irq_affinity_cpus.allocated, link->config->irq_affinity_cpus.set);
+
+                                if (in_numa && in_affinity) {
+                                        r = cpu_set_add(&effective_cpus, i);
+                                        if (r < 0)
+                                                return log_oom();
+                                }
+                        }
+
+                        /* Check if intersection is empty */
+                        if (!effective_cpus.set || CPU_COUNT_S(effective_cpus.allocated, effective_cpus.set) == 0) {
+                                log_link_warning(
+                                                link,
+                                                "IRQAffinity= and IRQAffinityNUMA= intersection is empty, skipping IRQ affinity configuration.");
+                                return 0;
+                        }
+
+                        log_link_debug(link, "Using intersection of IRQAffinity= and NUMA node %u CPUs.", numa_node);
+                } else {
+                        /* Only NUMA filtering, use NUMA CPUs directly (ownership moves to effective_cpus) */
+                        effective_cpus = TAKE_STRUCT(numa_cpus);
+                        log_link_debug(link, "Using CPUs from NUMA node %u.", numa_node);
+                }
+        } else if (link->config->irq_affinity_cpus.set) {
+                /* Only IRQAffinity= specified, copy it */
+                r = cpu_set_add_set(&effective_cpus, &link->config->irq_affinity_cpus);
+                if (r < 0)
+                        return log_oom();
+        }
+        /* else: no filtering, effective_cpus remains empty (meaning use all CPUs) */
+
switch (link->config->irq_affinity_policy) {
case IRQ_AFFINITY_POLICY_SINGLE:
- return link_apply_irq_affinity_single(link, &link->config->irq_affinity_cpus);
+ return link_apply_irq_affinity_single(link, effective_cpus.set ? &effective_cpus : NULL);
case IRQ_AFFINITY_POLICY_SPREAD:
- return link_apply_irq_affinity_spread(link, &link->config->irq_affinity_cpus);
+ return link_apply_irq_affinity_spread(link, effective_cpus.set ? &effective_cpus : NULL);
default:
assert_not_reached();
}
DEFINE_CONFIG_PARSE_ENUMV(config_parse_alternative_names_policy, alternative_names_policy, NamePolicy,
_NAMEPOLICY_INVALID);
+/* Config parser for IRQAffinityNUMA=. 'data' points to the unsigned field to fill in:
+ *   - empty value     -> IRQ_AFFINITY_NUMA_UNSET
+ *   - "local"         -> IRQ_AFFINITY_NUMA_LOCAL
+ *   - unsigned number -> that NUMA node number
+ * Numbers colliding with the two sentinel values are rejected with a warning and the previous
+ * setting is kept. Returns 0 in all of these cases; if the number cannot be parsed, the result of
+ * log_syntax_parse_error() is returned. */
+int config_parse_irq_affinity_numa(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        unsigned node, *target = ASSERT_PTR(data);
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+
+        if (isempty(rvalue))
+                *target = IRQ_AFFINITY_NUMA_UNSET;
+        else if (streq(rvalue, "local"))
+                *target = IRQ_AFFINITY_NUMA_LOCAL;
+        else {
+                /* Anything else must be a NUMA node number */
+                r = safe_atou(rvalue, &node);
+                if (r < 0)
+                        return log_syntax_parse_error(unit, filename, line, r, lvalue, rvalue);
+
+                if (node < IRQ_AFFINITY_NUMA_LOCAL)
+                        *target = node;
+                else {
+                        /* UINT_MAX and UINT_MAX-1 are reserved as the "unset" and "local NUMA node" markers. */
+                        log_syntax(unit, LOG_WARNING, filename, line, 0,
+                                   "Invalid NUMA node number %u, ignoring assignment: %s", node, rvalue);
+                }
+        }
+
+        return 0;
+}
+
static const char* const irq_affinity_policy_table[_IRQ_AFFINITY_POLICY_MAX] = {
[IRQ_AFFINITY_POLICY_SINGLE] = "single",
[IRQ_AFFINITY_POLICY_SPREAD] = "spread",
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
+#include <limits.h>
+
#include "sd-device.h"
#include "cpu-set-util.h"
_IRQ_AFFINITY_POLICY_INVALID = -EINVAL,
} IRQAffinityPolicy;
+/* Special values for IRQAffinityNUMA= */
+#define IRQ_AFFINITY_NUMA_UNSET UINT_MAX
+#define IRQ_AFFINITY_NUMA_LOCAL (IRQ_AFFINITY_NUMA_UNSET - 1)
+
typedef struct Link {
UdevEvent *event;
LinkConfig *config;
/* IRQ affinity */
IRQAffinityPolicy irq_affinity_policy;
CPUSet irq_affinity_cpus;
+ unsigned irq_affinity_numa;
/* SR-IOV */
uint32_t sr_iov_num_vfs;
CONFIG_PARSER_PROTOTYPE(config_parse_alternative_names_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_rps_cpu_mask);
CONFIG_PARSER_PROTOTYPE(config_parse_irq_affinity_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_irq_affinity_numa);
echo "Skipping IRQAffinity= spread test (need >=4 CPUs and >1 IRQ)"
fi
+ # Test 1e: Test IRQAffinityNUMA= if NUMA is available
+ if [[ -d /sys/devices/system/node/node0 ]]; then
+ # Get CPUs on NUMA node 0
+ numa0_cpus=$(cat /sys/devices/system/node/node0/cpulist)
+ echo "NUMA node 0 has CPUs: $numa0_cpus"
+
+ cat >/run/systemd/network/00-test-irq-affinity.link <<EOF
+[Match]
+MACAddress=$mac
+
+[Link]
+IRQAffinityPolicy=spread
+IRQAffinityNUMA=0
+EOF
+
+ udevadm control --reload
+ udevadm trigger --action=add "/sys/class/net/$iface"
+ udevadm settle --timeout=30
+
+ # Verify IRQs are on NUMA node 0 CPUs
+ # Parse the cpulist to get valid CPUs
+ for irq in $irqs; do
+ affinity_list=$(cat "/proc/irq/$irq/smp_affinity_list")
+ echo "IRQ $irq is on CPU(s): $affinity_list (NUMA 0 CPUs: $numa0_cpus)"
+ done
+ echo "IRQAffinityNUMA= configuration applied"
+ else
+ echo "Skipping IRQAffinityNUMA= test (no NUMA available)"
+ fi
+
+ # Test 1f: Test empty intersection error case
+ # This should log a warning and skip affinity configuration
+ if [[ -d /sys/devices/system/node/node0 ]] && [[ -d /sys/devices/system/node/node1 ]]; then
+ # Get first CPU from node 0 that is NOT in node 1
+ first_numa0_cpu=$(cut -d',' -f1 /sys/devices/system/node/node0/cpulist | cut -d'-' -f1)
+
+ cat >/run/systemd/network/00-test-irq-affinity.link <<EOF
+[Match]
+MACAddress=$mac
+
+[Link]
+IRQAffinityPolicy=spread
+IRQAffinity=$first_numa0_cpu
+IRQAffinityNUMA=1
+EOF
+
+ udevadm control --reload
+ udevadm trigger --action=add "/sys/class/net/$iface"
+ udevadm settle --timeout=30
+
+ # The configuration should be applied but IRQ affinity skipped due to empty intersection
+ # Check journal for the warning message
+ if journalctl -u systemd-udevd --since="1 minute ago" | grep "intersection is empty" >/dev/null; then
+ echo "Empty intersection correctly detected and logged"
+ else
+ echo "Note: Empty intersection test - check journal for error message"
+ fi
+ else
+ echo "Skipping empty intersection test (need 2 NUMA nodes)"
+ fi
+
# Cleanup
rm -f /run/systemd/network/00-test-irq-affinity.link
udevadm control --reload
output=$(udevadm test-builtin --action add net_setup_link /sys/class/net/testirq3 2>&1)
assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-affinity-cpus.link" "$output"
+# Test 6: IRQAffinityNUMA= config parsing
+cat >/run/systemd/network/10-test-irq-affinity-numa.link <<EOF
+[Match]
+Kind=dummy
+MACAddress=00:50:56:c0:00:24
+
+[Link]
+Name=testirq4
+IRQAffinityPolicy=spread
+IRQAffinityNUMA=local
+EOF
+
+udevadm control --reload
+
+ip link add address 00:50:56:c0:00:24 type dummy
+udevadm wait --settle --timeout=30 /sys/class/net/testirq4
+
+output=$(udevadm info --query property /sys/class/net/testirq4)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-affinity-numa.link" "$output"
+
+# Test 7: IRQAffinityNUMA= with explicit node number
+cat >/run/systemd/network/10-test-irq-affinity-numa-explicit.link <<EOF
+[Match]
+Kind=dummy
+MACAddress=00:50:56:c0:00:25
+
+[Link]
+Name=testirq5
+IRQAffinityPolicy=single
+IRQAffinityNUMA=0
+EOF
+
+udevadm control --reload
+
+ip link add address 00:50:56:c0:00:25 type dummy
+udevadm wait --settle --timeout=30 /sys/class/net/testirq5
+
+output=$(udevadm info --query property /sys/class/net/testirq5)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-affinity-numa-explicit.link" "$output"
+
+# Test 8: Combined IRQAffinity= and IRQAffinityNUMA=
+cat >/run/systemd/network/10-test-irq-affinity-combined.link <<EOF
+[Match]
+Kind=dummy
+MACAddress=00:50:56:c0:00:26
+
+[Link]
+Name=testirq6
+IRQAffinityPolicy=spread
+IRQAffinity=0-7
+IRQAffinityNUMA=0
+EOF
+
+udevadm control --reload
+
+ip link add address 00:50:56:c0:00:26 type dummy
+udevadm wait --settle --timeout=30 /sys/class/net/testirq6
+
+output=$(udevadm info --query property /sys/class/net/testirq6)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-affinity-combined.link" "$output"
+
# Cleanup
ip link del dev testirq0
ip link del dev testirq1
ip link del dev testirq2
ip link del dev testirq3
+ip link del dev testirq4
+ip link del dev testirq5
+ip link del dev testirq6
rm -f /run/systemd/network/10-test-irq.link
rm -f /run/systemd/network/10-test-irq-invalid.link
rm -f /run/systemd/network/10-test-irq-empty.link
rm -f /run/systemd/network/10-test-irq-affinity-cpus.link
+rm -f /run/systemd/network/10-test-irq-affinity-numa.link
+rm -f /run/systemd/network/10-test-irq-affinity-numa-explicit.link
+rm -f /run/systemd/network/10-test-irq-affinity-combined.link
exit 0