git.ipfire.org Git - thirdparty/systemd.git/commitdiff
udev/net: add IRQAffinityPolicy= option for .link files
author: Quentin Deslandes <qde@naccy.de>
Mon, 16 Feb 2026 18:39:04 +0000 (19:39 +0100)
committer: Quentin Deslandes <qde@naccy.de>
Wed, 20 May 2026 09:39:10 +0000 (11:39 +0200)
Add support for configuring IRQ affinity for network interfaces via
systemd .link files. For now, the new IRQAffinityPolicy= option in the [Link]
section only accepts "single", which pins all MSI IRQs to CPU 0.

This allows declarative IRQ affinity configuration for network devices
during udev processing, which is useful for optimizing network
performance on multi-core systems.

Further commits will expand the options supported by IRQAffinityPolicy=.

src/udev/net/link-config-gperf.gperf
src/udev/net/link-config.c
src/udev/net/link-config.h
src/udev/net/test-link-config-tables.c
test/units/TEST-17-UDEV.irq-affinity.sh [new file with mode: 0755]

index ef68dab339fff7f211eb6391cdfb8b83bc0bd173..0b63a1e4462795a16051245abd8160214e81d570 100644 (file)
@@ -132,6 +132,8 @@ Link.TxMaxCoalescedHighFrames,             config_parse_coalesce_u32,
 Link.CoalescePacketRateSampleIntervalSec,  config_parse_coalesce_sec,             0,                             offsetof(LinkConfig, coalesce.rate_sample_interval)
 /* Rx RPS CPU mask */
 Link.ReceivePacketSteeringCPUMask,         config_parse_rps_cpu_mask,             0,                             offsetof(LinkConfig, rps_cpu_mask)
+/* IRQ affinity settings */
+Link.IRQAffinityPolicy,                    config_parse_irq_affinity_policy,      0,                             offsetof(LinkConfig, irq_affinity_policy)
 /* SR-IOV settings */
 Link.SR-IOVVirtualFunctions,               config_parse_sr_iov_num_vfs,           0,                             offsetof(LinkConfig, sr_iov_num_vfs)
 SR-IOV.VirtualFunction,                    config_parse_sr_iov_uint32,            0,                             offsetof(LinkConfig, sr_iov_by_section)
index 3c4b886e6759c6a2235d90fb090e18151ee12e67..4a1512365d6b2305e9df6c92ab41ed390d955416 100644 (file)
@@ -15,6 +15,7 @@
 #include "creds-util.h"
 #include "device-private.h"
 #include "device-util.h"
+#include "dirent-util.h"
 #include "escape.h"
 #include "ether-addr-util.h"
 #include "ethtool-util.h"
@@ -270,6 +271,7 @@ int link_load_one(LinkConfigContext *ctx, const char *filename) {
                 .eee_enabled = -1,
                 .eee_tx_lpi_enabled = -1,
                 .eee_tx_lpi_timer_usec = USEC_INFINITY,
+                .irq_affinity_policy = _IRQ_AFFINITY_POLICY_INVALID,
         };
 
         FOREACH_ELEMENT(feature, config->features)
@@ -922,6 +924,96 @@ static int link_apply_sr_iov_config(Link *link) {
         return 0;
 }
 
+/* Pins one IRQ to one CPU by writing the matching hexadecimal bitmask to
+ * /proc/irq/<irq>/smp_affinity. Returns 0 once the write succeeded; otherwise returns whatever
+ * log_oom() or log_link_warning_errno() yields for the failure. */
+static int set_irq_affinity(Link *link, unsigned irq, unsigned cpu) {
+        _cleanup_free_ char *path = NULL, *mask = NULL;
+        int r;
+
+        assert(link);
+
+        /* The mask is a comma-separated list of 32-bit hexadecimal groups. The group holding the
+         * CPU's bit (1 << (cpu % 32)) is written first, followed by one all-zero group for every
+         * full group of 32 CPUs below it. CPU 32 therefore becomes "1,00000000". */
+        if (asprintf(&mask, "%x", 1U << (cpu % 32)) < 0)
+                return log_oom();
+
+        for (unsigned remaining = cpu / 32; remaining > 0; remaining--)
+                if (!strextend(&mask, ",00000000"))
+                        return log_oom();
+
+        if (asprintf(&path, "/proc/irq/%u/smp_affinity", irq) < 0)
+                return log_oom();
+
+        r = write_string_file(path, mask, WRITE_STRING_FILE_DISABLE_BUFFER);
+        if (r >= 0) {
+                log_link_debug(link, "Set IRQ %u affinity to CPU %u.", irq, cpu);
+                return 0;
+        }
+
+        return log_link_warning_errno(link, r, "Failed to set IRQ %u affinity to CPU %u: %m", irq, cpu);
+}
+
+/* Implements IRQAffinityPolicy=single: every entry of the device's "device/msi_irqs" sysfs
+ * directory is parsed as an IRQ number and that IRQ is pinned to CPU 0.
+ *
+ * Returns 0 when the directory does not exist (nothing to configure) and after all entries were
+ * processed. A failure to pin an individual IRQ is logged by set_irq_affinity() and otherwise
+ * ignored. Failing to open or read the directory, or hitting an entry name that is not an
+ * unsigned number, is logged as an error and ends the function with the logging helper's result. */
+static int link_apply_irq_affinity_single(Link *link) {
+        _cleanup_closedir_ DIR *dir = NULL;
+        int r;
+
+        assert(link);
+        assert(link->config);
+        assert(ASSERT_PTR(link->event)->dev);
+
+        r = device_opendir(link->event->dev, "device/msi_irqs", &dir);
+        if (r < 0) {
+                if (r != -ENOENT)
+                        return log_link_error_errno(link, r, "Failed to open device/msi_irqs: %m");
+                /* A missing directory is not an error: the device simply has no MSI IRQs. */
+                log_link_debug_errno(link, r, "No MSI IRQs found, skipping IRQ affinity configuration: %m");
+                return 0;
+        }
+
+        FOREACH_DIRENT(de, dir, return log_link_error_errno(link, errno, "Failed to read directory device/msi_irqs: %m")) {
+                unsigned irq;
+
+                r = safe_atou(de->d_name, &irq);
+                if (r < 0)
+                        return log_link_error_errno(link, r, "Failed to parse IRQ number '%s': %m", de->d_name);
+
+                /* Best effort: one IRQ that cannot be moved must not prevent pinning the others. */
+                (void) set_irq_affinity(link, irq, /* cpu= */ 0);
+        }
+
+        log_link_info(link, "Applied IRQ affinity policy 'single' (pinning to CPU 0).");
+
+        return 0;
+}
+
+/* Applies the IRQAffinityPolicy= setting of the matched .link file to the device.
+ *
+ * Does nothing (returns 0) when not running as a udev worker, i.e. in test mode, or when no policy
+ * is configured. Otherwise returns the result of the handler for the configured policy. */
+static int link_apply_irq_affinity(Link *link) {
+        assert(link);
+        assert(link->config);
+        assert(ASSERT_PTR(link->event)->dev);
+
+        if (link->event->event_mode != EVENT_UDEV_WORKER) {
+                log_link_debug(link, "Running in test mode, skipping application of IRQ affinity settings.");
+                return 0;
+        }
+
+        /* A negative value (_IRQ_AFFINITY_POLICY_INVALID) means the option was not set. */
+        if (link->config->irq_affinity_policy < 0)
+                return 0;
+
+        switch (link->config->irq_affinity_policy) {
+        case IRQ_AFFINITY_POLICY_SINGLE:
+                return link_apply_irq_affinity_single(link);
+        default:
+                assert_not_reached();
+        }
+}
+
 static int link_apply_rps_cpu_mask(Link *link) {
         _cleanup_free_ char *mask_str = NULL;
         LinkConfig *config;
@@ -1051,6 +1143,10 @@ int link_apply_config(LinkConfigContext *ctx, Link *link) {
         if (r < 0)
                 return r;
 
+        r = link_apply_irq_affinity(link);
+        if (r < 0)
+                return r;
+
         return link_apply_udev_properties(link);
 }
 
@@ -1400,3 +1496,14 @@ DEFINE_CONFIG_PARSE_ENUMV(config_parse_name_policy, name_policy, NamePolicy,
 
 DEFINE_CONFIG_PARSE_ENUMV(config_parse_alternative_names_policy, alternative_names_policy, NamePolicy,
                           _NAMEPOLICY_INVALID);
+
+static const char* const irq_affinity_policy_table[_IRQ_AFFINITY_POLICY_MAX] = {
+        [IRQ_AFFINITY_POLICY_SINGLE] = "single", /* spelling accepted by IRQAffinityPolicy= in .link files */
+};
+
+DEFINE_STRING_TABLE_LOOKUP(irq_affinity_policy, IRQAffinityPolicy);
+DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(
+        config_parse_irq_affinity_policy, /* parser named by the Link.IRQAffinityPolicy gperf entry */
+        irq_affinity_policy,
+        IRQAffinityPolicy,
+        _IRQ_AFFINITY_POLICY_INVALID); /* NOTE(review): presumably the value stored for an empty assignment; confirm against the macro */
index e00327c01526bbef1ba43290f3c7ea2b6d172f65..b6853cd922c99ff76c20e95feb87979aa2b2f76d 100644 (file)
@@ -22,6 +22,12 @@ typedef enum MACAddressPolicy {
         _MAC_ADDRESS_POLICY_INVALID = -EINVAL,
 } MACAddressPolicy;
 
+typedef enum IRQAffinityPolicy {
+        IRQ_AFFINITY_POLICY_SINGLE, /* "single": pin all MSI IRQs of the device to CPU 0 */
+        _IRQ_AFFINITY_POLICY_MAX,
+        _IRQ_AFFINITY_POLICY_INVALID = -EINVAL, /* no policy configured; the value link_load_one() starts with */
+} IRQAffinityPolicy;
+
 typedef struct Link {
         UdevEvent *event;
         LinkConfig *config;
@@ -113,6 +119,9 @@ struct LinkConfig {
         /* Rx RPS CPU mask */
         CPUSet rps_cpu_mask;
 
+        /* IRQ affinity */
+        IRQAffinityPolicy irq_affinity_policy;
+
         /* SR-IOV */
         uint32_t sr_iov_num_vfs;
         OrderedHashmap *sr_iov_by_section;
@@ -136,6 +145,7 @@ int link_get_config(LinkConfigContext *ctx, Link *link);
 int link_apply_config(LinkConfigContext *ctx, Link *link);
 
 DECLARE_STRING_TABLE_LOOKUP(mac_address_policy, MACAddressPolicy);
+DECLARE_STRING_TABLE_LOOKUP(irq_affinity_policy, IRQAffinityPolicy);
 
 /* gperf lookup function */
 const struct ConfigPerfItem* link_config_gperf_lookup(const char *str, GPERF_LEN_TYPE length);
@@ -150,3 +160,4 @@ CONFIG_PARSER_PROTOTYPE(config_parse_mac_address_policy);
 CONFIG_PARSER_PROTOTYPE(config_parse_name_policy);
 CONFIG_PARSER_PROTOTYPE(config_parse_alternative_names_policy);
 CONFIG_PARSER_PROTOTYPE(config_parse_rps_cpu_mask);
+CONFIG_PARSER_PROTOTYPE(config_parse_irq_affinity_policy);
index 499778505c4424946bea514c484c557b3529021b..58b628bc9b388c4a208963c4e43f841fbe156f13 100644 (file)
@@ -8,6 +8,7 @@ int main(int argc, char **argv) {
         test_setup_logging(LOG_DEBUG);
 
         test_table(MACAddressPolicy, mac_address_policy, MAC_ADDRESS_POLICY);
+        test_table(IRQAffinityPolicy, irq_affinity_policy, IRQ_AFFINITY_POLICY);
 
         return EXIT_SUCCESS;
 }
diff --git a/test/units/TEST-17-UDEV.irq-affinity.sh b/test/units/TEST-17-UDEV.irq-affinity.sh
new file mode 100755 (executable)
index 0000000..5fa8bd3
--- /dev/null
@@ -0,0 +1,171 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eux
+set -o pipefail
+
+# shellcheck source=test/units/util.sh
+. "$(dirname "$0")"/util.sh
+
+# Test IRQAffinityPolicy configuration
+
+# Find a network interface with MSI IRQs (typically virtio-net in the VM)
+# We need a real device to test actual IRQ affinity application
+find_interface_with_msi_irqs() {
+    for iface in /sys/class/net/*; do
+        [[ -e "$iface" ]] || continue
+        iface_name=$(basename "$iface")
+        [[ "$iface_name" == "lo" ]] && continue
+        msi_irqs_path="$iface/device/msi_irqs"
+        if [[ -d "$msi_irqs_path" ]] && [[ -n "$(ls -A "$msi_irqs_path" 2>/dev/null)" ]]; then
+            echo "$iface_name"
+            return 0
+        fi
+    done
+    return 1
+}
+
+get_interface_irqs() {
+    local iface="$1"
+    local msi_irqs_path="/sys/class/net/$iface/device/msi_irqs"
+    if [[ -d "$msi_irqs_path" ]]; then
+        ls "$msi_irqs_path"
+    fi
+}
+
+check_irq_affinity() {
+    local irq="$1"
+    local expected_mask="$2"
+    local affinity
+    affinity=$(cat "/proc/irq/$irq/smp_affinity")
+    # Remove leading zeros and commas for comparison
+    affinity=$(echo "$affinity" | sed 's/^[0,]*//;s/,//g')
+    expected_mask=$(echo "$expected_mask" | sed 's/^[0,]*//;s/,//g')
+    [[ "$affinity" == "$expected_mask" ]]
+}
+
+# Test 1: Verify IRQ affinity is actually applied on a real device
+if iface=$(find_interface_with_msi_irqs); then
+    echo "Found interface with MSI IRQs: $iface"
+
+    # Get the MAC address of the interface
+    mac=$(cat "/sys/class/net/$iface/address")
+
+    # Get IRQs before applying policy
+    irqs=$(get_interface_irqs "$iface")
+    echo "Interface $iface has IRQs: $irqs"
+
+    # Test 1a: test single policy
+    mkdir -p /run/systemd/network/
+    cat >/run/systemd/network/00-test-irq-affinity.link <<EOF
+[Match]
+MACAddress=$mac
+
+[Link]
+IRQAffinityPolicy=single
+EOF
+
+    udevadm control --reload
+
+    # Trigger udev to re-apply the link configuration
+    udevadm trigger --action=add "/sys/class/net/$iface"
+    udevadm settle --timeout=30
+
+    # Verify the link file was applied
+    output=$(udevadm info --query property "/sys/class/net/$iface")
+    assert_in "ID_NET_LINK_FILE=/run/systemd/network/00-test-irq-affinity.link" "$output"
+
+    # Verify IRQ affinity was actually set to CPU 0 (mask "1")
+    for irq in $irqs; do
+        if check_irq_affinity "$irq" "1"; then
+            echo "IRQ $irq correctly pinned to CPU 0"
+        else
+            actual=$(cat "/proc/irq/$irq/smp_affinity")
+            echo "IRQ $irq affinity is '$actual', expected '1' (CPU 0)"
+            exit 1
+        fi
+    done
+
+    # Cleanup
+    rm -f /run/systemd/network/00-test-irq-affinity.link
+    udevadm control --reload
+else
+    echo "No interface with MSI IRQs found, skipping actual IRQ affinity test"
+fi
+
+# Test 2: Config parsing with dummy interfaces (no MSI IRQs)
+mkdir -p /run/systemd/network/
+cat >/run/systemd/network/10-test-irq.link <<EOF
+[Match]
+Kind=dummy
+MACAddress=00:50:56:c0:00:20
+
+[Link]
+Name=testirq0
+IRQAffinityPolicy=single
+EOF
+
+udevadm control --reload
+
+# Create a dummy interface
+ip link add address 00:50:56:c0:00:20 type dummy
+udevadm wait --settle --timeout=30 /sys/class/net/testirq0
+
+# Check that the link file was applied
+output=$(udevadm info --query property /sys/class/net/testirq0)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq.link" "$output"
+assert_in "ID_NET_NAME=testirq0" "$output"
+
+# Test that udevadm test-builtin parses the config correctly
+output=$(udevadm test-builtin --action add net_setup_link /sys/class/net/testirq0 2>&1)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq.link" "$output"
+
+# Test 3: Invalid policy values are rejected/warned
+cat >/run/systemd/network/10-test-irq-invalid.link <<EOF
+[Match]
+Kind=dummy
+MACAddress=00:50:56:c0:00:21
+
+[Link]
+Name=testirq1
+IRQAffinityPolicy=invalid_policy
+EOF
+
+udevadm control --reload
+
+# Create another dummy interface - invalid policy should be ignored
+ip link add address 00:50:56:c0:00:21 type dummy
+udevadm wait --settle --timeout=30 /sys/class/net/testirq1
+
+# Check that the link file was still applied (invalid policy is just ignored/warned)
+output=$(udevadm info --query property /sys/class/net/testirq1)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-invalid.link" "$output"
+
+# Test 4: Empty policy (reset/disable)
+cat >/run/systemd/network/10-test-irq-empty.link <<EOF
+[Match]
+Kind=dummy
+MACAddress=00:50:56:c0:00:22
+
+[Link]
+Name=testirq2
+IRQAffinityPolicy=
+EOF
+
+udevadm control --reload
+
+ip link add address 00:50:56:c0:00:22 type dummy
+udevadm wait --settle --timeout=30 /sys/class/net/testirq2
+
+output=$(udevadm info --query property /sys/class/net/testirq2)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-empty.link" "$output"
+
+# Cleanup
+ip link del dev testirq0
+ip link del dev testirq1
+ip link del dev testirq2
+
+rm -f /run/systemd/network/10-test-irq.link
+rm -f /run/systemd/network/10-test-irq-invalid.link
+rm -f /run/systemd/network/10-test-irq-empty.link
+
+exit 0