git.ipfire.org Git - thirdparty/systemd.git/commitdiff
udev/net: add IRQAffinity= option to filter eligible CPUs
author: Quentin Deslandes <qde@naccy.de>
Mon, 16 Feb 2026 19:40:38 +0000 (20:40 +0100)
committer: Quentin Deslandes <qde@naccy.de>
Wed, 20 May 2026 09:39:10 +0000 (11:39 +0200)
Add IRQAffinity= option to .link files that filters the set of CPUs
eligible for IRQ placement. This works in conjunction with
IRQAffinityPolicy= to constrain which CPUs receive network IRQs.

When specified with spread policy, only the listed CPUs are considered
for IRQ distribution. When specified with single policy, IRQs are
pinned to the first CPU in the allowed set instead of CPU 0.

src/udev/net/link-config-gperf.gperf
src/udev/net/link-config.c
src/udev/net/link-config.h
test/units/TEST-17-UDEV.irq-affinity.sh

index 0b63a1e4462795a16051245abd8160214e81d570..90eebe120d1551d94922765baf4e8923e7feb554 100644 (file)
@@ -134,6 +134,7 @@ Link.CoalescePacketRateSampleIntervalSec,  config_parse_coalesce_sec,
 Link.ReceivePacketSteeringCPUMask,         config_parse_rps_cpu_mask,             0,                             offsetof(LinkConfig, rps_cpu_mask)
 /* IRQ affinity settings */
 Link.IRQAffinityPolicy,                    config_parse_irq_affinity_policy,      0,                             offsetof(LinkConfig, irq_affinity_policy)
+Link.IRQAffinity,                          config_parse_cpu_set,                  0,                             offsetof(LinkConfig, irq_affinity_cpus)
 /* SR-IOV settings */
 Link.SR-IOVVirtualFunctions,               config_parse_sr_iov_num_vfs,           0,                             offsetof(LinkConfig, sr_iov_num_vfs)
 SR-IOV.VirtualFunction,                    config_parse_sr_iov_uint32,            0,                             offsetof(LinkConfig, sr_iov_by_section)
index 6211081d79a647605202847b04982e978df8a3eb..5fd59f9453a6664f753c7b7f3015d94b4d5c7dfd 100644 (file)
@@ -83,6 +83,7 @@ static LinkConfig* link_config_free(LinkConfig *config) {
         free(config->wol_password_file);
         erase_and_free(config->wol_password);
         cpu_set_done(&config->rps_cpu_mask);
+        cpu_set_done(&config->irq_affinity_cpus);
 
         ordered_hashmap_free(config->sr_iov_by_section);
 
@@ -1408,7 +1409,7 @@ static int set_irq_affinity(Link *link, unsigned irq, unsigned cpu) {
         return 0;
 }
 
-static int link_apply_irq_affinity_spread(Link *link) {
+static int link_apply_irq_affinity_spread(Link *link, const CPUSet *allowed_cpus) {
         _cleanup_closedir_ DIR *dir = NULL;
         _cleanup_free_ CPUTopology *topology = NULL;
         _cleanup_free_ unsigned *irqs = NULL;
@@ -1450,7 +1451,30 @@ static int link_apply_irq_affinity_spread(Link *link) {
         if (r < 0)
                 return log_link_error_errno(link, r, "Failed to discover CPU topology: %m");
 
-        log_link_debug(link, "Discovered %zu CPUs, spreading %zu IRQs.", topology_count, irq_count);
+        /* Filter topology by allowed CPUs if specified */
+        if (allowed_cpus && allowed_cpus->set) {
+                _cleanup_free_ CPUTopology *filtered_topology = new(CPUTopology, topology_count);
+                size_t filtered_count = 0;
+
+                if (!filtered_topology)
+                        return log_oom();
+
+                for (size_t i = 0; i < topology_count; i++)
+                        if (CPU_ISSET_S(topology[i].cpu, allowed_cpus->allocated, allowed_cpus->set))
+                                filtered_topology[filtered_count++] = topology[i];
+
+                if (filtered_count == 0) {
+                        log_link_warning(link, "IRQAffinity= filter excludes all CPUs, skipping spread.");
+                        return 0;
+                }
+
+                log_link_debug(link, "Filtered to %zu CPUs (from %zu) based on IRQAffinity=.", filtered_count, topology_count);
+
+                free_and_replace(topology, filtered_topology);
+                topology_count = filtered_count;
+        }
+
+        log_link_debug(link, "Spreading %zu IRQs across %zu CPUs.", irq_count, topology_count);
 
         /* Select CPUs using maximum distance algorithm */
         r = select_spread_cpus(topology, topology_count, irq_count, &spread_cpus, &spread_count);
@@ -1466,14 +1490,32 @@ static int link_apply_irq_affinity_spread(Link *link) {
         return 0;
 }
 
-static int link_apply_irq_affinity_single(Link *link) {
+static int link_apply_irq_affinity_single(Link *link, const CPUSet *allowed_cpus) {
         _cleanup_closedir_ DIR *dir = NULL;
+        unsigned target_cpu = 0;
         int r;
 
         assert(link);
         assert(link->config);
         assert(ASSERT_PTR(link->event)->dev);
 
+        /* If IRQAffinity= is specified, use the first allowed CPU instead of CPU 0 */
+        if (allowed_cpus && allowed_cpus->set) {
+                bool found = false;
+
+                for (unsigned cpu = 0; cpu < allowed_cpus->allocated * 8; cpu++)
+                        if (CPU_ISSET_S(cpu, allowed_cpus->allocated, allowed_cpus->set)) {
+                                target_cpu = cpu;
+                                found = true;
+                                break;
+                        }
+
+                if (!found) {
+                        log_link_warning(link, "IRQAffinity= filter excludes all CPUs, skipping single.");
+                        return 0;
+                }
+        }
+
         r = device_opendir(link->event->dev, "device/msi_irqs", &dir);
         if (r < 0) {
                 if (r != -ENOENT)
@@ -1489,10 +1531,10 @@ static int link_apply_irq_affinity_single(Link *link) {
                 if (r < 0)
                         return log_link_error_errno(link, r, "Failed to convert parse IRQ number: %s", de->d_name);
 
-                (void) set_irq_affinity(link, irq, /* cpu= */ 0);
+                (void) set_irq_affinity(link, irq, target_cpu);
         }
 
-        log_link_info(link, "Applied IRQ affinity policy 'single' (pinning to CPU 0).");
+        log_link_info(link, "Applied IRQ affinity policy 'single' (pinning to CPU %u).", target_cpu);
 
         return 0;
 }
@@ -1520,9 +1562,9 @@ static int link_apply_irq_affinity(Link *link) {
 
         switch (link->config->irq_affinity_policy) {
         case IRQ_AFFINITY_POLICY_SINGLE:
-                return link_apply_irq_affinity_single(link);
+                return link_apply_irq_affinity_single(link, &link->config->irq_affinity_cpus);
         case IRQ_AFFINITY_POLICY_SPREAD:
-                return link_apply_irq_affinity_spread(link);
+                return link_apply_irq_affinity_spread(link, &link->config->irq_affinity_cpus);
         default:
                 assert_not_reached();
         }
index ff581d8b021eb261a78423348b54b71106e52c84..da28f569807d8f9fd9ab6d4d9e44ce6594bbb1a7 100644 (file)
@@ -122,6 +122,7 @@ struct LinkConfig {
 
         /* IRQ affinity */
         IRQAffinityPolicy irq_affinity_policy;
+        CPUSet irq_affinity_cpus;
 
         /* SR-IOV */
         uint32_t sr_iov_num_vfs;
index f9dde104fa6a720d7a9dbbf72a619753d0af4c96..7c3662243e2949d8b9590f21da5b8cd6226a60a0 100755 (executable)
@@ -126,6 +126,69 @@ EOF
         echo "Skipping spread verification (need >1 CPU and >1 IRQ)"
     fi
 
+    # Test 1c: Test IRQAffinity= CPU filtering with single policy
+    # Pin to CPU 1 instead of default CPU 0
+    cat >/run/systemd/network/00-test-irq-affinity.link <<EOF
+[Match]
+MACAddress=$mac
+
+[Link]
+IRQAffinityPolicy=single
+IRQAffinity=1
+EOF
+
+    udevadm control --reload
+    udevadm trigger --action=add "/sys/class/net/$iface"
+    udevadm settle --timeout=30
+
+    if [[ "$n_cpus" -gt 1 ]]; then
+        for irq in $irqs; do
+            echo "Checking IRQ $irq affinity with IRQAffinity=1..."
+            if check_irq_affinity "$irq" "2"; then
+                echo "IRQ $irq correctly pinned to CPU 1"
+            else
+                actual=$(cat "/proc/irq/$irq/smp_affinity")
+                echo "IRQ $irq affinity is '$actual', expected '2' (CPU 1)"
+                exit 1
+            fi
+        done
+        echo "IRQAffinity= filtering with single policy works correctly"
+    else
+        echo "Skipping IRQAffinity= test (need >1 CPU)"
+    fi
+
+    # Test 1d: Test IRQAffinity= with spread policy (restrict to subset of CPUs)
+    if [[ "$n_cpus" -ge 4 ]] && [[ "$irq_count" -gt 1 ]]; then
+        cat >/run/systemd/network/00-test-irq-affinity.link <<EOF
+[Match]
+MACAddress=$mac
+
+[Link]
+IRQAffinityPolicy=spread
+IRQAffinity=0-1
+EOF
+
+        udevadm control --reload
+        udevadm trigger --action=add "/sys/class/net/$iface"
+        udevadm settle --timeout=30
+
+        # Verify all IRQs are on CPU 0 or 1 only
+        for irq in $irqs; do
+            affinity_list=$(cat "/proc/irq/$irq/smp_affinity_list")
+            echo "IRQ $irq is on CPU(s): $affinity_list"
+            # Check that affinity is 0 or 1 (not higher CPUs)
+            if [[ "$affinity_list" =~ ^[01]$ ]]; then
+                echo "IRQ $irq correctly restricted to CPUs 0-1"
+            else
+                echo "ERROR: IRQ $irq is on CPU $affinity_list, expected 0 or 1"
+                exit 1
+            fi
+        done
+        echo "IRQAffinity= filtering with spread policy works correctly"
+    else
+        echo "Skipping IRQAffinity= spread test (need >=4 CPUs and >1 IRQ)"
+    fi
+
     # Cleanup
     rm -f /run/systemd/network/00-test-irq-affinity.link
     udevadm control --reload
@@ -200,13 +263,39 @@ udevadm wait --settle --timeout=30 /sys/class/net/testirq2
 output=$(udevadm info --query property /sys/class/net/testirq2)
 assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-empty.link" "$output"
 
+# Test 5: IRQAffinity= config parsing
+cat >/run/systemd/network/10-test-irq-affinity-cpus.link <<EOF
+[Match]
+Kind=dummy
+MACAddress=00:50:56:c0:00:23
+
+[Link]
+Name=testirq3
+IRQAffinityPolicy=spread
+IRQAffinity=0-3,8-11
+EOF
+
+udevadm control --reload
+
+ip link add address 00:50:56:c0:00:23 type dummy
+udevadm wait --settle --timeout=30 /sys/class/net/testirq3
+
+output=$(udevadm info --query property /sys/class/net/testirq3)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-affinity-cpus.link" "$output"
+
+# Test that udevadm test-builtin parses the IRQAffinity= config correctly
+output=$(udevadm test-builtin --action add net_setup_link /sys/class/net/testirq3 2>&1)
+assert_in "ID_NET_LINK_FILE=/run/systemd/network/10-test-irq-affinity-cpus.link" "$output"
+
 # Cleanup
 ip link del dev testirq0
 ip link del dev testirq1
 ip link del dev testirq2
+ip link del dev testirq3
 
 rm -f /run/systemd/network/10-test-irq.link
 rm -f /run/systemd/network/10-test-irq-invalid.link
 rm -f /run/systemd/network/10-test-irq-empty.link
+rm -f /run/systemd/network/10-test-irq-affinity-cpus.link
 
 exit 0