git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add support for setting CPUAffinity= to special "numa" value 14749/head
author Michal Sekletár <msekleta@redhat.com>
Mon, 17 Feb 2020 12:50:31 +0000 (13:50 +0100)
committer Michal Sekletár <msekleta@redhat.com>
Mon, 16 Mar 2020 07:57:28 +0000 (08:57 +0100)
When CPUAffinity= is set to "numa", systemd will automatically derive the
CPU affinity mask from the NUMA node mask.

Fixes #13248

man/systemd.exec.xml
src/core/dbus-execute.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment.c
src/shared/bus-unit-util.c
src/shared/cpu-set-util.c
src/test/test-cpu-set-util.c
test/TEST-36-NUMAPOLICY/testsuite.sh

index 8f1695ad293f6113a4b32f58c18822687c4f8928..79a2c744c64fb15fdb9b05a986d6d89410099c20 100644 (file)
@@ -774,10 +774,11 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
         <term><varname>CPUAffinity=</varname></term>
 
         <listitem><para>Controls the CPU affinity of the executed processes. Takes a list of CPU indices or ranges
-        separated by either whitespace or commas. CPU ranges are specified by the lower and upper CPU indices separated
-        by a dash. This option may be specified more than once, in which case the specified CPU affinity masks are
-        merged. If the empty string is assigned, the mask is reset, all assignments prior to this will have no
-        effect. See
+        separated by either whitespace or commas. Alternatively, takes the special value "numa", in which case systemd
+        automatically derives the allowed CPU range from the value of the <varname>NUMAMask=</varname> option. CPU ranges
+        are specified by the lower and upper CPU indices separated by a dash. This option may be specified more than
+        once, in which case the specified CPU affinity masks are merged. If the empty string is assigned, the mask
+        is reset, and all assignments prior to this will have no effect. See
         <citerefentry><refentrytitle>sched_setaffinity</refentrytitle><manvolnum>2</manvolnum></citerefentry> for
         details.</para></listitem>
       </varlistentry>
index d8ba3e5d9241e763c3d701f218e0080855ab5950..e8be76e315c56c8e90dd4f025607d0211205893f 100644 (file)
@@ -56,6 +56,8 @@ static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext,
 static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
 static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI);
 static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC);
+static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa);
+
 
 static int property_get_environment_files(
                 sd_bus *bus,
@@ -213,6 +215,7 @@ static int property_get_cpu_affinity(
                 sd_bus_error *error) {
 
         ExecContext *c = userdata;
+        _cleanup_(cpu_set_reset) CPUSet s = {};
         _cleanup_free_ uint8_t *array = NULL;
         size_t allocated;
 
@@ -220,7 +223,16 @@ static int property_get_cpu_affinity(
         assert(reply);
         assert(c);
 
-        (void) cpu_set_to_dbus(&c->cpu_set, &array, &allocated);
+        if (c->cpu_affinity_from_numa) {
+                int r;
+
+                r = numa_to_cpu_set(&c->numa_policy, &s);
+                if (r < 0)
+                        return r;
+        }
+
+        (void) cpu_set_to_dbus(c->cpu_affinity_from_numa ? &s : &c->cpu_set,  &array, &allocated);
+
         return sd_bus_message_append_array(reply, 'y', array, allocated);
 }
 
@@ -741,6 +753,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("CPUAffinityFromNUMA", "b", property_get_cpu_affinity_from_numa, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1770,6 +1783,20 @@ int bus_exec_context_set_transient_property(
 
                 return 1;
 
+        } else if (streq(name, "CPUAffinityFromNUMA")) {
+                int q;
+
+                r = sd_bus_message_read_basic(message, 'b', &q);
+                if (r < 0)
+                        return r;
+
+                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                        c->cpu_affinity_from_numa = q;
+                        unit_write_settingf(u, flags, name, "%s=%s", "CPUAffinity", "numa");
+                }
+
+                return 1;
+
         } else if (streq(name, "NUMAPolicy")) {
                 int32_t type;
 
@@ -1784,6 +1811,7 @@ int bus_exec_context_set_transient_property(
                         c->numa_policy.type = type;
 
                 return 1;
+
         } else if (streq(name, "Nice")) {
                 int32_t q;
 
index 00a2f2e17e477d0610e680946c81d11f8346ddb9..8e1e77b4b2a5442c741943f978def1495ca85898 100644 (file)
@@ -3021,6 +3021,33 @@ static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **
         return using_subcgroup;
 }
 
+static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
+        _cleanup_(cpu_set_reset) CPUSet s = {};
+        int r;
+
+        assert(c);
+        assert(ret);
+
+        if (!c->numa_policy.nodes.set) {
+                log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
+                return 0;
+        }
+
+        r = numa_to_cpu_set(&c->numa_policy, &s);
+        if (r < 0)
+                return r;
+
+        cpu_set_reset(ret);
+
+        return cpu_set_add_all(ret, &s);
+}
+
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
+        assert(c);
+
+        return c->cpu_affinity_from_numa;
+}
+
 static int exec_child(
                 Unit *unit,
                 const ExecCommand *command,
@@ -3318,11 +3345,26 @@ static int exec_child(
                 }
         }
 
-        if (context->cpu_set.set)
-                if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
+        if (context->cpu_affinity_from_numa || context->cpu_set.set) {
+                _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
+                const CPUSet *cpu_set;
+
+                if (context->cpu_affinity_from_numa) {
+                        r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
+                        if (r < 0) {
+                                *exit_status = EXIT_CPUAFFINITY;
+                                return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
+                        }
+
+                        cpu_set = &converted_cpu_set;
+                } else
+                        cpu_set = &context->cpu_set;
+
+                if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
                         *exit_status = EXIT_CPUAFFINITY;
                         return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
                 }
+        }
 
         if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
                 r = apply_numa_policy(&context->numa_policy);
index 6cfa70679f9a7d7be4e83ca8d6edad9922a39ba2..4baf5b1a405f942d7d00de64ab4a6eedd30b03a5 100644 (file)
@@ -182,6 +182,7 @@ struct ExecContext {
 
         CPUSet cpu_set;
         NUMAPolicy numa_policy;
+        bool cpu_affinity_from_numa;
 
         ExecInput std_input;
         ExecOutput std_output;
@@ -406,6 +407,8 @@ void exec_runtime_vacuum(Manager *m);
 
 void exec_params_clear(ExecParameters *p);
 
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
+
 const char* exec_output_to_string(ExecOutput i) _const_;
 ExecOutput exec_output_from_string(const char *s) _pure_;
 
index 71a9873da46fbe6404c967b32982c031de02a9cd..646364eb898788b843cc0e72cdbefd662bb12d33 100644 (file)
@@ -1330,13 +1330,25 @@ int config_parse_exec_cpu_affinity(const char *unit,
                                    void *userdata) {
 
         ExecContext *c = data;
+        int r;
 
         assert(filename);
         assert(lvalue);
         assert(rvalue);
         assert(data);
 
-        return parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
+        if (streq(rvalue, "numa")) {
+                c->cpu_affinity_from_numa = true;
+                cpu_set_reset(&c->cpu_set);
+
+                return 0;
+        }
+
+        r = parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
+        if (r >= 0)
+                c->cpu_affinity_from_numa = false;
+
+        return r;
 }
 
 int config_parse_capability_set(
index 98cf51aeda55c81e9d4c11cb22d0e7821ba32d30..a30876c1a13f672dedc943c7923b11c2c9570e95 100644 (file)
@@ -29,6 +29,7 @@
 #include "signal-util.h"
 #include "socket-util.h"
 #include "sort-util.h"
+#include "stdio-util.h"
 #include "string-util.h"
 #include "syslog-util.h"
 #include "terminal-util.h"
@@ -1103,6 +1104,13 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
                 _cleanup_free_ uint8_t *array = NULL;
                 size_t allocated;
 
+                if (eq && streq(eq, "numa")) {
+                        r = sd_bus_message_append(m, "(sv)", "CPUAffinityFromNUMA", "b", true);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+                        return r;
+                }
+
                 r = parse_cpu_set(eq, &cpuset);
                 if (r < 0)
                         return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
index 97c16ebb8cdcaadbddaa111d2dc55fa8226f2cc6..9b9238362f2ff2d27f2fbcf4c4c65ee686c4500f 100644 (file)
@@ -131,7 +131,7 @@ int cpu_set_add_all(CPUSet *a, const CPUSet *b) {
                                 return r;
                 }
 
-        return 0;
+        return 1;
 }
 
 int parse_cpu_set_full(
@@ -216,7 +216,7 @@ int parse_cpu_set_extend(
         if (!old->set) {
                 *old = cpuset;
                 cpuset = (CPUSet) {};
-                return 0;
+                return 1;
         }
 
         return cpu_set_add_all(old, &cpuset);
index e1dd2eb32bae62c1ef3722c2e7b888bbe7eb828d..450e19e06f2d90166cca85cb493a2d69aa1750c8 100644 (file)
@@ -216,12 +216,12 @@ static void test_parse_cpu_set_extend(void) {
 
         log_info("/* %s */", __func__);
 
-        assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+        assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
         assert_se(CPU_COUNT_S(c.allocated, c.set) == 2);
         assert_se(s1 = cpu_set_to_string(&c));
         log_info("cpu_set_to_string: %s", s1);
 
-        assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+        assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
         assert_se(CPU_COUNT_S(c.allocated, c.set) == 3);
         assert_se(s2 = cpu_set_to_string(&c));
         log_info("cpu_set_to_string: %s", s2);
@@ -238,7 +238,7 @@ static void test_cpu_set_to_from_dbus(void) {
 
         log_info("/* %s */", __func__);
 
-        assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+        assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
         assert_se(s = cpu_set_to_string(&c));
         log_info("cpu_set_to_string: %s", s);
         assert_se(CPU_COUNT_S(c.allocated, c.set) == 104);
index 4a2bede431a6fe1459b1a74dcfe641065665ae36..bd04bb2efe61bc05601be54d827ba05ad6cda112 100755 (executable)
@@ -279,6 +279,18 @@ else
     # Mask must be ignored
     grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" $straceLog
 
+    echo "Unit file CPUAffinity=NUMA support"
+    writeTestUnitNUMAPolicy "bind" "0"
+    echo "CPUAffinity=numa" >> $testUnitNUMAConf
+    systemctl daemon-reload
+    systemctl start $testUnit
+    systemctlCheckNUMAProperties $testUnit "bind" "0"
+    pid=$(systemctl show --value -p MainPID $testUnit)
+    cpulist=$(cat /sys/devices/system/node/node0/cpulist)
+    affinity_systemd=$(systemctl show --value -p CPUAffinity $testUnit)
+    [ $cpulist = $affinity_systemd ]
+    pid1StopUnit $testUnit
+
     echo "systemd-run NUMAPolicy support"
     runUnit='numa-systemd-run-test.service'
 
@@ -309,6 +321,12 @@ else
     systemd-run -p NUMAPolicy=local -p NUMAMask=0 --unit $runUnit sleep 1000
     systemctlCheckNUMAProperties $runUnit "local" ""
     pid1StopUnit $runUnit
+
+    systemd-run -p NUMAPolicy=local -p NUMAMask=0 -p CPUAffinity=numa --unit $runUnit sleep 1000
+    systemctlCheckNUMAProperties $runUnit "local" ""
+    systemctl cat $runUnit | grep -q 'CPUAffinity=numa'
+    pid1StopUnit $runUnit
+
 fi
 
 # Cleanup