of systemd. Taint flag 'old-kernel' will be set. systemd will most likely
still function, but upstream support and testing are limited.
- Linux kernel ≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option
+ Linux kernel ≥ 5.15 for MPOL_PREFERRED_MANY NUMA policy
+ ≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option
≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD,
and MOVE_MOUNT_BENEATH
≥ 6.6 for quota support on tmpfs
≥ 6.7 for cgroup2fs memory_hugetlb_accounting option
≥ 6.8 for STATX_MNT_ID_UNIQUE
- ≥ 6.9 for pidfs
+ ≥ 6.9 for pidfs and MPOL_WEIGHTED_INTERLEAVE NUMA policy
≥ 6.10 for fcntl(F_DUPFD_QUERY), unprivileged linkat(AT_EMPTY_PATH),
and block device 'partscan' sysfs attribute
≥ 6.12 for AT_HANDLE_MNT_ID_UNIQUE
<term><varname>NUMAPolicy=</varname></term>
<listitem><para>Controls the NUMA memory policy of the executed processes. Takes a policy type, one of:
- <option>default</option>, <option>preferred</option>, <option>bind</option>, <option>interleave</option> and
- <option>local</option>. A list of NUMA nodes that should be associated with the policy must be specified
- in <varname>NUMAMask=</varname>. For more details on each policy please see,
+ <option>default</option>, <option>preferred</option>, <option>bind</option>, <option>interleave</option>,
+ <option>local</option>, <option>preferred-many</option> (requires Linux 5.15 or newer) and
+ <option>weighted-interleave</option> (requires Linux 6.9 or newer; weights are configured via
+ <filename>/sys/kernel/mm/mempolicy/weighted_interleave/</filename>). A list of NUMA nodes that should be
+ associated with the policy must be specified in <varname>NUMAMask=</varname>. For more details on each
+ policy, see
<citerefentry><refentrytitle>set_mempolicy</refentrytitle><manvolnum>2</manvolnum></citerefentry>. For overall
overview of NUMA support in Linux see,
<citerefentry project='man-pages'><refentrytitle>numa</refentrytitle><manvolnum>7</manvolnum></citerefentry>.
+ If the kernel does not support the requested policy, a warning is logged and the setting is ignored.
</para>
<xi:include href="version-info.xml" xpointer="v243"/></listitem>
if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
r = apply_numa_policy(&context->numa_policy);
- if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
+ if (r == -ENOSYS)
log_debug_errno(r, "NUMA support not available, ignoring.");
+ else if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
+ log_warning_errno(r, "NUMA policy not supported by kernel, ignoring.");
else if (r < 0) {
*exit_status = EXIT_NUMA_POLICY;
return log_error_errno(r, "Failed to set NUMA memory policy: %m");
}
r = apply_numa_policy(&arg_numa_policy);
- if (r == -EOPNOTSUPP)
+ if (r == -ENOSYS)
log_debug_errno(r, "NUMA support not available, ignoring.");
+ else if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
+ log_warning_errno(r, "NUMA policy not supported by kernel, ignoring.");
else if (r < 0)
log_warning_errno(r, "Failed to set NUMA memory policy, ignoring: %m");
}
return 0;
}
- return sd_json_variant_new_string(ret, mpol_to_string(t));
+ _cleanup_free_ char *s = strdup(mpol_to_string(t));
+ if (!s)
+ return -ENOMEM;
+
+ return sd_json_variant_new_string(ret, json_underscorify(s));
}
static int numa_mask_build_json(sd_json_variant **ret, const char *name, void *userdata) {
assert(policy);
if (get_mempolicy(NULL, NULL, 0, NULL, 0) < 0 && errno == ENOSYS)
- return -EOPNOTSUPP;
+ /* NUMA syscall interface not available (kernel compiled without NUMA support).
+ * Return -ENOSYS so callers can distinguish this from -EOPNOTSUPP, which we
+ * return below when the syscall interface exists but the requested policy is
+ * not supported by this kernel version. */
+ return -ENOSYS;
if (!numa_policy_is_valid(policy))
return -EINVAL;
return r;
r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode);
- if (r < 0)
+ if (r < 0) {
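+ /* FIXME: Remove this compatibility code path once kernel 6.9 (which added
+  * MPOL_WEIGHTED_INTERLEAVE) becomes the minimal baseline. Older kernels reject policy
+  * modes they do not know with EINVAL, which is reported here as -EOPNOTSUPP. */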
+ if (errno == EINVAL && IN_SET(numa_policy_get_type(policy),
+ MPOL_PREFERRED_MANY, MPOL_WEIGHTED_INTERLEAVE))
+ return -EOPNOTSUPP;
return -errno;
+ }
return 0;
}
}
static const char* const mpol_table[] = {
- [MPOL_DEFAULT] = "default",
- [MPOL_PREFERRED] = "preferred",
- [MPOL_BIND] = "bind",
- [MPOL_INTERLEAVE] = "interleave",
- [MPOL_LOCAL] = "local",
+ [MPOL_DEFAULT] = "default",
+ [MPOL_PREFERRED] = "preferred",
+ [MPOL_BIND] = "bind",
+ [MPOL_INTERLEAVE] = "interleave",
+ [MPOL_LOCAL] = "local",
+ [MPOL_PREFERRED_MANY] = "preferred-many",
+ [MPOL_WEIGHTED_INTERLEAVE] = "weighted-interleave",
};
DEFINE_STRING_TABLE_LOOKUP(mpol, int);
#include "shared-forward.h"
static inline bool mpol_is_valid(int t) {
- return t >= MPOL_DEFAULT && t <= MPOL_LOCAL;
+ return t >= MPOL_DEFAULT && t <= MPOL_WEIGHTED_INTERLEAVE;
}
typedef struct NUMAPolicy {
SD_VARLINK_DEFINE_ENUM_VALUE(preferred),
SD_VARLINK_DEFINE_ENUM_VALUE(bind),
SD_VARLINK_DEFINE_ENUM_VALUE(interleave),
- SD_VARLINK_DEFINE_ENUM_VALUE(local));
+ SD_VARLINK_DEFINE_ENUM_VALUE(local),
+ SD_VARLINK_DEFINE_ENUM_VALUE(preferred_many),
+ SD_VARLINK_DEFINE_ENUM_VALUE(weighted_interleave));
SD_VARLINK_DEFINE_ENUM_TYPE(
MountPropagationFlag,
pid1ReloadWithStrace
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"
+ echo "PID1 NUMAPolicy support - Preferred-many policy w/o mask"
+ writePID1NUMAPolicy "preferred-many"
+ pid1ReloadWithJournal
+ grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog"
+
+ echo "PID1 NUMAPolicy support - Preferred-many policy w/ mask"
+ writePID1NUMAPolicy "preferred-many" "0"
+ pid1ReloadWithStrace
+ grep -E "set_mempolicy\((MPOL_PREFERRED_MANY|0x5 [^,]*), \[0x0*1\]" "$straceLog"
+
+ echo "PID1 NUMAPolicy support - Weighted-interleave policy w/o mask"
+ writePID1NUMAPolicy "weighted-interleave"
+ pid1ReloadWithJournal
+ grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog"
+
+ echo "PID1 NUMAPolicy support - Weighted-interleave policy w/ mask"
+ writePID1NUMAPolicy "weighted-interleave" "0"
+ pid1ReloadWithStrace
+ grep -E "set_mempolicy\((MPOL_WEIGHTED_INTERLEAVE|0x6 [^,]*), \[0x0*1\]" "$straceLog"
+
echo "Unit file NUMAPolicy support - Default policy w/o mask"
writeTestUnitNUMAPolicy "default"
pid1StartUnitWithStrace "$testUnit"
# Mask must be ignored
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"
+ echo "Unit file NUMAPolicy support - Preferred-many policy w/o mask"
+ writeTestUnitNUMAPolicy "preferred-many"
+ pid1StartUnitWithStrace "$testUnit"
+ pid1StopUnit "$testUnit"
+ [[ $(systemctl show "$testUnit" -P ExecMainStatus) == "242" ]]
+
+ echo "Unit file NUMAPolicy support - Preferred-many policy w/ mask"
+ writeTestUnitNUMAPolicy "preferred-many" "0"
+ pid1StartUnitWithStrace "$testUnit"
+ systemctlCheckNUMAProperties "$testUnit" "preferred-many" "0"
+ varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "{\"name\":\"$testUnit\"}" | jq -e '.context.Exec.NUMAPolicy == "preferred_many"'
+ pid1StopUnit "$testUnit"
+ grep -E "set_mempolicy\((MPOL_PREFERRED_MANY|0x5 [^,]*), \[0x0*1\]" "$straceLog"
+
+ echo "Unit file NUMAPolicy support - Weighted-interleave policy w/o mask"
+ writeTestUnitNUMAPolicy "weighted-interleave"
+ pid1StartUnitWithStrace "$testUnit"
+ pid1StopUnit "$testUnit"
+ [[ $(systemctl show "$testUnit" -P ExecMainStatus) == "242" ]]
+
+ echo "Unit file NUMAPolicy support - Weighted-interleave policy w/ mask"
+ writeTestUnitNUMAPolicy "weighted-interleave" "0"
+ pid1StartUnitWithStrace "$testUnit"
+ systemctlCheckNUMAProperties "$testUnit" "weighted-interleave" "0"
+ varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "{\"name\":\"$testUnit\"}" | jq -e '.context.Exec.NUMAPolicy == "weighted_interleave"'
+ pid1StopUnit "$testUnit"
+ grep -E "set_mempolicy\((MPOL_WEIGHTED_INTERLEAVE|0x6 [^,]*), \[0x0*1\]" "$straceLog"
+
echo "Unit file CPUAffinity=NUMA support"
writeTestUnitNUMAPolicy "bind" "0"
echo "CPUAffinity=numa" >>"$testUnitNUMAConf"
systemctlCheckNUMAProperties "$runUnit" "local" ""
systemctl cat "$runUnit" | grep 'CPUAffinity=numa' >/dev/null
pid1StopUnit "$runUnit"
+
+ systemd-run -p NUMAPolicy=preferred-many -p NUMAMask=0 --unit "$runUnit" sleep 1000
+ systemctlCheckNUMAProperties "$runUnit" "preferred-many" "0"
+ pid1StopUnit "$runUnit"
+
+ systemd-run -p NUMAPolicy=weighted-interleave -p NUMAMask=0 --unit "$runUnit" sleep 1000
+ systemctlCheckNUMAProperties "$runUnit" "weighted-interleave" "0"
+ pid1StopUnit "$runUnit"
fi
# Cleanup