# FIXME: actually drop compat glue before v258
Linux kernel ≥ 5.6 for getrandom() GRND_INSECURE
- ≥ 5.7 for CLONE_INTO_CGROUP, BPF links and the BPF LSM hook
+ ≥ 5.7 for CLONE_INTO_CGROUP, cgroup2fs memory_recursiveprot option,
+ BPF links and the BPF LSM hook
⚠️ Kernel versions below 5.7 ("recommended baseline") have significant gaps
in functionality and are not recommended for use with this version
#include "loopback-setup.h"
#include "missing_syscall.h"
#include "mkdir-label.h"
+#include "mount-setup.h"
#include "mount-util.h"
#include "mountpoint-util.h"
#include "namespace-util.h"
return mount_private_apivfs("sysfs", mount_entry_path(m), "/sys", /* opts = */ NULL, p->runtime_scope);
}
-static bool check_recursiveprot_supported(void) {
- int r;
-
- /* memory_recursiveprot is only supported for kernels >= 5.7. Note mount_option_supported uses fsopen()
- * and fsconfig() which are supported for kernels >= 5.2. So if mount_option_supported() returns an
- * error, we can assume memory_recursiveprot is not supported. */
- r = mount_option_supported("cgroup2", "memory_recursiveprot", NULL);
- if (r < 0)
- log_debug_errno(r, "Failed to determine whether the 'memory_recursiveprot' mount option is supported, assuming not: %m");
- else if (r == 0)
- log_debug("This kernel version does not support 'memory_recursiveprot', not using mount option.");
-
- return r > 0;
-}
-
static int mount_private_cgroup2fs(const MountEntry *m, const NamespaceParameters *p) {
_cleanup_free_ char *opts = NULL;
assert(m);
assert(p);
- if (check_recursiveprot_supported()) {
- opts = strdup(strempty(mount_entry_options(m)));
+ if (cgroupfs_recursiveprot_supported()) {
+ opts = strextend_with_separator(NULL, ",", mount_entry_options(m) ?: POINTER_MAX, "memory_recursiveprot");
if (!opts)
return -ENOMEM;
-
- if (!strextend_with_separator(&opts, ",", "memory_recursiveprot"))
- return -ENOMEM;
}
return mount_private_apivfs("cgroup2", mount_entry_path(m), "/sys/fs/cgroup", opts ?: mount_entry_options(m), p->runtime_scope);
MountMode mode;
} MountPoint;
-/* The first three entries we might need before SELinux is up. The
- * fourth (securityfs) is needed by IMA to load a custom policy. The
- * other ones we can delay until SELinux and IMA are loaded. When
- * SMACK is enabled we need smackfs, too, so it's a fifth one. */
-#if ENABLE_SMACK
-#define N_EARLY_MOUNT 5
-#else
-#define N_EARLY_MOUNT 4
-#endif
-
-static bool check_recursiveprot_supported(void) {
+bool cgroupfs_recursiveprot_supported(void) {
int r;
if (!cg_is_unified_wanted())
return false;
- r = mount_option_supported("cgroup2", "memory_recursiveprot", NULL);
+ /* The 'memory_recursiveprot' cgroup2 mount option was added in kernel 5.7. If probing for it fails we log at debug level and treat the option as unsupported. */
+
+ r = mount_option_supported("cgroup2", "memory_recursiveprot", /* value = */ NULL);
if (r < 0)
- log_debug_errno(r, "Failed to determine whether the 'memory_recursiveprot' mount option is supported, assuming not: %m");
+ log_debug_errno(r, "Failed to determine whether cgroupfs supports 'memory_recursiveprot' mount option, assuming not: %m");
else if (r == 0)
- log_debug("This kernel version does not support 'memory_recursiveprot', not using mount option.");
+ log_debug("'memory_recursiveprot' not supported by cgroupfs, not using mount option.");
return r > 0;
}
{ "tmpfs", "/run", "tmpfs", "mode=0755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
NULL, MNT_FATAL|MNT_IN_CONTAINER },
{ "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate,memory_recursiveprot", MS_NOSUID|MS_NOEXEC|MS_NODEV,
- check_recursiveprot_supported, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ cgroupfs_recursiveprot_supported, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
{ "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
{ "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_NONE, },
};
+/* The first three entries may be needed before SELinux is up. The
+ * fourth (securityfs) is needed by IMA to load a custom policy. The
+ * remaining ones can be delayed until SELinux and IMA are loaded. When
+ * SMACK is enabled, smackfs is needed early as well, making five. */
+#if ENABLE_SMACK
+#define N_EARLY_MOUNT 5
+#else
+#define N_EARLY_MOUNT 4
+#endif
+
assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
bool mount_point_is_api(const char *path) {