]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
manager: prohibit clone3() in seccomp filters 23126/head
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 19 Apr 2022 10:44:26 +0000 (12:44 +0200)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 19 Apr 2022 20:04:31 +0000 (22:04 +0200)
RestrictNamespaces should block clone3() like flatpak:
https://github.com/flatpak/flatpak/commit/a10f52a7565c549612c92b8e736a6698a53db330

clone3() passes arguments in a structure referenced by a pointer, so we can't
filter on the flags as with clone(). Let's disallow the whole function call.

src/shared/seccomp-util.c

index 50ad6bc244911c8528af4df24cc8b617a92c5a6c..49044a45aec801ba95b71c395b1f024a618e7c16 100644 (file)
@@ -1230,6 +1230,21 @@ int seccomp_restrict_namespaces(unsigned long retain) {
                 if (r < 0)
                         return r;
 
+                /* We cannot filter on individual flags to clone3(), and we need to disable the
+                 * syscall altogether. ENOSYS is used instead of EPERM, so that glibc and other
+                 * users shall fall back to clone(), as if on an older kernel.
+                 *
+                 * C.f. https://github.com/flatpak/flatpak/commit/a10f52a7565c549612c92b8e736a6698a53db330,
+                 * https://github.com/moby/moby/issues/42680. */
+
+                r = seccomp_rule_add_exact(
+                                seccomp,
+                                SCMP_ACT_ERRNO(ENOSYS),
+                                SCMP_SYS(clone3),
+                                0);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to add clone3() rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
+
                 if ((retain & NAMESPACE_FLAGS_ALL) == 0)
                         /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
                          * altogether. */