git.ipfire.org Git - thirdparty/systemd.git/commitdiff
seccomp: on s390 the clone() parameters are reversed
author: Lennart Poettering <lennart@poettering.net>
Wed, 8 Feb 2017 15:21:11 +0000 (16:21 +0100)
committer: Lennart Poettering <lennart@poettering.net>
Wed, 8 Feb 2017 21:21:27 +0000 (22:21 +0100)
Add a bit of code that tries to get the right parameter order in place
for some of the better known architectures, and skips
restrict_namespaces for other archs.

This also bypasses the test on archs where we don't know the right
order.

In this case I didn't bother with testing the case where no filter is
applied, since that is hopefully only a temporary limitation: there's
nothing stopping us from supporting more archs, we just need to know
which parameter order is right for each of them.

Fixes: #5241
man/systemd.exec.xml
src/basic/raw-clone.h
src/shared/seccomp-util.c
src/shared/seccomp-util.h
src/test/test-seccomp.c

index fd47b0a20a04017f82bb5a3f8754adbd0d3e948c..e7e5d6b0c74ef568e91b5aad2ac067a00819c610 100644 (file)
         <citerefentry><refentrytitle>setns</refentrytitle><manvolnum>2</manvolnum></citerefentry> system calls, taking
         the specified flags parameters into account. Note that — if this option is used — in addition to restricting
         creation and switching of the specified types of namespaces (or all of them, if true) access to the
-        <function>setns()</function> system call with a zero flags parameter is prohibited.
-        If running in user mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant>
-        capability (e.g. setting <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname>
-        is implied.
-        </para></listitem>
+        <function>setns()</function> system call with a zero flags parameter is prohibited.  This setting is only
+        supported on x86, x86-64, s390 and s390x, and enforces no restrictions on other architectures. If running in user
+        mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
+        <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname> is implied.  </para></listitem>
       </varlistentry>
 
       <varlistentry>
index d4738289999f1a1550f45f032d2a037da5f505f9..c6e531ada4d3fee231bb75ad527f124277fe8f18 100644 (file)
@@ -47,8 +47,8 @@
 static inline int raw_clone(unsigned long flags) {
         assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
                          CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
-#if defined(__s390__) || defined(__CRIS__)
-        /* On s390 and cris the order of the first and second arguments
+#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
+        /* On s390/s390x and cris the order of the first and second arguments
          * of the raw clone() system call is reversed. */
         return (int) syscall(__NR_clone, NULL, flags);
 #elif defined(__sparc__) && defined(__arch64__)
index 44706669b4fcf9ea0876eb1936fb98126409da96..e35f18471ca1ce4d42ee43fb1229cd249502c216 100644 (file)
@@ -750,10 +750,35 @@ int seccomp_restrict_namespaces(unsigned long retain) {
 
         SECCOMP_FOREACH_LOCAL_ARCH(arch) {
                 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+                int clone_reversed_order = -1;
                 unsigned i;
 
                 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
 
+                switch (arch) {
+
+                case SCMP_ARCH_X86_64:
+                case SCMP_ARCH_X86:
+                case SCMP_ARCH_X32:
+                        clone_reversed_order = 0;
+                        break;
+
+                case SCMP_ARCH_S390:
+                case SCMP_ARCH_S390X:
+                        /* On s390/s390x the first two parameters to clone are switched */
+                        clone_reversed_order = 1;
+                        break;
+
+                /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__s390__) && !defined(__s390x__)
+#warning "Consider adding the right clone() syscall definitions here!"
+#endif
+                }
+
+                if (clone_reversed_order < 0) /* we don't know the right order, let's ignore this arch... */
+                        continue;
+
                 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
                 if (r < 0)
                         return r;
@@ -802,12 +827,20 @@ int seccomp_restrict_namespaces(unsigned long retain) {
                                 break;
                         }
 
-                        r = seccomp_rule_add_exact(
-                                        seccomp,
-                                        SCMP_ACT_ERRNO(EPERM),
-                                        SCMP_SYS(clone),
-                                        1,
-                                        SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+                        if (clone_reversed_order == 0)
+                                r = seccomp_rule_add_exact(
+                                                seccomp,
+                                                SCMP_ACT_ERRNO(EPERM),
+                                                SCMP_SYS(clone),
+                                                1,
+                                                SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+                        else
+                                r = seccomp_rule_add_exact(
+                                                seccomp,
+                                                SCMP_ACT_ERRNO(EPERM),
+                                                SCMP_SYS(clone),
+                                                1,
+                                                SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
                         if (r < 0) {
                                 log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
                                 break;
index bfbfb5ab3d7f083a4ec3217262715cd4a268405b..61f94de638d98a3f9c384e16ca92b1d62146cf8f 100644 (file)
@@ -91,6 +91,13 @@ int seccomp_memory_deny_write_execute(void);
 #define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 1
 #endif
 
+/* we don't know the right order of the clone() parameters except for these archs, for now */
+#if defined(__x86_64__) || defined(__i386__) || defined(__s390x__) || defined(__s390__)
+#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 0
+#else
+#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 1
+#endif
+
 extern const uint32_t seccomp_local_archs[];
 
 #define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
index 36592388105b3ce66143fd237dbbf3adc8664cca..34a1275162d8739aba2d884e56e42ee0f94ab03c 100644 (file)
@@ -158,6 +158,8 @@ static void test_restrict_namespace(void) {
         assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
         assert_se(namespace_flag_from_string_many(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
 
+#if SECCOMP_RESTRICT_NAMESPACES_BROKEN == 0
+
         if (!is_seccomp_available())
                 return;
         if (geteuid() != 0)
@@ -216,6 +218,7 @@ static void test_restrict_namespace(void) {
         }
 
         assert_se(wait_for_terminate_and_warn("nsseccomp", pid, true) == EXIT_SUCCESS);
+#endif
 }
 
 static void test_protect_sysctl(void) {