<citerefentry><refentrytitle>setns</refentrytitle><manvolnum>2</manvolnum></citerefentry> system calls, taking
the specified flags parameters into account. Note that — if this option is used — in addition to restricting
creation and switching of the specified types of namespaces (or all of them, if true) access to the
- <function>setns()</function> system call with a zero flags parameter is prohibited.
- If running in user mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant>
- capability (e.g. setting <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname>
- is implied.
- </para></listitem>
+ <function>setns()</function> system call with a zero flags parameter is prohibited. This setting is only
+ supported on x86, x86-64, s390 and s390x, and enforces no restrictions on other architectures. If running in user
+ mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
+ <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname> is implied. </para></listitem>
</varlistentry>
<varlistentry>
static inline int raw_clone(unsigned long flags) {
assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
-#if defined(__s390__) || defined(__CRIS__)
- /* On s390 and cris the order of the first and second arguments
+#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
+ /* On s390/s390x and cris the order of the first and second arguments
* of the raw clone() system call is reversed. */
return (int) syscall(__NR_clone, NULL, flags);
#elif defined(__sparc__) && defined(__arch64__)
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int clone_reversed_order = -1;
unsigned i;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+ switch (arch) {
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_X32:
+ clone_reversed_order = 0;
+ break;
+
+ case SCMP_ARCH_S390:
+ case SCMP_ARCH_S390X:
+ /* On s390/s390x the first two parameters to clone are switched */
+ clone_reversed_order = 1;
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__s390__) && !defined(__s390x__)
+#warning "Consider adding the right clone() syscall definitions here!"
+#endif
+ }
+
+ if (clone_reversed_order < 0) /* we don't know the right order, let's ignore this arch... */
+ continue;
+
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0)
return r;
break;
}
- r = seccomp_rule_add_exact(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- SCMP_SYS(clone),
- 1,
- SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ if (clone_reversed_order == 0)
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ else
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
if (r < 0) {
log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
break;
#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 1
#endif
+/* we don't know the right order of the clone() parameters except for these archs, for now */
+#if defined(__x86_64__) || defined(__i386__) || defined(__s390x__) || defined(__s390__)
+#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 0
+#else
+#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 1
+#endif
+
extern const uint32_t seccomp_local_archs[];
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
assert_se(namespace_flag_from_string_many(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
+#if SECCOMP_RESTRICT_NAMESPACES_BROKEN == 0
+
if (!is_seccomp_available())
return;
if (geteuid() != 0)
}
assert_se(wait_for_terminate_and_warn("nsseccomp", pid, true) == EXIT_SUCCESS);
+#endif
}
static void test_protect_sysctl(void) {