/* SPDX-License-Identifier: LGPL-2.1+ */
#include <errno.h>
+#include <fcntl.h>
#include <linux/seccomp.h>
#include <seccomp.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/shm.h>
+#include <sys/stat.h>
#include "af-list.h"
#include "alloc-util.h"
+#include "errno-list.h"
#include "macro.h"
#include "nsflags.h"
+#include "nulstr-util.h"
#include "process-util.h"
#include "seccomp-util.h"
#include "set.h"
#include "string-util.h"
#include "strv.h"
-#include "util.h"
-#include "errno-list.h"
const uint32_t seccomp_local_archs[] = {
"pause\0"
"prlimit64\0"
"restart_syscall\0"
+ "rseq\0"
"rt_sigreturn\0"
"sched_yield\0"
"set_robust_list\0"
"io_cancel\0"
"io_destroy\0"
"io_getevents\0"
+ "io_pgetevents\0"
"io_setup\0"
"io_submit\0"
},
.value =
"lookup_dcookie\0"
"perf_event_open\0"
- "process_vm_readv\0"
- "process_vm_writev\0"
"ptrace\0"
"rtas\0"
#ifdef __NR_s390_runtime_instr
"bpf\0"
"capset\0"
"chroot\0"
+ "fanotify_init\0"
"nfsservctl\0"
+ "open_by_handle_at\0"
"pivot_root\0"
"quotactl\0"
"setdomainname\0"
"ioprio_get\0"
"kcmp\0"
"madvise\0"
- "mincore\0"
"mprotect\0"
"mremap\0"
"name_to_handle_at\0"
const SyscallFilterSet *other;
other = syscall_filter_set_find(name);
- if (!other) {
- log_debug("Filter set %s is not known!", name);
- return -EINVAL;
- }
+ if (!other)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Filter set %s is not known!",
+ name);
return seccomp_add_syscall_filter_set(seccomp, other, action, exclude, log_missing);
return log_debug_errno(r, "Failed to add filter set: %m");
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
}
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
return 0;
}
-int seccomp_parse_syscall_filter_full(
+int seccomp_parse_syscall_filter(
const char *name,
int errno_num,
Hashmap *filter,
* away the SECCOMP_PARSE_LOG flag) since any issues in the group table are our own problem,
* not a problem in user configuration data and we shouldn't pretend otherwise by complaining
* about them. */
- r = seccomp_parse_syscall_filter_full(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line);
+ r = seccomp_parse_syscall_filter(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line);
if (r < 0)
return r;
}
continue;
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
}
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
}
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
}
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
assert_cc(SCMP_SYS(shmget) > 0);
assert_cc(SCMP_SYS(shmat) > 0);
assert_cc(SCMP_SYS(shmdt) > 0);
-#elif defined(__i386__) || defined(__powerpc64__)
-assert_cc(SCMP_SYS(shmget) < 0);
-assert_cc(SCMP_SYS(shmat) < 0);
-assert_cc(SCMP_SYS(shmdt) < 0);
#endif
int seccomp_memory_deny_write_execute(void) {
-
uint32_t arch;
int r;
case SCMP_ARCH_X86:
filter_syscall = SCMP_SYS(mmap2);
block_syscall = SCMP_SYS(mmap);
+ shmat_syscall = SCMP_SYS(shmat);
break;
case SCMP_ARCH_PPC:
continue;
#endif
- if (shmat_syscall != 0) {
+ if (shmat_syscall > 0) {
r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(shmat),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
}
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
return r;
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to restrict system call architectures, skipping: %m");
}
r = seccomp_load(seccomp);
- if (IN_SET(r, -EPERM, -EACCES))
+ if (ERRNO_IS_SECCOMP_FATAL(r))
return r;
if (r < 0)
log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
return 0;
}
+
+int seccomp_protect_hostname(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sethostname),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add sethostname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setdomainname),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setdomainname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to apply hostname restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+static int seccomp_restrict_sxid(scmp_filter_ctx seccomp, mode_t m) {
+ /* Checks the mode_t parameter of the following system calls:
+ *
+ * → chmod() + fchmod() + fchmodat()
+ * → open() + creat() + openat()
+ * → mkdir() + mkdirat()
+ * → mknod() + mknodat()
+ *
+ * Returns error if *everything* failed, and 0 otherwise.
+ */
+ int r = 0;
+ bool any = false;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(chmod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for chmod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(fchmod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for fchmod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(fchmodat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for fchmodat: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mkdir),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mkdir: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mkdirat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mkdirat: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mknod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mknod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mknodat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mknodat: %m");
+ else
+ any = true;
+
+#if SCMP_SYS(open) > 0
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(open),
+ 2,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for open: %m");
+ else
+ any = true;
+#endif
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(openat),
+ 2,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
+ SCMP_A3(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for openat: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(creat),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for creat: %m");
+ else
+ any = true;
+
+ return any ? 0 : r;
+}
+
+int seccomp_restrict_suid_sgid(void) {
+ uint32_t arch;
+ int r, k;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_restrict_sxid(seccomp, S_ISUID);
+ if (r < 0)
+ log_debug_errno(r, "Failed to add suid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
+
+ k = seccomp_restrict_sxid(seccomp, S_ISGID);
+ if (k < 0)
+ log_debug_errno(r, "Failed to add sgid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
+
+ if (r < 0 && k < 0)
+ continue;
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to apply suid/sgid restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+uint32_t scmp_act_kill_process(void) {
+
+ /* Returns SCMP_ACT_KILL_PROCESS if it's supported, and SCMP_ACT_KILL_THREAD otherwise. We never
+ * actually want to use SCMP_ACT_KILL_THREAD as its semantics are nuts (killing arbitrary threads of
+ * a program is just a bad idea), but on old kernels/old libseccomp it is all we have, and at least
+ * for single-threaded apps does the right thing. */
+
+#ifdef SCMP_ACT_KILL_PROCESS
+ if (seccomp_api_get() >= 3)
+ return SCMP_ACT_KILL_PROCESS;
+#endif
+
+ return SCMP_ACT_KILL; /* same as SCMP_ACT_KILL_THREAD */
+}