From 2331c02d06cae97b87637a0fc6bb4961b509ccf2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 1 Dec 2023 19:03:23 +0100 Subject: [PATCH] core: when applying syscall filters, use ENOSYS for unknown calls glibc started using fchmodat2 to implement fchmod with flags [1], but the current version of libseccomp does not support fchmodat2 [2]. This is causing problems with programs sandboxed by systemd. libseccomp needs to know a syscall to be able to set any kind of filter for it, so for syscalls unknown by libseccomp we would always do the default action, i.e. either return the errno set by SystemCallErrorNumber or send a fatal signal. For glibc to ignore the unknown syscall and gracefully fall back to the older implementation, we need to return ENOSYS. In particular, tar now fails with the default SystemCallFilter="@system-service" sandbox [3]. This is of course a wider problem: any time the kernel gains new syscalls, before libseccomp and systemd have caught up, we'd behave incorrectly. Let's do the same as we already were doing in nspawn since 3573e032f26724949e86626eace058d006b8bf70, and do the "default action" only for syscalls which are known by us and libseccomp, and return ENOSYS for anything else. This means that users can start using a sandbox with the new syscalls only after libseccomp and systemd have been updated, but before that happens they get behaviour that is backwards-compatible. [1] https://github.com/bminor/glibc/commit/65341f7bbea824d2ff9d37db15d8be162df42bd3 [2] https://github.com/seccomp/libseccomp/issues/406 [3] https://github.com/systemd/systemd/issues/30250 Fixes https://github.com/systemd/systemd/issues/30250. In seccomp_restrict_sxid() there's a chunk conditionalized with '#if defined(__SNR_fchmodat2)'. We need to keep that because seccomp_restrict_suid_sgid() uses SCMP_ACT_ALLOW as the default action.
--- src/shared/seccomp-util.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index bb970d52647..95c704d225c 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -1129,7 +1129,9 @@ int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* filter log_trace("Operating on architecture: %s", seccomp_arch_to_string(arch)); - r = seccomp_init_for_arch(&seccomp, arch, default_action); + /* We install ENOSYS as the default action, but it will only apply to syscalls which are not + * in the @known set. */ + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ERRNO(ENOSYS)); if (r < 0) return r; @@ -1164,6 +1166,23 @@ int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* filter } } + NULSTR_FOREACH(name, syscall_filter_sets[SYSCALL_FILTER_SET_KNOWN].value) { + int id; + + id = seccomp_syscall_resolve_name(name); + if (id < 0) + continue; + + /* Ignore the syscall if it was already handled above */ + if (hashmap_contains(filter, INT_TO_PTR(id + 1))) + continue; + + r = seccomp_rule_add_exact(seccomp, default_action, id, 0); + if (r < 0 && r != -EDOM) /* EDOM means that the syscall is not available for arch */ + return log_debug_errno(r, "Failed to add rule for system call %s() / %d: %m", + name, id); + } + r = seccomp_load(seccomp); if (ERRNO_IS_NEG_SECCOMP_FATAL(r)) return r; -- 2.39.5