]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/shared/seccomp-util.c
tree-wide: avoid some loaded terms
[thirdparty/systemd.git] / src / shared / seccomp-util.c
index 2a4334b784aa2b07af70e80c778fae3613a9045e..a8dd069a758dbb82edd9266bddb63e1cbadcee49 100644 (file)
@@ -24,7 +24,7 @@
 
 const uint32_t seccomp_local_archs[] = {
 
-        /* Note: always list the native arch we are compiled as last, so that users can blacklist seccomp(), but our own calls to it still succeed */
+        /* Note: always list the native arch we are compiled as last, so that users can deny-list seccomp(), but our own calls to it still succeed */
 
 #if defined(__x86_64__) && defined(__ILP32__)
                 SCMP_ARCH_X86,
@@ -384,6 +384,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
                 .value =
                 "lookup_dcookie\0"
                 "perf_event_open\0"
+                "pidfd_getfd\0"
                 "ptrace\0"
                 "rtas\0"
 #ifdef __NR_s390_runtime_instr
@@ -449,6 +450,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
                 "oldstat\0"
                 "open\0"
                 "openat\0"
+                "openat2\0"
                 "readlink\0"
                 "readlinkat\0"
                 "removexattr\0"
@@ -629,6 +631,14 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
                 "ustat\0"
                 "vserver\0"
         },
+        [SYSCALL_FILTER_SET_PKEY] = {
+                .name = "@pkey",
+                .help = "System calls used for memory protection keys",
+                .value =
+                "pkey_alloc\0"
+                "pkey_free\0"
+                "pkey_mprotect\0"
+        },
         [SYSCALL_FILTER_SET_PRIVILEGED] = {
                 .name = "@privileged",
                 .help = "All system calls which need super-user capabilities",
@@ -1102,7 +1112,7 @@ int seccomp_parse_syscall_filter(
 
                 /* If we previously wanted to forbid a syscall and now
                  * we want to allow it, then remove it from the list. */
-                if (!(flags & SECCOMP_PARSE_INVERT) == !!(flags & SECCOMP_PARSE_WHITELIST)) {
+                if (!(flags & SECCOMP_PARSE_INVERT) == !!(flags & SECCOMP_PARSE_ALLOW_LIST)) {
                         r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num));
                         if (r < 0)
                                 switch (r) {
@@ -1273,7 +1283,39 @@ int seccomp_protect_sysctl(void) {
         return 0;
 }
 
-int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
+int seccomp_protect_syslog(void) {
+        uint32_t arch;
+        int r;
+
+        SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+                _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+                r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+                if (r < 0)
+                        return r;
+
+                r = seccomp_rule_add_exact(
+                                seccomp,
+                                SCMP_ACT_ERRNO(EPERM),
+                                SCMP_SYS(syslog),
+                                0);
+
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to add syslog() rule for architecture %s, skipping %m", seccomp_arch_to_string(arch));
+                        continue;
+                }
+
+                r = seccomp_load(seccomp);
+                if (ERRNO_IS_SECCOMP_FATAL(r))
+                        return r;
+                if (r < 0)
+                        log_debug_errno(r, "Failed to install syslog protection rules for architecture %s, skipping %m", seccomp_arch_to_string(arch));
+        }
+
+        return 0;
+}
+
+int seccomp_restrict_address_families(Set *address_families, bool allow_list) {
         uint32_t arch;
         int r;
 
@@ -1320,13 +1362,13 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
                 if (r < 0)
                         return r;
 
-                if (whitelist) {
+                if (allow_list) {
                         int af, first = 0, last = 0;
                         void *afp;
 
-                        /* If this is a whitelist, we first block the address families that are out of range and then
-                         * everything that is not in the set. First, we find the lowest and highest address family in
-                         * the set. */
+                        /* If this is an allow list, we first block the address families that are out of
+                         * range and then everything that is not in the set. First, we find the lowest and
+                         * highest address family in the set. */
 
                         SET_FOREACH(afp, address_families, i) {
                                 af = PTR_TO_INT(afp);
@@ -1406,9 +1448,8 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
                 } else {
                         void *af;
 
-                        /* If this is a blacklist, then generate one rule for
-                         * each address family that are then combined in OR
-                         * checks. */
+                        /* If this is a deny list, then generate one rule for each address family that are
+                         * then combined in OR checks. */
 
                         SET_FOREACH(af, address_families, i) {
 
@@ -1464,11 +1505,11 @@ int seccomp_restrict_realtime(void) {
                         return r;
 
                 /* Go through all policies with lower values than that, and block them -- unless they appear in the
-                 * whitelist. */
+                 * allow list. */
                 for (p = 0; p < max_policy; p++) {
                         bool good = false;
 
-                        /* Check if this is in the whitelist. */
+                        /* Check if this is in the allow list. */
                         for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
                                 if (permitted_policies[i] == p) {
                                         good = true;
@@ -1491,8 +1532,8 @@ int seccomp_restrict_realtime(void) {
                         }
                 }
 
-                /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are
-                 * unsigned here, hence no need no check for < 0 values. */
+                /* Deny-list all other policies, i.e. the ones with higher values. Note that all comparisons
+                 * are unsigned here, hence no need no check for < 0 values. */
                 r = seccomp_rule_add_exact(
                                 seccomp,
                                 SCMP_ACT_ERRNO(EPERM),
@@ -1543,32 +1584,33 @@ assert_cc(SCMP_SYS(shmdt) > 0);
 
 int seccomp_memory_deny_write_execute(void) {
         uint32_t arch;
-        int r;
+        unsigned loaded = 0;
 
         SECCOMP_FOREACH_LOCAL_ARCH(arch) {
                 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
-                int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0;
+                int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0, r;
 
                 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
 
                 switch (arch) {
 
+                /* Note that on some architectures shmat() isn't available, and the call is multiplexed through ipc().
+                 * We ignore that here, which means there's still a way to get writable/executable
+                 * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
+
                 case SCMP_ARCH_X86:
                 case SCMP_ARCH_S390:
                         filter_syscall = SCMP_SYS(mmap2);
                         block_syscall = SCMP_SYS(mmap);
-                        shmat_syscall = SCMP_SYS(shmat);
+                        /* shmat multiplexed, see above */
                         break;
 
                 case SCMP_ARCH_PPC:
                 case SCMP_ARCH_PPC64:
                 case SCMP_ARCH_PPC64LE:
+                case SCMP_ARCH_S390X:
                         filter_syscall = SCMP_SYS(mmap);
-
-                        /* Note that shmat() isn't available, and the call is multiplexed through ipc().
-                         * We ignore that here, which means there's still a way to get writable/executable
-                         * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
-
+                        /* shmat multiplexed, see above */
                         break;
 
                 case SCMP_ARCH_ARM:
@@ -1579,8 +1621,7 @@ int seccomp_memory_deny_write_execute(void) {
                 case SCMP_ARCH_X86_64:
                 case SCMP_ARCH_X32:
                 case SCMP_ARCH_AARCH64:
-                case SCMP_ARCH_S390X:
-                        filter_syscall = SCMP_SYS(mmap); /* amd64, x32, s390x, and arm64 have only mmap */
+                        filter_syscall = SCMP_SYS(mmap); /* amd64, x32 and arm64 have only mmap */
                         shmat_syscall = SCMP_SYS(shmat);
                         break;
 
@@ -1626,7 +1667,7 @@ int seccomp_memory_deny_write_execute(void) {
 #endif
 
                 if (shmat_syscall > 0) {
-                        r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(shmat),
+                        r = add_seccomp_syscall_filter(seccomp, arch, shmat_syscall,
                                                        1,
                                                        SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
                         if (r < 0)
@@ -1637,10 +1678,15 @@ int seccomp_memory_deny_write_execute(void) {
                 if (ERRNO_IS_SECCOMP_FATAL(r))
                         return r;
                 if (r < 0)
-                        log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+                        log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m",
+                                        seccomp_arch_to_string(arch));
+                loaded++;
         }
 
-        return 0;
+        if (loaded == 0)
+                log_debug("Failed to install any seccomp rules for MemoryDenyWriteExecute=.");
+
+        return loaded;
 }
 
 int seccomp_restrict_archs(Set *archs) {
@@ -1695,17 +1741,13 @@ int seccomp_restrict_archs(Set *archs) {
         return 0;
 }
 
-int parse_syscall_archs(char **l, Set **archs) {
-        _cleanup_set_free_ Set *_archs;
+int parse_syscall_archs(char **l, Set **ret_archs) {
+        _cleanup_set_free_ Set *archs = NULL;
         char **s;
         int r;
 
         assert(l);
-        assert(archs);
-
-        r = set_ensure_allocated(&_archs, NULL);
-        if (r < 0)
-                return r;
+        assert(ret_archs);
 
         STRV_FOREACH(s, l) {
                 uint32_t a;
@@ -1714,13 +1756,12 @@ int parse_syscall_archs(char **l, Set **archs) {
                 if (r < 0)
                         return -EINVAL;
 
-                r = set_put(_archs, UINT32_TO_PTR(a + 1));
+                r = set_ensure_put(&archs, NULL, UINT32_TO_PTR(a + 1));
                 if (r < 0)
                         return -ENOMEM;
         }
 
-        *archs = TAKE_PTR(_archs);
-
+        *ret_archs = TAKE_PTR(archs);
         return 0;
 }
 
@@ -1955,6 +1996,22 @@ static int seccomp_restrict_sxid(scmp_filter_ctx seccomp, mode_t m) {
         else
                 any = true;
 
+#if defined(__SNR_openat2)
+        /* The new openat2() system call can't be filtered sensibly, since it moves the flags parameter into
+         * an indirect structure. Let's block it entirely for now. That should be a reasonably OK thing to do
+         * for now, since openat2() is very new and code generally needs fallback logic anyway to be
+         * compatible with kernels that are not absolutely recent. */
+        r = seccomp_rule_add_exact(
+                        seccomp,
+                        SCMP_ACT_ERRNO(EPERM),
+                        SCMP_SYS(openat2),
+                        0);
+        if (r < 0)
+                log_debug_errno(r, "Failed to add filter for openat2: %m");
+        else
+                any = true;
+#endif
+
         r = seccomp_rule_add_exact(
                         seccomp,
                         SCMP_ACT_ERRNO(EPERM),