1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2016 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <sys/eventfd.h>
26 #include <sys/personality.h>
28 #include <sys/types.h>
31 #include "alloc-util.h"
36 #include "process-util.h"
37 #include "raw-clone.h"
38 #include "seccomp-util.h"
40 #include "string-util.h"
44 #if SCMP_SYS(socket) < 0 || defined(__i386__) || defined(__s390x__) || defined(__s390__)
45 /* On these archs, socket() is implemented via the socketcall() syscall multiplexer,
46 * and hence we can't restrict it via seccomp. */
47 # define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 1
49 # define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
52 static void test_seccomp_arch_to_string(void) {
56 a
= seccomp_arch_native();
58 name
= seccomp_arch_to_string(a
);
60 assert_se(seccomp_arch_from_string(name
, &b
) >= 0);
64 static void test_architecture_table(void) {
87 assert_se(seccomp_arch_from_string(n
, &c
) >= 0);
88 n2
= seccomp_arch_to_string(c
);
89 log_info("seccomp-arch: %s → 0x%"PRIx32
" → %s", n
, c
, n2
);
90 assert_se(streq_ptr(n
, n2
));
94 static void test_syscall_filter_set_find(void) {
95 assert_se(!syscall_filter_set_find(NULL
));
96 assert_se(!syscall_filter_set_find(""));
97 assert_se(!syscall_filter_set_find("quux"));
98 assert_se(!syscall_filter_set_find("@quux"));
100 assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets
+ SYSCALL_FILTER_SET_CLOCK
);
101 assert_se(syscall_filter_set_find("@default") == syscall_filter_sets
+ SYSCALL_FILTER_SET_DEFAULT
);
102 assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets
+ SYSCALL_FILTER_SET_RAW_IO
);
105 static void test_filter_sets(void) {
109 if (!is_seccomp_available())
114 for (i
= 0; i
< _SYSCALL_FILTER_SET_MAX
; i
++) {
117 log_info("Testing %s", syscall_filter_sets
[i
].name
);
122 if (pid
== 0) { /* Child? */
125 if (i
== SYSCALL_FILTER_SET_DEFAULT
) /* if we look at the default set, whitelist instead of blacklist */
126 r
= seccomp_load_syscall_filter_set(SCMP_ACT_ERRNO(EUCLEAN
), syscall_filter_sets
+ i
, SCMP_ACT_ALLOW
);
128 r
= seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ i
, SCMP_ACT_ERRNO(EUCLEAN
));
132 /* Test the syscall filter with one random system call */
133 fd
= eventfd(0, EFD_NONBLOCK
|EFD_CLOEXEC
);
134 if (IN_SET(i
, SYSCALL_FILTER_SET_IO_EVENT
, SYSCALL_FILTER_SET_DEFAULT
))
135 assert_se(fd
< 0 && errno
== EUCLEAN
);
144 assert_se(wait_for_terminate_and_warn(syscall_filter_sets
[i
].name
, pid
, true) == EXIT_SUCCESS
);
148 static void test_restrict_namespace(void) {
149 _cleanup_free_
char *s
= NULL
;
153 assert_se(namespace_flag_to_string(0) == NULL
);
154 assert_se(streq(namespace_flag_to_string(CLONE_NEWNS
), "mnt"));
155 assert_se(namespace_flag_to_string(CLONE_NEWNS
|CLONE_NEWIPC
) == NULL
);
156 assert_se(streq(namespace_flag_to_string(CLONE_NEWCGROUP
), "cgroup"));
158 assert_se(namespace_flag_from_string("mnt") == CLONE_NEWNS
);
159 assert_se(namespace_flag_from_string(NULL
) == 0);
160 assert_se(namespace_flag_from_string("") == 0);
161 assert_se(namespace_flag_from_string("uts") == CLONE_NEWUTS
);
162 assert_se(namespace_flag_from_string(namespace_flag_to_string(CLONE_NEWUTS
)) == CLONE_NEWUTS
);
163 assert_se(streq(namespace_flag_to_string(namespace_flag_from_string("ipc")), "ipc"));
165 assert_se(namespace_flag_from_string_many(NULL
, &ul
) == 0 && ul
== 0);
166 assert_se(namespace_flag_from_string_many("", &ul
) == 0 && ul
== 0);
167 assert_se(namespace_flag_from_string_many("mnt uts ipc", &ul
) == 0 && ul
== (CLONE_NEWNS
|CLONE_NEWUTS
|CLONE_NEWIPC
));
169 assert_se(namespace_flag_to_string_many(NAMESPACE_FLAGS_ALL
, &s
) == 0);
170 assert_se(streq(s
, "cgroup ipc net mnt pid user uts"));
171 assert_se(namespace_flag_from_string_many(s
, &ul
) == 0 && ul
== NAMESPACE_FLAGS_ALL
);
173 if (!is_seccomp_available())
183 assert_se(seccomp_restrict_namespaces(CLONE_NEWNS
|CLONE_NEWNET
) >= 0);
185 assert_se(unshare(CLONE_NEWNS
) == 0);
186 assert_se(unshare(CLONE_NEWNET
) == 0);
187 assert_se(unshare(CLONE_NEWUTS
) == -1);
188 assert_se(errno
== EPERM
);
189 assert_se(unshare(CLONE_NEWIPC
) == -1);
190 assert_se(errno
== EPERM
);
191 assert_se(unshare(CLONE_NEWNET
|CLONE_NEWUTS
) == -1);
192 assert_se(errno
== EPERM
);
194 /* We use fd 0 (stdin) here, which of course will fail with EINVAL on setns(). Unless, of course, our
195 * seccomp filter is in effect: then it hits first and makes setns() return EPERM */
196 assert_se(setns(0, CLONE_NEWNS
) == -1);
197 assert_se(errno
== EINVAL
);
198 assert_se(setns(0, CLONE_NEWNET
) == -1);
199 assert_se(errno
== EINVAL
);
200 assert_se(setns(0, CLONE_NEWUTS
) == -1);
201 assert_se(errno
== EPERM
);
202 assert_se(setns(0, CLONE_NEWIPC
) == -1);
203 assert_se(errno
== EPERM
);
204 assert_se(setns(0, CLONE_NEWNET
|CLONE_NEWUTS
) == -1);
205 assert_se(errno
== EPERM
);
206 assert_se(setns(0, 0) == -1);
207 assert_se(errno
== EPERM
);
209 pid
= raw_clone(CLONE_NEWNS
);
213 pid
= raw_clone(CLONE_NEWNET
);
217 pid
= raw_clone(CLONE_NEWUTS
);
219 assert_se(errno
== EPERM
);
220 pid
= raw_clone(CLONE_NEWIPC
);
222 assert_se(errno
== EPERM
);
223 pid
= raw_clone(CLONE_NEWNET
|CLONE_NEWUTS
);
225 assert_se(errno
== EPERM
);
230 assert_se(wait_for_terminate_and_warn("nsseccomp", pid
, true) == EXIT_SUCCESS
);
233 static void test_protect_sysctl(void) {
236 if (!is_seccomp_available())
241 if (detect_container() > 0) /* in containers _sysctl() is likely missing anyway */
249 assert_se(syscall(__NR__sysctl
, NULL
) < 0);
250 assert_se(errno
== EFAULT
);
253 assert_se(seccomp_protect_sysctl() >= 0);
256 assert_se(syscall(__NR__sysctl
, 0, 0, 0) < 0);
257 assert_se(errno
== EPERM
);
263 assert_se(wait_for_terminate_and_warn("sysctlseccomp", pid
, true) == EXIT_SUCCESS
);
266 static void test_restrict_address_families(void) {
269 if (!is_seccomp_available())
281 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
285 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
289 fd
= socket(AF_NETLINK
, SOCK_DGRAM
, 0);
293 assert_se(s
= set_new(NULL
));
294 assert_se(set_put(s
, INT_TO_PTR(AF_UNIX
)) >= 0);
296 assert_se(seccomp_restrict_address_families(s
, false) >= 0);
298 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
302 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
303 #if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
308 assert_se(errno
== EAFNOSUPPORT
);
311 fd
= socket(AF_NETLINK
, SOCK_DGRAM
, 0);
317 assert_se(set_put(s
, INT_TO_PTR(AF_INET
)) >= 0);
319 assert_se(seccomp_restrict_address_families(s
, true) >= 0);
321 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
325 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
326 #if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
331 assert_se(errno
== EAFNOSUPPORT
);
334 fd
= socket(AF_NETLINK
, SOCK_DGRAM
, 0);
335 #if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
340 assert_se(errno
== EAFNOSUPPORT
);
346 assert_se(wait_for_terminate_and_warn("socketseccomp", pid
, true) == EXIT_SUCCESS
);
349 static void test_restrict_realtime(void) {
352 if (!is_seccomp_available())
357 if (detect_container() > 0) /* in containers RT privs are likely missing anyway */
364 assert_se(sched_setscheduler(0, SCHED_FIFO
, &(struct sched_param
) { .sched_priority
= 1 }) >= 0);
365 assert_se(sched_setscheduler(0, SCHED_RR
, &(struct sched_param
) { .sched_priority
= 1 }) >= 0);
366 assert_se(sched_setscheduler(0, SCHED_IDLE
, &(struct sched_param
) { .sched_priority
= 0 }) >= 0);
367 assert_se(sched_setscheduler(0, SCHED_BATCH
, &(struct sched_param
) { .sched_priority
= 0 }) >= 0);
368 assert_se(sched_setscheduler(0, SCHED_OTHER
, &(struct sched_param
) {}) >= 0);
370 assert_se(seccomp_restrict_realtime() >= 0);
372 assert_se(sched_setscheduler(0, SCHED_IDLE
, &(struct sched_param
) { .sched_priority
= 0 }) >= 0);
373 assert_se(sched_setscheduler(0, SCHED_BATCH
, &(struct sched_param
) { .sched_priority
= 0 }) >= 0);
374 assert_se(sched_setscheduler(0, SCHED_OTHER
, &(struct sched_param
) {}) >= 0);
376 assert_se(sched_setscheduler(0, SCHED_FIFO
, &(struct sched_param
) { .sched_priority
= 1 }) < 0);
377 assert_se(errno
== EPERM
);
378 assert_se(sched_setscheduler(0, SCHED_RR
, &(struct sched_param
) { .sched_priority
= 1 }) < 0);
379 assert_se(errno
== EPERM
);
384 assert_se(wait_for_terminate_and_warn("realtimeseccomp", pid
, true) == EXIT_SUCCESS
);
387 static void test_memory_deny_write_execute_mmap(void) {
390 if (!is_seccomp_available())
401 p
= mmap(NULL
, page_size(), PROT_WRITE
|PROT_EXEC
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
402 assert_se(p
!= MAP_FAILED
);
403 assert_se(munmap(p
, page_size()) >= 0);
405 p
= mmap(NULL
, page_size(), PROT_WRITE
|PROT_READ
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
406 assert_se(p
!= MAP_FAILED
);
407 assert_se(munmap(p
, page_size()) >= 0);
409 assert_se(seccomp_memory_deny_write_execute() >= 0);
411 p
= mmap(NULL
, page_size(), PROT_WRITE
|PROT_EXEC
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
412 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc64__) || defined(__arm__) || defined(__aarch64__)
413 assert_se(p
== MAP_FAILED
);
414 assert_se(errno
== EPERM
);
415 #else /* unknown architectures */
416 assert_se(p
!= MAP_FAILED
);
417 assert_se(munmap(p
, page_size()) >= 0);
420 p
= mmap(NULL
, page_size(), PROT_WRITE
|PROT_READ
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
421 assert_se(p
!= MAP_FAILED
);
422 assert_se(munmap(p
, page_size()) >= 0);
427 assert_se(wait_for_terminate_and_warn("memoryseccomp-mmap", pid
, true) == EXIT_SUCCESS
);
430 static void test_memory_deny_write_execute_shmat(void) {
434 if (!is_seccomp_available())
439 shmid
= shmget(IPC_PRIVATE
, page_size(), 0);
440 assert_se(shmid
>= 0);
448 p
= shmat(shmid
, NULL
, 0);
449 assert_se(p
!= MAP_FAILED
);
450 assert_se(shmdt(p
) == 0);
452 p
= shmat(shmid
, NULL
, SHM_EXEC
);
453 assert_se(p
!= MAP_FAILED
);
454 assert_se(shmdt(p
) == 0);
456 assert_se(seccomp_memory_deny_write_execute() >= 0);
458 p
= shmat(shmid
, NULL
, SHM_EXEC
);
459 #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
460 assert_se(p
== MAP_FAILED
);
461 assert_se(errno
== EPERM
);
462 #else /* __i386__, __powerpc64__, and "unknown" architectures */
463 assert_se(p
!= MAP_FAILED
);
464 assert_se(shmdt(p
) == 0);
467 p
= shmat(shmid
, NULL
, 0);
468 assert_se(p
!= MAP_FAILED
);
469 assert_se(shmdt(p
) == 0);
474 assert_se(wait_for_terminate_and_warn("memoryseccomp-shmat", pid
, true) == EXIT_SUCCESS
);
477 static void test_restrict_archs(void) {
480 if (!is_seccomp_available())
489 _cleanup_set_free_ Set
*s
= NULL
;
491 assert_se(access("/", F_OK
) >= 0);
493 assert_se(s
= set_new(NULL
));
496 assert_se(set_put(s
, UINT32_TO_PTR(SCMP_ARCH_X86
+1)) >= 0);
498 assert_se(seccomp_restrict_archs(s
) >= 0);
500 assert_se(access("/", F_OK
) >= 0);
501 assert_se(seccomp_restrict_archs(NULL
) >= 0);
503 assert_se(access("/", F_OK
) >= 0);
508 assert_se(wait_for_terminate_and_warn("archseccomp", pid
, true) == EXIT_SUCCESS
);
511 static void test_load_syscall_filter_set_raw(void) {
514 if (!is_seccomp_available())
523 _cleanup_hashmap_free_ Hashmap
*s
= NULL
;
525 assert_se(access("/", F_OK
) >= 0);
526 assert_se(poll(NULL
, 0, 0) == 0);
528 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW
, NULL
, SCMP_ACT_KILL
) >= 0);
529 assert_se(access("/", F_OK
) >= 0);
530 assert_se(poll(NULL
, 0, 0) == 0);
532 assert_se(s
= hashmap_new(NULL
));
533 #if SCMP_SYS(access) >= 0
534 assert_se(hashmap_put(s
, UINT32_TO_PTR(__NR_access
+ 1), INT_TO_PTR(-1)) >= 0);
536 assert_se(hashmap_put(s
, UINT32_TO_PTR(__NR_faccessat
+ 1), INT_TO_PTR(-1)) >= 0);
539 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW
, s
, SCMP_ACT_ERRNO(EUCLEAN
)) >= 0);
541 assert_se(access("/", F_OK
) < 0);
542 assert_se(errno
== EUCLEAN
);
544 assert_se(poll(NULL
, 0, 0) == 0);
548 assert_se(s
= hashmap_new(NULL
));
549 #if SCMP_SYS(access) >= 0
550 assert_se(hashmap_put(s
, UINT32_TO_PTR(__NR_access
+ 1), INT_TO_PTR(EILSEQ
)) >= 0);
552 assert_se(hashmap_put(s
, UINT32_TO_PTR(__NR_faccessat
+ 1), INT_TO_PTR(EILSEQ
)) >= 0);
555 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW
, s
, SCMP_ACT_ERRNO(EUCLEAN
)) >= 0);
557 assert_se(access("/", F_OK
) < 0);
558 assert_se(errno
== EILSEQ
);
560 assert_se(poll(NULL
, 0, 0) == 0);
564 assert_se(s
= hashmap_new(NULL
));
565 #if SCMP_SYS(poll) >= 0
566 assert_se(hashmap_put(s
, UINT32_TO_PTR(__NR_poll
+ 1), INT_TO_PTR(-1)) >= 0);
568 assert_se(hashmap_put(s
, UINT32_TO_PTR(__NR_ppoll
+ 1), INT_TO_PTR(-1)) >= 0);
571 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW
, s
, SCMP_ACT_ERRNO(EUNATCH
)) >= 0);
573 assert_se(access("/", F_OK
) < 0);
574 assert_se(errno
== EILSEQ
);
576 assert_se(poll(NULL
, 0, 0) < 0);
577 assert_se(errno
== EUNATCH
);
581 assert_se(s
= hashmap_new(NULL
));
582 #if SCMP_SYS(poll) >= 0
583 assert_se(hashmap_put(s
, UINT32_TO_PTR(__NR_poll
+ 1), INT_TO_PTR(EILSEQ
)) >= 0);
585 assert_se(hashmap_put(s
, UINT32_TO_PTR(__NR_ppoll
+ 1), INT_TO_PTR(EILSEQ
)) >= 0);
588 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW
, s
, SCMP_ACT_ERRNO(EUNATCH
)) >= 0);
590 assert_se(access("/", F_OK
) < 0);
591 assert_se(errno
== EILSEQ
);
593 assert_se(poll(NULL
, 0, 0) < 0);
594 assert_se(errno
== EILSEQ
);
599 assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid
, true) == EXIT_SUCCESS
);
602 static void test_lock_personality(void) {
603 unsigned long current
;
606 if (!is_seccomp_available())
611 assert_se(opinionated_personality(&current
) >= 0); /* pass the address of 'current'; its value is logged and locked below */
613 log_info("current personality=%lu", current
);
619 assert_se(seccomp_lock_personality(current
) >= 0);
621 assert_se((unsigned long) safe_personality(current
) == current
);
623 /* Note, we also test that safe_personality() works correctly, by checking whether errno is properly
624 * set, in addition to the return value */
626 assert_se(safe_personality(PER_LINUX
| ADDR_NO_RANDOMIZE
) == -EPERM
);
627 assert_se(errno
== EPERM
);
629 assert_se(safe_personality(PER_LINUX
| MMAP_PAGE_ZERO
) == -EPERM
);
630 assert_se(safe_personality(PER_LINUX
| ADDR_COMPAT_LAYOUT
) == -EPERM
);
631 assert_se(safe_personality(PER_LINUX
| READ_IMPLIES_EXEC
) == -EPERM
);
632 assert_se(safe_personality(PER_LINUX_32BIT
) == -EPERM
);
633 assert_se(safe_personality(PER_SVR4
) == -EPERM
);
634 assert_se(safe_personality(PER_BSD
) == -EPERM
);
635 assert_se(safe_personality(current
== PER_LINUX
? PER_LINUX32
: PER_LINUX
) == -EPERM
);
636 assert_se(safe_personality(PER_LINUX32_3GB
) == -EPERM
);
637 assert_se(safe_personality(PER_UW7
) == -EPERM
);
638 assert_se(safe_personality(0x42) == -EPERM
);
640 assert_se(safe_personality(PERSONALITY_INVALID
) == -EPERM
); /* maybe remove this later */
642 assert_se((unsigned long) personality(current
) == current
);
646 assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid
, true) == EXIT_SUCCESS
);
649 static void test_filter_sets_ordered(void) {
652 /* Ensure "@default" always remains at the beginning of the list */
653 assert_se(SYSCALL_FILTER_SET_DEFAULT
== 0);
654 assert_se(streq(syscall_filter_sets
[0].name
, "@default"));
656 for (i
= 0; i
< _SYSCALL_FILTER_SET_MAX
; i
++) {
657 const char *k
, *p
= NULL
;
659 /* Make sure each group has a description; index with i so every set is checked, not only the first */
660 assert_se(!isempty(syscall_filter_sets
[i].help
));
662 /* Make sure the groups are ordered alphabetically, except for the first entry */
663 assert_se(i
< 2 || strcmp(syscall_filter_sets
[i
-1].name
, syscall_filter_sets
[i
].name
) < 0);
665 NULSTR_FOREACH(k
, syscall_filter_sets
[i
].value
) {
667 /* Ensure each syscall list is in itself ordered, but groups before names */
669 (*p
== '@' && *k
!= '@') ||
670 (((*p
== '@' && *k
== '@') ||
671 (*p
!= '@' && *k
!= '@')) &&
679 int main(int argc
, char *argv
[]) {
681 log_set_max_level(LOG_DEBUG
);
683 test_seccomp_arch_to_string();
684 test_architecture_table();
685 test_syscall_filter_set_find();
687 test_restrict_namespace();
688 test_protect_sysctl();
689 test_restrict_address_families();
690 test_restrict_realtime();
691 test_memory_deny_write_execute_mmap();
692 test_memory_deny_write_execute_shmat();
693 test_restrict_archs();
694 test_load_syscall_filter_set_raw();
695 test_lock_personality();
696 test_filter_sets_ordered();