2 This file is part of systemd.
4 Copyright 2016 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/eventfd.h>
25 #include <sys/personality.h>
27 #include <sys/types.h>
30 #include "alloc-util.h"
35 #include "process-util.h"
36 #include "raw-clone.h"
37 #include "seccomp-util.h"
39 #include "string-util.h"
43 #if SCMP_SYS(socket) < 0 || defined(__i386__) || defined(__s390x__) || defined(__s390__)
44 /* On these archs, socket() is implemented via the socketcall() syscall multiplexer,
45 * and hence we can't restrict it via seccomp. */
46 # define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 1
48 # define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
51 static void test_seccomp_arch_to_string(void) {
55 a
= seccomp_arch_native();
57 name
= seccomp_arch_to_string(a
);
59 assert_se(seccomp_arch_from_string(name
, &b
) >= 0);
63 static void test_architecture_table(void) {
86 assert_se(seccomp_arch_from_string(n
, &c
) >= 0);
87 n2
= seccomp_arch_to_string(c
);
88 log_info("seccomp-arch: %s → 0x%"PRIx32
" → %s", n
, c
, n2
);
89 assert_se(streq_ptr(n
, n2
));
93 static void test_syscall_filter_set_find(void) {
94 assert_se(!syscall_filter_set_find(NULL
));
95 assert_se(!syscall_filter_set_find(""));
96 assert_se(!syscall_filter_set_find("quux"));
97 assert_se(!syscall_filter_set_find("@quux"));
99 assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets
+ SYSCALL_FILTER_SET_CLOCK
);
100 assert_se(syscall_filter_set_find("@default") == syscall_filter_sets
+ SYSCALL_FILTER_SET_DEFAULT
);
101 assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets
+ SYSCALL_FILTER_SET_RAW_IO
);
104 static void test_filter_sets(void) {
108 if (!is_seccomp_available())
113 for (i
= 0; i
< _SYSCALL_FILTER_SET_MAX
; i
++) {
116 log_info("Testing %s", syscall_filter_sets
[i
].name
);
121 if (pid
== 0) { /* Child? */
124 if (i
== SYSCALL_FILTER_SET_DEFAULT
) /* if we look at the default set, whitelist instead of blacklist */
125 r
= seccomp_load_syscall_filter_set(SCMP_ACT_ERRNO(EUCLEAN
), syscall_filter_sets
+ i
, SCMP_ACT_ALLOW
);
127 r
= seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ i
, SCMP_ACT_ERRNO(EUCLEAN
));
131 /* Test the syscall filter with one random system call */
132 fd
= eventfd(0, EFD_NONBLOCK
|EFD_CLOEXEC
);
133 if (IN_SET(i
, SYSCALL_FILTER_SET_IO_EVENT
, SYSCALL_FILTER_SET_DEFAULT
))
134 assert_se(fd
< 0 && errno
== EUCLEAN
);
143 assert_se(wait_for_terminate_and_warn(syscall_filter_sets
[i
].name
, pid
, true) == EXIT_SUCCESS
);
147 static void test_restrict_namespace(void) {
148 _cleanup_free_
char *s
= NULL
;
152 assert_se(namespace_flag_to_string(0) == NULL
);
153 assert_se(streq(namespace_flag_to_string(CLONE_NEWNS
), "mnt"));
154 assert_se(namespace_flag_to_string(CLONE_NEWNS
|CLONE_NEWIPC
) == NULL
);
155 assert_se(streq(namespace_flag_to_string(CLONE_NEWCGROUP
), "cgroup"));
157 assert_se(namespace_flag_from_string("mnt") == CLONE_NEWNS
);
158 assert_se(namespace_flag_from_string(NULL
) == 0);
159 assert_se(namespace_flag_from_string("") == 0);
160 assert_se(namespace_flag_from_string("uts") == CLONE_NEWUTS
);
161 assert_se(namespace_flag_from_string(namespace_flag_to_string(CLONE_NEWUTS
)) == CLONE_NEWUTS
);
162 assert_se(streq(namespace_flag_to_string(namespace_flag_from_string("ipc")), "ipc"));
164 assert_se(namespace_flag_from_string_many(NULL
, &ul
) == 0 && ul
== 0);
165 assert_se(namespace_flag_from_string_many("", &ul
) == 0 && ul
== 0);
166 assert_se(namespace_flag_from_string_many("mnt uts ipc", &ul
) == 0 && ul
== (CLONE_NEWNS
|CLONE_NEWUTS
|CLONE_NEWIPC
));
168 assert_se(namespace_flag_to_string_many(NAMESPACE_FLAGS_ALL
, &s
) == 0);
169 assert_se(streq(s
, "cgroup ipc net mnt pid user uts"));
170 assert_se(namespace_flag_from_string_many(s
, &ul
) == 0 && ul
== NAMESPACE_FLAGS_ALL
);
172 if (!is_seccomp_available())
182 assert_se(seccomp_restrict_namespaces(CLONE_NEWNS
|CLONE_NEWNET
) >= 0);
184 assert_se(unshare(CLONE_NEWNS
) == 0);
185 assert_se(unshare(CLONE_NEWNET
) == 0);
186 assert_se(unshare(CLONE_NEWUTS
) == -1);
187 assert_se(errno
== EPERM
);
188 assert_se(unshare(CLONE_NEWIPC
) == -1);
189 assert_se(errno
== EPERM
);
190 assert_se(unshare(CLONE_NEWNET
|CLONE_NEWUTS
) == -1);
191 assert_se(errno
== EPERM
);
193 /* We use fd 0 (stdin) here, which of course will fail with EINVAL on setns(). Unless, of course, our
194 * seccomp filter works: then it hits first and makes the call return EPERM. */
195 assert_se(setns(0, CLONE_NEWNS
) == -1);
196 assert_se(errno
== EINVAL
);
197 assert_se(setns(0, CLONE_NEWNET
) == -1);
198 assert_se(errno
== EINVAL
);
199 assert_se(setns(0, CLONE_NEWUTS
) == -1);
200 assert_se(errno
== EPERM
);
201 assert_se(setns(0, CLONE_NEWIPC
) == -1);
202 assert_se(errno
== EPERM
);
203 assert_se(setns(0, CLONE_NEWNET
|CLONE_NEWUTS
) == -1);
204 assert_se(errno
== EPERM
);
205 assert_se(setns(0, 0) == -1);
206 assert_se(errno
== EPERM
);
208 pid
= raw_clone(CLONE_NEWNS
);
212 pid
= raw_clone(CLONE_NEWNET
);
216 pid
= raw_clone(CLONE_NEWUTS
);
218 assert_se(errno
== EPERM
);
219 pid
= raw_clone(CLONE_NEWIPC
);
221 assert_se(errno
== EPERM
);
222 pid
= raw_clone(CLONE_NEWNET
|CLONE_NEWUTS
);
224 assert_se(errno
== EPERM
);
229 assert_se(wait_for_terminate_and_warn("nsseccomp", pid
, true) == EXIT_SUCCESS
);
232 static void test_protect_sysctl(void) {
235 if (!is_seccomp_available())
240 if (detect_container() > 0) /* in containers _sysctl() is likely missing anyway */
248 assert_se(syscall(__NR__sysctl
, NULL
) < 0);
249 assert_se(errno
== EFAULT
);
252 assert_se(seccomp_protect_sysctl() >= 0);
255 assert_se(syscall(__NR__sysctl
, 0, 0, 0) < 0);
256 assert_se(errno
== EPERM
);
262 assert_se(wait_for_terminate_and_warn("sysctlseccomp", pid
, true) == EXIT_SUCCESS
);
265 static void test_restrict_address_families(void) {
268 if (!is_seccomp_available())
280 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
284 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
288 fd
= socket(AF_NETLINK
, SOCK_DGRAM
, 0);
292 assert_se(s
= set_new(NULL
));
293 assert_se(set_put(s
, INT_TO_PTR(AF_UNIX
)) >= 0);
295 assert_se(seccomp_restrict_address_families(s
, false) >= 0);
297 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
301 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
302 #if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
307 assert_se(errno
== EAFNOSUPPORT
);
310 fd
= socket(AF_NETLINK
, SOCK_DGRAM
, 0);
316 assert_se(set_put(s
, INT_TO_PTR(AF_INET
)) >= 0);
318 assert_se(seccomp_restrict_address_families(s
, true) >= 0);
320 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
324 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
325 #if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
330 assert_se(errno
== EAFNOSUPPORT
);
333 fd
= socket(AF_NETLINK
, SOCK_DGRAM
, 0);
334 #if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
339 assert_se(errno
== EAFNOSUPPORT
);
345 assert_se(wait_for_terminate_and_warn("socketseccomp", pid
, true) == EXIT_SUCCESS
);
348 static void test_restrict_realtime(void) {
351 if (!is_seccomp_available())
356 if (detect_container() > 0) /* in containers RT privs are likely missing anyway */
363 assert_se(sched_setscheduler(0, SCHED_FIFO
, &(struct sched_param
) { .sched_priority
= 1 }) >= 0);
364 assert_se(sched_setscheduler(0, SCHED_RR
, &(struct sched_param
) { .sched_priority
= 1 }) >= 0);
365 assert_se(sched_setscheduler(0, SCHED_IDLE
, &(struct sched_param
) { .sched_priority
= 0 }) >= 0);
366 assert_se(sched_setscheduler(0, SCHED_BATCH
, &(struct sched_param
) { .sched_priority
= 0 }) >= 0);
367 assert_se(sched_setscheduler(0, SCHED_OTHER
, &(struct sched_param
) {}) >= 0);
369 assert_se(seccomp_restrict_realtime() >= 0);
371 assert_se(sched_setscheduler(0, SCHED_IDLE
, &(struct sched_param
) { .sched_priority
= 0 }) >= 0);
372 assert_se(sched_setscheduler(0, SCHED_BATCH
, &(struct sched_param
) { .sched_priority
= 0 }) >= 0);
373 assert_se(sched_setscheduler(0, SCHED_OTHER
, &(struct sched_param
) {}) >= 0);
375 assert_se(sched_setscheduler(0, SCHED_FIFO
, &(struct sched_param
) { .sched_priority
= 1 }) < 0);
376 assert_se(errno
== EPERM
);
377 assert_se(sched_setscheduler(0, SCHED_RR
, &(struct sched_param
) { .sched_priority
= 1 }) < 0);
378 assert_se(errno
== EPERM
);
383 assert_se(wait_for_terminate_and_warn("realtimeseccomp", pid
, true) == EXIT_SUCCESS
);
386 static void test_memory_deny_write_execute_mmap(void) {
389 if (!is_seccomp_available())
400 p
= mmap(NULL
, page_size(), PROT_WRITE
|PROT_EXEC
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
401 assert_se(p
!= MAP_FAILED
);
402 assert_se(munmap(p
, page_size()) >= 0);
404 p
= mmap(NULL
, page_size(), PROT_WRITE
|PROT_READ
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
405 assert_se(p
!= MAP_FAILED
);
406 assert_se(munmap(p
, page_size()) >= 0);
408 assert_se(seccomp_memory_deny_write_execute() >= 0);
410 p
= mmap(NULL
, page_size(), PROT_WRITE
|PROT_EXEC
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
411 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc64__) || defined(__arm__) || defined(__aarch64__)
412 assert_se(p
== MAP_FAILED
);
413 assert_se(errno
== EPERM
);
414 #else /* unknown architectures */
415 assert_se(p
!= MAP_FAILED
);
416 assert_se(munmap(p
, page_size()) >= 0);
419 p
= mmap(NULL
, page_size(), PROT_WRITE
|PROT_READ
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
420 assert_se(p
!= MAP_FAILED
);
421 assert_se(munmap(p
, page_size()) >= 0);
426 assert_se(wait_for_terminate_and_warn("memoryseccomp-mmap", pid
, true) == EXIT_SUCCESS
);
429 static void test_memory_deny_write_execute_shmat(void) {
433 if (!is_seccomp_available())
438 shmid
= shmget(IPC_PRIVATE
, page_size(), 0);
439 assert_se(shmid
>= 0);
447 p
= shmat(shmid
, NULL
, 0);
448 assert_se(p
!= MAP_FAILED
);
449 assert_se(shmdt(p
) == 0);
451 p
= shmat(shmid
, NULL
, SHM_EXEC
);
452 assert_se(p
!= MAP_FAILED
);
453 assert_se(shmdt(p
) == 0);
455 assert_se(seccomp_memory_deny_write_execute() >= 0);
457 p
= shmat(shmid
, NULL
, SHM_EXEC
);
458 #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
459 assert_se(p
== MAP_FAILED
);
460 assert_se(errno
== EPERM
);
461 #else /* __i386__, __powerpc64__, and "unknown" architectures */
462 assert_se(p
!= MAP_FAILED
);
463 assert_se(shmdt(p
) == 0);
466 p
= shmat(shmid
, NULL
, 0);
467 assert_se(p
!= MAP_FAILED
);
468 assert_se(shmdt(p
) == 0);
473 assert_se(wait_for_terminate_and_warn("memoryseccomp-shmat", pid
, true) == EXIT_SUCCESS
);
476 static void test_restrict_archs(void) {
479 if (!is_seccomp_available())
488 _cleanup_set_free_ Set
*s
= NULL
;
490 assert_se(access("/", F_OK
) >= 0);
492 assert_se(s
= set_new(NULL
));
495 assert_se(set_put(s
, UINT32_TO_PTR(SCMP_ARCH_X86
+1)) >= 0);
497 assert_se(seccomp_restrict_archs(s
) >= 0);
499 assert_se(access("/", F_OK
) >= 0);
500 assert_se(seccomp_restrict_archs(NULL
) >= 0);
502 assert_se(access("/", F_OK
) >= 0);
507 assert_se(wait_for_terminate_and_warn("archseccomp", pid
, true) == EXIT_SUCCESS
);
510 static void test_load_syscall_filter_set_raw(void) {
513 if (!is_seccomp_available())
522 _cleanup_set_free_ Set
*s
= NULL
;
524 assert_se(access("/", F_OK
) >= 0);
525 assert_se(poll(NULL
, 0, 0) == 0);
527 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW
, NULL
, SCMP_ACT_KILL
) >= 0);
528 assert_se(access("/", F_OK
) >= 0);
529 assert_se(poll(NULL
, 0, 0) == 0);
531 assert_se(s
= set_new(NULL
));
532 #if SCMP_SYS(access) >= 0
533 assert_se(set_put(s
, UINT32_TO_PTR(__NR_access
+ 1)) >= 0);
535 assert_se(set_put(s
, UINT32_TO_PTR(__NR_faccessat
+ 1)) >= 0);
538 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW
, s
, SCMP_ACT_ERRNO(EUCLEAN
)) >= 0);
540 assert_se(access("/", F_OK
) < 0);
541 assert_se(errno
== EUCLEAN
);
543 assert_se(poll(NULL
, 0, 0) == 0);
547 assert_se(s
= set_new(NULL
));
548 #if SCMP_SYS(poll) >= 0
549 assert_se(set_put(s
, UINT32_TO_PTR(__NR_poll
+ 1)) >= 0);
551 assert_se(set_put(s
, UINT32_TO_PTR(__NR_ppoll
+ 1)) >= 0);
554 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW
, s
, SCMP_ACT_ERRNO(EUNATCH
)) >= 0);
556 assert_se(access("/", F_OK
) < 0);
557 assert_se(errno
== EUCLEAN
);
559 assert_se(poll(NULL
, 0, 0) < 0);
560 assert_se(errno
== EUNATCH
);
565 assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid
, true) == EXIT_SUCCESS
);
568 static void test_lock_personality(void) {
569 unsigned long current
;
572 if (!is_seccomp_available())
577 assert_se(opinionated_personality(&current
) >= 0);
579 log_info("current personality=%lu", current
);
585 assert_se(seccomp_lock_personality(current
) >= 0);
587 assert_se((unsigned long) safe_personality(current
) == current
);
589 /* Note, we also test that safe_personality() works correctly, by checking whether errno is properly
590 * set, in addition to the return value */
592 assert_se(safe_personality(PER_LINUX
| ADDR_NO_RANDOMIZE
) == -EPERM
);
593 assert_se(errno
== EPERM
);
595 assert_se(safe_personality(PER_LINUX
| MMAP_PAGE_ZERO
) == -EPERM
);
596 assert_se(safe_personality(PER_LINUX
| ADDR_COMPAT_LAYOUT
) == -EPERM
);
597 assert_se(safe_personality(PER_LINUX
| READ_IMPLIES_EXEC
) == -EPERM
);
598 assert_se(safe_personality(PER_LINUX_32BIT
) == -EPERM
);
599 assert_se(safe_personality(PER_SVR4
) == -EPERM
);
600 assert_se(safe_personality(PER_BSD
) == -EPERM
);
601 assert_se(safe_personality(current
== PER_LINUX
? PER_LINUX32
: PER_LINUX
) == -EPERM
);
602 assert_se(safe_personality(PER_LINUX32_3GB
) == -EPERM
);
603 assert_se(safe_personality(PER_UW7
) == -EPERM
);
604 assert_se(safe_personality(0x42) == -EPERM
);
606 assert_se(safe_personality(PERSONALITY_INVALID
) == -EPERM
); /* maybe remove this later */
608 assert_se((unsigned long) personality(current
) == current
);
612 assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid
, true) == EXIT_SUCCESS
);
615 static void test_filter_sets_ordered(void) {
618 /* Ensure "@default" always remains at the beginning of the list */
619 assert_se(SYSCALL_FILTER_SET_DEFAULT
== 0);
620 assert_se(streq(syscall_filter_sets
[0].name
, "@default"));
622 for (i
= 0; i
< _SYSCALL_FILTER_SET_MAX
; i
++) {
623 const char *k
, *p
= NULL
;
625 /* Make sure each group has a description */
626 assert_se(!isempty(syscall_filter_sets
[0].help
));
628 /* Make sure the groups are ordered alphabetically, except for the first entry */
629 assert_se(i
< 2 || strcmp(syscall_filter_sets
[i
-1].name
, syscall_filter_sets
[i
].name
) < 0);
631 NULSTR_FOREACH(k
, syscall_filter_sets
[i
].value
) {
633 /* Ensure each syscall list is in itself ordered, but groups before names */
635 (*p
== '@' && *k
!= '@') ||
636 (((*p
== '@' && *k
== '@') ||
637 (*p
!= '@' && *k
!= '@')) &&
645 int main(int argc
, char *argv
[]) {
647 log_set_max_level(LOG_DEBUG
);
649 test_seccomp_arch_to_string();
650 test_architecture_table();
651 test_syscall_filter_set_find();
653 test_restrict_namespace();
654 test_protect_sysctl();
655 test_restrict_address_families();
656 test_restrict_realtime();
657 test_memory_deny_write_execute_mmap();
658 test_memory_deny_write_execute_shmat();
659 test_restrict_archs();
660 test_load_syscall_filter_set_raw();
661 test_lock_personality();
662 test_filter_sets_ordered();