]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/test/test-seccomp.c
Merge pull request #5270 from poettering/seccomp-namespace-fix
[thirdparty/systemd.git] / src / test / test-seccomp.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2016 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <sched.h>
21 #include <stdlib.h>
22 #include <sys/eventfd.h>
23 #include <sys/mman.h>
24 #include <unistd.h>
25 #include <sys/poll.h>
26
27 #include "alloc-util.h"
28 #include "fd-util.h"
29 #include "macro.h"
30 #include "missing.h"
31 #include "nsflags.h"
32 #include "process-util.h"
33 #include "raw-clone.h"
34 #include "seccomp-util.h"
35 #include "set.h"
36 #include "string-util.h"
37 #include "util.h"
38 #include "virt.h"
39
/* Round-trip check for the native architecture: its id must be non-zero, must map to a name, and that
 * name must parse back to the very same id. */
static void test_seccomp_arch_to_string(void) {
        const char *arch_name;
        uint32_t native, parsed;

        native = seccomp_arch_native();
        assert_se(native > 0);

        arch_name = seccomp_arch_to_string(native);
        assert_se(arch_name);

        assert_se(seccomp_arch_from_string(arch_name, &parsed) >= 0);
        assert_se(native == parsed);
}
51
52 static void test_architecture_table(void) {
53 const char *n, *n2;
54
55 NULSTR_FOREACH(n,
56 "native\0"
57 "x86\0"
58 "x86-64\0"
59 "x32\0"
60 "arm\0"
61 "arm64\0"
62 "mips\0"
63 "mips64\0"
64 "mips64-n32\0"
65 "mips-le\0"
66 "mips64-le\0"
67 "mips64-le-n32\0"
68 "ppc\0"
69 "ppc64\0"
70 "ppc64-le\0"
71 "s390\0"
72 "s390x\0") {
73 uint32_t c;
74
75 assert_se(seccomp_arch_from_string(n, &c) >= 0);
76 n2 = seccomp_arch_to_string(c);
77 log_info("seccomp-arch: %s → 0x%"PRIx32" → %s", n, c, n2);
78 assert_se(streq_ptr(n, n2));
79 }
80 }
81
82 static void test_syscall_filter_set_find(void) {
83 assert_se(!syscall_filter_set_find(NULL));
84 assert_se(!syscall_filter_set_find(""));
85 assert_se(!syscall_filter_set_find("quux"));
86 assert_se(!syscall_filter_set_find("@quux"));
87
88 assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK);
89 assert_se(syscall_filter_set_find("@default") == syscall_filter_sets + SYSCALL_FILTER_SET_DEFAULT);
90 assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO);
91 }
92
93 static void test_filter_sets(void) {
94 unsigned i;
95 int r;
96
97 if (!is_seccomp_available())
98 return;
99 if (geteuid() != 0)
100 return;
101
102 for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
103 pid_t pid;
104
105 log_info("Testing %s", syscall_filter_sets[i].name);
106
107 pid = fork();
108 assert_se(pid >= 0);
109
110 if (pid == 0) { /* Child? */
111 int fd;
112
113 if (i == SYSCALL_FILTER_SET_DEFAULT) /* if we look at the default set, whitelist instead of blacklist */
114 r = seccomp_load_syscall_filter_set(SCMP_ACT_ERRNO(EUCLEAN), syscall_filter_sets + i, SCMP_ACT_ALLOW);
115 else
116 r = seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EUCLEAN));
117 if (r < 0)
118 _exit(EXIT_FAILURE);
119
120 /* Test the sycall filter with one random system call */
121 fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
122 if (IN_SET(i, SYSCALL_FILTER_SET_IO_EVENT, SYSCALL_FILTER_SET_DEFAULT))
123 assert_se(fd < 0 && errno == EUCLEAN);
124 else {
125 assert_se(fd >= 0);
126 safe_close(fd);
127 }
128
129 _exit(EXIT_SUCCESS);
130 }
131
132 assert_se(wait_for_terminate_and_warn(syscall_filter_sets[i].name, pid, true) == EXIT_SUCCESS);
133 }
134 }
135
136 static void test_restrict_namespace(void) {
137 _cleanup_free_ char *s = NULL;
138 unsigned long ul;
139 pid_t pid;
140
141 assert_se(namespace_flag_to_string(0) == NULL);
142 assert_se(streq(namespace_flag_to_string(CLONE_NEWNS), "mnt"));
143 assert_se(namespace_flag_to_string(CLONE_NEWNS|CLONE_NEWIPC) == NULL);
144 assert_se(streq(namespace_flag_to_string(CLONE_NEWCGROUP), "cgroup"));
145
146 assert_se(namespace_flag_from_string("mnt") == CLONE_NEWNS);
147 assert_se(namespace_flag_from_string(NULL) == 0);
148 assert_se(namespace_flag_from_string("") == 0);
149 assert_se(namespace_flag_from_string("uts") == CLONE_NEWUTS);
150 assert_se(namespace_flag_from_string(namespace_flag_to_string(CLONE_NEWUTS)) == CLONE_NEWUTS);
151 assert_se(streq(namespace_flag_to_string(namespace_flag_from_string("ipc")), "ipc"));
152
153 assert_se(namespace_flag_from_string_many(NULL, &ul) == 0 && ul == 0);
154 assert_se(namespace_flag_from_string_many("", &ul) == 0 && ul == 0);
155 assert_se(namespace_flag_from_string_many("mnt uts ipc", &ul) == 0 && ul == (CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC));
156
157 assert_se(namespace_flag_to_string_many(NAMESPACE_FLAGS_ALL, &s) == 0);
158 assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
159 assert_se(namespace_flag_from_string_many(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
160
161 #if SECCOMP_RESTRICT_NAMESPACES_BROKEN == 0
162
163 if (!is_seccomp_available())
164 return;
165 if (geteuid() != 0)
166 return;
167
168 pid = fork();
169 assert_se(pid >= 0);
170
171 if (pid == 0) {
172
173 assert_se(seccomp_restrict_namespaces(CLONE_NEWNS|CLONE_NEWNET) >= 0);
174
175 assert_se(unshare(CLONE_NEWNS) == 0);
176 assert_se(unshare(CLONE_NEWNET) == 0);
177 assert_se(unshare(CLONE_NEWUTS) == -1);
178 assert_se(errno == EPERM);
179 assert_se(unshare(CLONE_NEWIPC) == -1);
180 assert_se(errno == EPERM);
181 assert_se(unshare(CLONE_NEWNET|CLONE_NEWUTS) == -1);
182 assert_se(errno == EPERM);
183
184 /* We use fd 0 (stdin) here, which of course will fail with EINVAL on setns(). Except of course our
185 * seccomp filter worked, and hits first and makes it return EPERM */
186 assert_se(setns(0, CLONE_NEWNS) == -1);
187 assert_se(errno == EINVAL);
188 assert_se(setns(0, CLONE_NEWNET) == -1);
189 assert_se(errno == EINVAL);
190 assert_se(setns(0, CLONE_NEWUTS) == -1);
191 assert_se(errno == EPERM);
192 assert_se(setns(0, CLONE_NEWIPC) == -1);
193 assert_se(errno == EPERM);
194 assert_se(setns(0, CLONE_NEWNET|CLONE_NEWUTS) == -1);
195 assert_se(errno == EPERM);
196 assert_se(setns(0, 0) == -1);
197 assert_se(errno == EPERM);
198
199 pid = raw_clone(CLONE_NEWNS);
200 assert_se(pid >= 0);
201 if (pid == 0)
202 _exit(EXIT_SUCCESS);
203 pid = raw_clone(CLONE_NEWNET);
204 assert_se(pid >= 0);
205 if (pid == 0)
206 _exit(EXIT_SUCCESS);
207 pid = raw_clone(CLONE_NEWUTS);
208 assert_se(pid < 0);
209 assert_se(errno == EPERM);
210 pid = raw_clone(CLONE_NEWIPC);
211 assert_se(pid < 0);
212 assert_se(errno == EPERM);
213 pid = raw_clone(CLONE_NEWNET|CLONE_NEWUTS);
214 assert_se(pid < 0);
215 assert_se(errno == EPERM);
216
217 _exit(EXIT_SUCCESS);
218 }
219
220 assert_se(wait_for_terminate_and_warn("nsseccomp", pid, true) == EXIT_SUCCESS);
221 #endif
222 }
223
224 static void test_protect_sysctl(void) {
225 pid_t pid;
226
227 if (!is_seccomp_available())
228 return;
229 if (geteuid() != 0)
230 return;
231
232 if (detect_container() > 0) /* in containers _sysctl() is likely missing anyway */
233 return;
234
235 pid = fork();
236 assert_se(pid >= 0);
237
238 if (pid == 0) {
239 assert_se(syscall(__NR__sysctl, NULL) < 0);
240 assert_se(errno == EFAULT);
241
242 assert_se(seccomp_protect_sysctl() >= 0);
243
244 assert_se(syscall(__NR__sysctl, 0, 0, 0) < 0);
245 assert_se(errno == EPERM);
246
247 _exit(EXIT_SUCCESS);
248 }
249
250 assert_se(wait_for_terminate_and_warn("sysctlseccomp", pid, true) == EXIT_SUCCESS);
251 }
252
253 static void test_restrict_address_families(void) {
254 pid_t pid;
255
256 if (!is_seccomp_available())
257 return;
258 if (geteuid() != 0)
259 return;
260
261 pid = fork();
262 assert_se(pid >= 0);
263
264 if (pid == 0) {
265 int fd;
266 Set *s;
267
268 fd = socket(AF_INET, SOCK_DGRAM, 0);
269 assert_se(fd >= 0);
270 safe_close(fd);
271
272 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
273 assert_se(fd >= 0);
274 safe_close(fd);
275
276 fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
277 assert_se(fd >= 0);
278 safe_close(fd);
279
280 assert_se(s = set_new(NULL));
281 assert_se(set_put(s, INT_TO_PTR(AF_UNIX)) >= 0);
282
283 assert_se(seccomp_restrict_address_families(s, false) >= 0);
284
285 fd = socket(AF_INET, SOCK_DGRAM, 0);
286 assert_se(fd >= 0);
287 safe_close(fd);
288
289 #if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
290 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
291 assert_se(fd >= 0);
292 safe_close(fd);
293 #else
294 assert_se(socket(AF_UNIX, SOCK_DGRAM, 0) < 0);
295 assert_se(errno == EAFNOSUPPORT);
296 #endif
297
298 fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
299 assert_se(fd >= 0);
300 safe_close(fd);
301
302 set_clear(s);
303
304 assert_se(set_put(s, INT_TO_PTR(AF_INET)) >= 0);
305
306 assert_se(seccomp_restrict_address_families(s, true) >= 0);
307
308 fd = socket(AF_INET, SOCK_DGRAM, 0);
309 assert_se(fd >= 0);
310 safe_close(fd);
311
312 #if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
313 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
314 assert_se(fd >= 0);
315 safe_close(fd);
316
317 fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
318 assert_se(fd >= 0);
319 safe_close(fd);
320 #else
321 assert_se(socket(AF_UNIX, SOCK_DGRAM, 0) < 0);
322 assert_se(errno == EAFNOSUPPORT);
323
324 assert_se(socket(AF_NETLINK, SOCK_DGRAM, 0) < 0);
325 assert_se(errno == EAFNOSUPPORT);
326 #endif
327
328 _exit(EXIT_SUCCESS);
329 }
330
331 assert_se(wait_for_terminate_and_warn("socketseccomp", pid, true) == EXIT_SUCCESS);
332 }
333
334 static void test_restrict_realtime(void) {
335 pid_t pid;
336
337 if (!is_seccomp_available())
338 return;
339 if (geteuid() != 0)
340 return;
341
342 if (detect_container() > 0) /* in containers RT privs are likely missing anyway */
343 return;
344
345 pid = fork();
346 assert_se(pid >= 0);
347
348 if (pid == 0) {
349 assert_se(sched_setscheduler(0, SCHED_FIFO, &(struct sched_param) { .sched_priority = 1 }) >= 0);
350 assert_se(sched_setscheduler(0, SCHED_RR, &(struct sched_param) { .sched_priority = 1 }) >= 0);
351 assert_se(sched_setscheduler(0, SCHED_IDLE, &(struct sched_param) { .sched_priority = 0 }) >= 0);
352 assert_se(sched_setscheduler(0, SCHED_BATCH, &(struct sched_param) { .sched_priority = 0 }) >= 0);
353 assert_se(sched_setscheduler(0, SCHED_OTHER, &(struct sched_param) {}) >= 0);
354
355 assert_se(seccomp_restrict_realtime() >= 0);
356
357 assert_se(sched_setscheduler(0, SCHED_IDLE, &(struct sched_param) { .sched_priority = 0 }) >= 0);
358 assert_se(sched_setscheduler(0, SCHED_BATCH, &(struct sched_param) { .sched_priority = 0 }) >= 0);
359 assert_se(sched_setscheduler(0, SCHED_OTHER, &(struct sched_param) {}) >= 0);
360
361 assert_se(sched_setscheduler(0, SCHED_FIFO, &(struct sched_param) { .sched_priority = 1 }) < 0);
362 assert_se(errno == EPERM);
363 assert_se(sched_setscheduler(0, SCHED_RR, &(struct sched_param) { .sched_priority = 1 }) < 0);
364 assert_se(errno == EPERM);
365
366 _exit(EXIT_SUCCESS);
367 }
368
369 assert_se(wait_for_terminate_and_warn("realtimeseccomp", pid, true) == EXIT_SUCCESS);
370 }
371
372 static void test_memory_deny_write_execute(void) {
373 pid_t pid;
374
375 if (!is_seccomp_available())
376 return;
377 if (geteuid() != 0)
378 return;
379
380 pid = fork();
381 assert_se(pid >= 0);
382
383 if (pid == 0) {
384 void *p;
385
386 p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
387 assert_se(p != MAP_FAILED);
388 assert_se(munmap(p, page_size()) >= 0);
389
390 p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
391 assert_se(p != MAP_FAILED);
392 assert_se(munmap(p, page_size()) >= 0);
393
394 assert_se(seccomp_memory_deny_write_execute() >= 0);
395
396 #if SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN
397 p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
398 assert_se(p != MAP_FAILED);
399 assert_se(munmap(p, page_size()) >= 0);
400 #else
401 p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
402 assert_se(p == MAP_FAILED);
403 assert_se(errno == EPERM);
404 #endif
405
406 p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
407 assert_se(p != MAP_FAILED);
408 assert_se(munmap(p, page_size()) >= 0);
409
410 _exit(EXIT_SUCCESS);
411 }
412
413 assert_se(wait_for_terminate_and_warn("memoryseccomp", pid, true) == EXIT_SUCCESS);
414 }
415
416 static void test_restrict_archs(void) {
417 pid_t pid;
418
419 if (!is_seccomp_available())
420 return;
421 if (geteuid() != 0)
422 return;
423
424 pid = fork();
425 assert_se(pid >= 0);
426
427 if (pid == 0) {
428 _cleanup_set_free_ Set *s = NULL;
429
430 assert_se(access("/", F_OK) >= 0);
431
432 assert_se(s = set_new(NULL));
433
434 #ifdef __x86_64__
435 assert_se(set_put(s, UINT32_TO_PTR(SCMP_ARCH_X86+1)) >= 0);
436 #endif
437 assert_se(seccomp_restrict_archs(s) >= 0);
438
439 assert_se(access("/", F_OK) >= 0);
440 assert_se(seccomp_restrict_archs(NULL) >= 0);
441
442 assert_se(access("/", F_OK) >= 0);
443
444 _exit(EXIT_SUCCESS);
445 }
446
447 assert_se(wait_for_terminate_and_warn("archseccomp", pid, true) == EXIT_SUCCESS);
448 }
449
450 static void test_load_syscall_filter_set_raw(void) {
451 pid_t pid;
452
453 if (!is_seccomp_available())
454 return;
455 if (geteuid() != 0)
456 return;
457
458 pid = fork();
459 assert_se(pid >= 0);
460
461 if (pid == 0) {
462 _cleanup_set_free_ Set *s = NULL;
463
464 assert_se(access("/", F_OK) >= 0);
465 assert_se(poll(NULL, 0, 0) == 0);
466
467 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, NULL, SCMP_ACT_KILL) >= 0);
468 assert_se(access("/", F_OK) >= 0);
469 assert_se(poll(NULL, 0, 0) == 0);
470
471 assert_se(s = set_new(NULL));
472 assert_se(set_put(s, UINT32_TO_PTR(__NR_access + 1)) >= 0);
473
474 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUCLEAN)) >= 0);
475
476 assert_se(access("/", F_OK) < 0);
477 assert_se(errno == EUCLEAN);
478
479 assert_se(poll(NULL, 0, 0) == 0);
480
481 s = set_free(s);
482
483 assert_se(s = set_new(NULL));
484 assert_se(set_put(s, UINT32_TO_PTR(__NR_poll + 1)) >= 0);
485
486 assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUNATCH)) >= 0);
487
488 assert_se(access("/", F_OK) < 0);
489 assert_se(errno == EUCLEAN);
490
491 assert_se(poll(NULL, 0, 0) < 0);
492 assert_se(errno == EUNATCH);
493
494 _exit(EXIT_SUCCESS);
495 }
496
497 assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid, true) == EXIT_SUCCESS);
498 }
499
500 int main(int argc, char *argv[]) {
501
502 log_set_max_level(LOG_DEBUG);
503
504 test_seccomp_arch_to_string();
505 test_architecture_table();
506 test_syscall_filter_set_find();
507 test_filter_sets();
508 test_restrict_namespace();
509 test_protect_sysctl();
510 test_restrict_address_families();
511 test_restrict_realtime();
512 test_memory_deny_write_execute();
513 test_restrict_archs();
514 test_load_syscall_filter_set_raw();
515
516 return 0;
517 }