]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/seccomp-util.c
seccomp: add two new filter sets: @reboot and @swap
[thirdparty/systemd.git] / src / shared / seccomp-util.c
CommitLineData
57183d11
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2014 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
a8fbdf54 20#include <errno.h>
57183d11 21#include <seccomp.h>
a8fbdf54 22#include <stddef.h>
d347d902
FS
23#include <sys/prctl.h>
24#include <linux/seccomp.h>
57183d11 25
add00535 26#include "alloc-util.h"
a8fbdf54 27#include "macro.h"
add00535 28#include "nsflags.h"
cf0fbc49 29#include "seccomp-util.h"
07630cea 30#include "string-util.h"
8130926d 31#include "util.h"
57183d11
LP
32
33const char* seccomp_arch_to_string(uint32_t c) {
aa34055f
ZJS
34 /* Maintain order used in <seccomp.h>.
35 *
36 * Names used here should be the same as those used for ConditionArchitecture=,
37 * except for "subarchitectures" like x32. */
57183d11 38
aa34055f
ZJS
39 switch(c) {
40 case SCMP_ARCH_NATIVE:
57183d11 41 return "native";
aa34055f 42 case SCMP_ARCH_X86:
57183d11 43 return "x86";
aa34055f 44 case SCMP_ARCH_X86_64:
57183d11 45 return "x86-64";
aa34055f 46 case SCMP_ARCH_X32:
57183d11 47 return "x32";
aa34055f 48 case SCMP_ARCH_ARM:
57183d11 49 return "arm";
aa34055f
ZJS
50 case SCMP_ARCH_AARCH64:
51 return "arm64";
52 case SCMP_ARCH_MIPS:
53 return "mips";
54 case SCMP_ARCH_MIPS64:
55 return "mips64";
56 case SCMP_ARCH_MIPS64N32:
57 return "mips64-n32";
58 case SCMP_ARCH_MIPSEL:
59 return "mips-le";
60 case SCMP_ARCH_MIPSEL64:
61 return "mips64-le";
62 case SCMP_ARCH_MIPSEL64N32:
63 return "mips64-le-n32";
64 case SCMP_ARCH_PPC:
65 return "ppc";
66 case SCMP_ARCH_PPC64:
67 return "ppc64";
68 case SCMP_ARCH_PPC64LE:
69 return "ppc64-le";
70 case SCMP_ARCH_S390:
6abfd303 71 return "s390";
aa34055f 72 case SCMP_ARCH_S390X:
6abfd303 73 return "s390x";
aa34055f
ZJS
74 default:
75 return NULL;
76 }
57183d11
LP
77}
78
79int seccomp_arch_from_string(const char *n, uint32_t *ret) {
80 if (!n)
81 return -EINVAL;
82
83 assert(ret);
84
85 if (streq(n, "native"))
86 *ret = SCMP_ARCH_NATIVE;
87 else if (streq(n, "x86"))
88 *ret = SCMP_ARCH_X86;
89 else if (streq(n, "x86-64"))
90 *ret = SCMP_ARCH_X86_64;
91 else if (streq(n, "x32"))
92 *ret = SCMP_ARCH_X32;
93 else if (streq(n, "arm"))
94 *ret = SCMP_ARCH_ARM;
aa34055f
ZJS
95 else if (streq(n, "arm64"))
96 *ret = SCMP_ARCH_AARCH64;
97 else if (streq(n, "mips"))
98 *ret = SCMP_ARCH_MIPS;
99 else if (streq(n, "mips64"))
100 *ret = SCMP_ARCH_MIPS64;
101 else if (streq(n, "mips64-n32"))
102 *ret = SCMP_ARCH_MIPS64N32;
103 else if (streq(n, "mips-le"))
104 *ret = SCMP_ARCH_MIPSEL;
105 else if (streq(n, "mips64-le"))
106 *ret = SCMP_ARCH_MIPSEL64;
107 else if (streq(n, "mips64-le-n32"))
108 *ret = SCMP_ARCH_MIPSEL64N32;
109 else if (streq(n, "ppc"))
110 *ret = SCMP_ARCH_PPC;
111 else if (streq(n, "ppc64"))
112 *ret = SCMP_ARCH_PPC64;
113 else if (streq(n, "ppc64-le"))
114 *ret = SCMP_ARCH_PPC64LE;
6abfd303
HB
115 else if (streq(n, "s390"))
116 *ret = SCMP_ARCH_S390;
117 else if (streq(n, "s390x"))
118 *ret = SCMP_ARCH_S390X;
57183d11
LP
119 else
120 return -EINVAL;
121
122 return 0;
123}
e9642be2 124
8d7b0c8f
LP
125int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action) {
126 scmp_filter_ctx seccomp;
127 int r;
128
129 /* Much like seccomp_init(), but tries to be a bit more conservative in its defaults: all secondary archs are
130 * added by default, and NNP is turned off. */
131
132 seccomp = seccomp_init(default_action);
133 if (!seccomp)
134 return -ENOMEM;
135
136 r = seccomp_add_secondary_archs(seccomp);
137 if (r < 0)
138 goto finish;
139
140 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
141 if (r < 0)
142 goto finish;
143
144 *ret = seccomp;
145 return 0;
146
147finish:
148 seccomp_release(seccomp);
149 return r;
150}
151
aa34055f 152int seccomp_add_secondary_archs(scmp_filter_ctx ctx) {
e9642be2
LP
153
154 /* Add in all possible secondary archs we are aware of that
155 * this kernel might support. */
156
aa34055f
ZJS
157 static const int seccomp_arches[] = {
158#if defined(__i386__) || defined(__x86_64__)
159 SCMP_ARCH_X86,
160 SCMP_ARCH_X86_64,
161 SCMP_ARCH_X32,
162
163#elif defined(__arm__) || defined(__aarch64__)
164 SCMP_ARCH_ARM,
165 SCMP_ARCH_AARCH64,
166
167#elif defined(__arm__) || defined(__aarch64__)
168 SCMP_ARCH_ARM,
169 SCMP_ARCH_AARCH64,
170
171#elif defined(__mips__) || defined(__mips64__)
172 SCMP_ARCH_MIPS,
173 SCMP_ARCH_MIPS64,
174 SCMP_ARCH_MIPS64N32,
175 SCMP_ARCH_MIPSEL,
176 SCMP_ARCH_MIPSEL64,
177 SCMP_ARCH_MIPSEL64N32,
178
179#elif defined(__powerpc__) || defined(__powerpc64__)
180 SCMP_ARCH_PPC,
181 SCMP_ARCH_PPC64,
182 SCMP_ARCH_PPC64LE,
e9642be2 183
6abfd303 184#elif defined(__s390__) || defined(__s390x__)
aa34055f
ZJS
185 SCMP_ARCH_S390,
186 SCMP_ARCH_S390X,
187#endif
188 };
6abfd303 189
aa34055f
ZJS
190 unsigned i;
191 int r;
6abfd303 192
aa34055f
ZJS
193 for (i = 0; i < ELEMENTSOF(seccomp_arches); i++) {
194 r = seccomp_arch_add(ctx, seccomp_arches[i]);
195 if (r < 0 && r != -EEXIST)
196 return r;
197 }
e9642be2
LP
198
199 return 0;
e9642be2 200}
201c1cc2 201
d347d902
FS
static bool is_basic_seccomp_available(void) {
        /* Queries the calling thread's seccomp mode; any non-negative answer is taken to mean that the
         * kernel knows about seccomp at all. */
        return prctl(PR_GET_SECCOMP, 0, 0, 0, 0) >= 0;
}
207
static bool is_seccomp_filter_available(void) {
        /* Probes for filter mode by requesting it with a NULL filter program. The request is expected to
         * fail; only a failure with EFAULT is treated as "filter mode is there". */
        if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) >= 0)
                return false;

        return errno == EFAULT;
}
213
bool is_seccomp_available(void) {
        /* Tri-state cache: negative means "not probed yet", otherwise it holds the boolean result of the
         * first probe, so the prctl() checks run at most once per process. */
        static int cached_enabled = -1;

        if (cached_enabled >= 0)
                return cached_enabled;

        cached_enabled = is_basic_seccomp_available() && is_seccomp_filter_available();
        return cached_enabled;
}
220
8130926d 221const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
40eb6a80 222 [SYSCALL_FILTER_SET_DEFAULT] = {
40eb6a80 223 .name = "@default",
d5efc18b 224 .help = "System calls that are always permitted",
40eb6a80
ZJS
225 .value =
226 "clock_getres\0"
227 "clock_gettime\0"
228 "clock_nanosleep\0"
229 "execve\0"
230 "exit\0"
231 "exit_group\0"
232 "getrlimit\0" /* make sure processes can query stack size and such */
233 "gettimeofday\0"
234 "nanosleep\0"
235 "pause\0"
236 "rt_sigreturn\0"
237 "sigreturn\0"
238 "time\0"
239 },
133ddbbe 240 [SYSCALL_FILTER_SET_BASIC_IO] = {
133ddbbe 241 .name = "@basic-io",
d5efc18b 242 .help = "Basic IO",
133ddbbe
LP
243 .value =
244 "close\0"
245 "dup2\0"
246 "dup3\0"
247 "dup\0"
248 "lseek\0"
249 "pread64\0"
250 "preadv\0"
251 "pwrite64\0"
252 "pwritev\0"
253 "read\0"
254 "readv\0"
255 "write\0"
256 "writev\0"
257 },
8130926d 258 [SYSCALL_FILTER_SET_CLOCK] = {
8130926d 259 .name = "@clock",
d5efc18b 260 .help = "Change the system time",
201c1cc2
TM
261 .value =
262 "adjtimex\0"
1f9ac68b
LP
263 "clock_adjtime\0"
264 "clock_settime\0"
201c1cc2 265 "settimeofday\0"
1f9ac68b 266 "stime\0"
8130926d
LP
267 },
268 [SYSCALL_FILTER_SET_CPU_EMULATION] = {
8130926d 269 .name = "@cpu-emulation",
d5efc18b 270 .help = "System calls for CPU emulation functionality",
1f9ac68b
LP
271 .value =
272 "modify_ldt\0"
273 "subpage_prot\0"
274 "switch_endian\0"
275 "vm86\0"
276 "vm86old\0"
8130926d
LP
277 },
278 [SYSCALL_FILTER_SET_DEBUG] = {
8130926d 279 .name = "@debug",
d5efc18b 280 .help = "Debugging, performance monitoring and tracing functionality",
1f9ac68b
LP
281 .value =
282 "lookup_dcookie\0"
283 "perf_event_open\0"
284 "process_vm_readv\0"
285 "process_vm_writev\0"
286 "ptrace\0"
287 "rtas\0"
8130926d 288#ifdef __NR_s390_runtime_instr
1f9ac68b 289 "s390_runtime_instr\0"
8130926d 290#endif
1f9ac68b 291 "sys_debug_setcontext\0"
8130926d 292 },
1a1b13c9
LP
293 [SYSCALL_FILTER_SET_FILE_SYSTEM] = {
294 .name = "@file-system",
295 .help = "File system operations",
296 .value =
297 "access\0"
298 "chdir\0"
299 "chmod\0"
300 "close\0"
301 "creat\0"
302 "faccessat\0"
303 "fallocate\0"
304 "fchdir\0"
305 "fchmod\0"
306 "fchmodat\0"
307 "fcntl64\0"
308 "fcntl\0"
309 "fgetxattr\0"
310 "flistxattr\0"
311 "fsetxattr\0"
312 "fstat64\0"
313 "fstat\0"
314 "fstatat64\0"
315 "fstatfs64\0"
316 "fstatfs\0"
317 "ftruncate64\0"
318 "ftruncate\0"
319 "futimesat\0"
320 "getcwd\0"
321 "getdents64\0"
322 "getdents\0"
323 "getxattr\0"
324 "inotify_add_watch\0"
325 "inotify_init1\0"
326 "inotify_rm_watch\0"
327 "lgetxattr\0"
328 "link\0"
329 "linkat\0"
330 "listxattr\0"
331 "llistxattr\0"
332 "lremovexattr\0"
333 "lsetxattr\0"
334 "lstat64\0"
335 "lstat\0"
336 "mkdir\0"
337 "mkdirat\0"
338 "mknod\0"
339 "mknodat\0"
340 "mmap2\0"
341 "mmap\0"
342 "newfstatat\0"
343 "open\0"
344 "openat\0"
345 "readlink\0"
346 "readlinkat\0"
347 "removexattr\0"
348 "rename\0"
349 "renameat2\0"
350 "renameat\0"
351 "rmdir\0"
352 "setxattr\0"
353 "stat64\0"
354 "stat\0"
355 "statfs\0"
356 "symlink\0"
357 "symlinkat\0"
358 "truncate64\0"
359 "truncate\0"
360 "unlink\0"
361 "unlinkat\0"
362 "utimensat\0"
363 "utimes\0"
364 },
8130926d 365 [SYSCALL_FILTER_SET_IO_EVENT] = {
8130926d 366 .name = "@io-event",
d5efc18b 367 .help = "Event loop system calls",
201c1cc2
TM
368 .value =
369 "_newselect\0"
370 "epoll_create1\0"
371 "epoll_create\0"
372 "epoll_ctl\0"
373 "epoll_ctl_old\0"
374 "epoll_pwait\0"
375 "epoll_wait\0"
376 "epoll_wait_old\0"
377 "eventfd2\0"
378 "eventfd\0"
379 "poll\0"
380 "ppoll\0"
381 "pselect6\0"
382 "select\0"
8130926d
LP
383 },
384 [SYSCALL_FILTER_SET_IPC] = {
8130926d 385 .name = "@ipc",
d5efc18b
ZJS
386 .help = "SysV IPC, POSIX Message Queues or other IPC",
387 .value =
388 "ipc\0"
cd5bfd7e 389 "memfd_create\0"
201c1cc2
TM
390 "mq_getsetattr\0"
391 "mq_notify\0"
392 "mq_open\0"
393 "mq_timedreceive\0"
394 "mq_timedsend\0"
395 "mq_unlink\0"
396 "msgctl\0"
397 "msgget\0"
398 "msgrcv\0"
399 "msgsnd\0"
cd5bfd7e
LP
400 "pipe2\0"
401 "pipe\0"
201c1cc2
TM
402 "process_vm_readv\0"
403 "process_vm_writev\0"
404 "semctl\0"
405 "semget\0"
406 "semop\0"
407 "semtimedop\0"
408 "shmat\0"
409 "shmctl\0"
410 "shmdt\0"
411 "shmget\0"
8130926d
LP
412 },
413 [SYSCALL_FILTER_SET_KEYRING] = {
8130926d 414 .name = "@keyring",
d5efc18b 415 .help = "Kernel keyring access",
1f9ac68b
LP
416 .value =
417 "add_key\0"
418 "keyctl\0"
419 "request_key\0"
8130926d
LP
420 },
421 [SYSCALL_FILTER_SET_MODULE] = {
8130926d 422 .name = "@module",
d5efc18b 423 .help = "Loading and unloading of kernel modules",
201c1cc2 424 .value =
201c1cc2
TM
425 "delete_module\0"
426 "finit_module\0"
427 "init_module\0"
8130926d
LP
428 },
429 [SYSCALL_FILTER_SET_MOUNT] = {
8130926d 430 .name = "@mount",
d5efc18b 431 .help = "Mounting and unmounting of file systems",
201c1cc2
TM
432 .value =
433 "chroot\0"
434 "mount\0"
201c1cc2
TM
435 "pivot_root\0"
436 "umount2\0"
437 "umount\0"
8130926d
LP
438 },
439 [SYSCALL_FILTER_SET_NETWORK_IO] = {
8130926d 440 .name = "@network-io",
d5efc18b 441 .help = "Network or Unix socket IO, should not be needed if not network facing",
201c1cc2
TM
442 .value =
443 "accept4\0"
444 "accept\0"
445 "bind\0"
446 "connect\0"
447 "getpeername\0"
448 "getsockname\0"
449 "getsockopt\0"
450 "listen\0"
451 "recv\0"
452 "recvfrom\0"
453 "recvmmsg\0"
454 "recvmsg\0"
455 "send\0"
456 "sendmmsg\0"
457 "sendmsg\0"
458 "sendto\0"
459 "setsockopt\0"
460 "shutdown\0"
461 "socket\0"
462 "socketcall\0"
463 "socketpair\0"
8130926d
LP
464 },
465 [SYSCALL_FILTER_SET_OBSOLETE] = {
d5efc18b 466 /* some unknown even to libseccomp */
8130926d 467 .name = "@obsolete",
d5efc18b 468 .help = "Unusual, obsolete or unimplemented system calls",
201c1cc2
TM
469 .value =
470 "_sysctl\0"
471 "afs_syscall\0"
472 "break\0"
1f9ac68b 473 "create_module\0"
201c1cc2
TM
474 "ftime\0"
475 "get_kernel_syms\0"
201c1cc2
TM
476 "getpmsg\0"
477 "gtty\0"
201c1cc2 478 "lock\0"
201c1cc2 479 "mpx\0"
201c1cc2
TM
480 "prof\0"
481 "profil\0"
201c1cc2
TM
482 "putpmsg\0"
483 "query_module\0"
201c1cc2
TM
484 "security\0"
485 "sgetmask\0"
486 "ssetmask\0"
487 "stty\0"
1f9ac68b 488 "sysfs\0"
201c1cc2
TM
489 "tuxcall\0"
490 "ulimit\0"
491 "uselib\0"
1f9ac68b 492 "ustat\0"
201c1cc2 493 "vserver\0"
8130926d
LP
494 },
495 [SYSCALL_FILTER_SET_PRIVILEGED] = {
8130926d 496 .name = "@privileged",
d5efc18b 497 .help = "All system calls which need super-user capabilities",
201c1cc2
TM
498 .value =
499 "@clock\0"
500 "@module\0"
501 "@raw-io\0"
502 "acct\0"
503 "bdflush\0"
504 "bpf\0"
1f9ac68b 505 "capset\0"
201c1cc2
TM
506 "chown32\0"
507 "chown\0"
508 "chroot\0"
509 "fchown32\0"
510 "fchown\0"
511 "fchownat\0"
512 "kexec_file_load\0"
513 "kexec_load\0"
514 "lchown32\0"
515 "lchown\0"
516 "nfsservctl\0"
517 "pivot_root\0"
518 "quotactl\0"
519 "reboot\0"
520 "setdomainname\0"
521 "setfsuid32\0"
522 "setfsuid\0"
523 "setgroups32\0"
524 "setgroups\0"
525 "sethostname\0"
526 "setresuid32\0"
527 "setresuid\0"
528 "setreuid32\0"
529 "setreuid\0"
530 "setuid32\0"
531 "setuid\0"
201c1cc2
TM
532 "swapoff\0"
533 "swapon\0"
60f547cf 534 "_sysctl\0"
201c1cc2 535 "vhangup\0"
8130926d
LP
536 },
537 [SYSCALL_FILTER_SET_PROCESS] = {
8130926d 538 .name = "@process",
d5efc18b 539 .help = "Process control, execution, namespaceing operations",
201c1cc2
TM
540 .value =
541 "arch_prctl\0"
542 "clone\0"
201c1cc2
TM
543 "execveat\0"
544 "fork\0"
545 "kill\0"
546 "prctl\0"
547 "setns\0"
548 "tgkill\0"
549 "tkill\0"
550 "unshare\0"
551 "vfork\0"
8130926d
LP
552 },
553 [SYSCALL_FILTER_SET_RAW_IO] = {
8130926d 554 .name = "@raw-io",
d5efc18b 555 .help = "Raw I/O port access",
201c1cc2
TM
556 .value =
557 "ioperm\0"
558 "iopl\0"
1f9ac68b 559 "pciconfig_iobase\0"
201c1cc2
TM
560 "pciconfig_read\0"
561 "pciconfig_write\0"
8130926d 562#ifdef __NR_s390_pci_mmio_read
201c1cc2 563 "s390_pci_mmio_read\0"
8130926d
LP
564#endif
565#ifdef __NR_s390_pci_mmio_write
201c1cc2 566 "s390_pci_mmio_write\0"
8130926d
LP
567#endif
568 },
bd2ab3f4
LP
569 [SYSCALL_FILTER_SET_REBOOT] = {
570 .name = "@reboot",
571 .help = "Reboot and reboot preparation/kexec",
572 .value =
573 "kexec\0"
574 "kexec_file_load\0"
575 "reboot\0"
576 },
133ddbbe
LP
577 [SYSCALL_FILTER_SET_RESOURCES] = {
578 /* Alter resource settings */
579 .name = "@resources",
580 .value =
581 "sched_setparam\0"
582 "sched_setscheduler\0"
583 "sched_setaffinity\0"
584 "setpriority\0"
585 "setrlimit\0"
586 "set_mempolicy\0"
587 "migrate_pages\0"
588 "move_pages\0"
589 "mbind\0"
590 "sched_setattr\0"
591 "prlimit64\0"
592 },
bd2ab3f4
LP
593 [SYSCALL_FILTER_SET_SWAP] = {
594 .name = "@swap",
595 .help = "Enable/disable swap devices",
596 .value =
597 "swapoff\0"
598 "swapon\0"
599 },
201c1cc2 600};
8130926d
LP
601
602const SyscallFilterSet *syscall_filter_set_find(const char *name) {
603 unsigned i;
604
605 if (isempty(name) || name[0] != '@')
606 return NULL;
607
608 for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
609 if (streq(syscall_filter_sets[i].name, name))
610 return syscall_filter_sets + i;
611
612 return NULL;
613}
614
615int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action) {
616 const char *sys;
617 int r;
618
619 assert(seccomp);
620 assert(set);
621
622 NULSTR_FOREACH(sys, set->value) {
623 int id;
624
625 if (sys[0] == '@') {
626 const SyscallFilterSet *other;
627
628 other = syscall_filter_set_find(sys);
629 if (!other)
630 return -EINVAL;
631
632 r = seccomp_add_syscall_filter_set(seccomp, other, action);
633 } else {
634 id = seccomp_syscall_resolve_name(sys);
635 if (id == __NR_SCMP_ERROR)
636 return -EINVAL;
637
638 r = seccomp_rule_add(seccomp, action, id, 0);
639 }
640 if (r < 0)
641 return r;
642 }
643
644 return 0;
645}
a3be2849
LP
646
647int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action) {
648 scmp_filter_ctx seccomp;
649 int r;
650
651 assert(set);
652
653 /* The one-stop solution: allocate a seccomp object, add a filter to it, and apply it */
654
655 r = seccomp_init_conservative(&seccomp, default_action);
656 if (r < 0)
657 return r;
658
659 r = seccomp_add_syscall_filter_set(seccomp, set, action);
660 if (r < 0)
661 goto finish;
662
663 r = seccomp_load(seccomp);
664
665finish:
666 seccomp_release(seccomp);
667 return r;
add00535
LP
668}
669
670int seccomp_restrict_namespaces(unsigned long retain) {
671 scmp_filter_ctx seccomp;
672 unsigned i;
673 int r;
674
675 if (log_get_max_level() >= LOG_DEBUG) {
676 _cleanup_free_ char *s = NULL;
677
678 (void) namespace_flag_to_string_many(retain, &s);
679 log_debug("Restricting namespace to: %s.", strna(s));
680 }
681
682 /* NOOP? */
683 if ((retain & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL)
684 return 0;
685
686 r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
687 if (r < 0)
688 return r;
689
690 if ((retain & NAMESPACE_FLAGS_ALL) == 0)
691 /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
692 * altogether. */
693 r = seccomp_rule_add(
694 seccomp,
695 SCMP_ACT_ERRNO(EPERM),
696 SCMP_SYS(setns),
697 0);
698 else
699 /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the
700 * special invocation with a zero flags argument, right here. */
701 r = seccomp_rule_add(
702 seccomp,
703 SCMP_ACT_ERRNO(EPERM),
704 SCMP_SYS(setns),
705 1,
706 SCMP_A1(SCMP_CMP_EQ, 0));
707 if (r < 0)
708 goto finish;
709
710 for (i = 0; namespace_flag_map[i].name; i++) {
711 unsigned long f;
712
713 f = namespace_flag_map[i].flag;
714 if ((retain & f) == f) {
715 log_debug("Permitting %s.", namespace_flag_map[i].name);
716 continue;
717 }
a3be2849 718
add00535
LP
719 log_debug("Blocking %s.", namespace_flag_map[i].name);
720
721 r = seccomp_rule_add(
722 seccomp,
723 SCMP_ACT_ERRNO(EPERM),
724 SCMP_SYS(unshare),
725 1,
726 SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
727 if (r < 0)
728 goto finish;
729
730 r = seccomp_rule_add(
731 seccomp,
732 SCMP_ACT_ERRNO(EPERM),
733 SCMP_SYS(clone),
734 1,
735 SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
736 if (r < 0)
737 goto finish;
738
739 if ((retain & NAMESPACE_FLAGS_ALL) != 0) {
740 r = seccomp_rule_add(
741 seccomp,
742 SCMP_ACT_ERRNO(EPERM),
743 SCMP_SYS(setns),
744 1,
745 SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
746 if (r < 0)
747 goto finish;
748 }
749 }
750
751 r = seccomp_load(seccomp);
752
753finish:
754 seccomp_release(seccomp);
755 return r;
a3be2849 756}