]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/seccomp-util.c
Merge pull request #6735 from yuwata/multiple-capability-lines
[thirdparty/systemd.git] / src / shared / seccomp-util.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2014 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <linux/seccomp.h>
22 #include <seccomp.h>
23 #include <stddef.h>
24 #include <sys/mman.h>
25 #include <sys/prctl.h>
26 #include <sys/shm.h>
27
28 #include "af-list.h"
29 #include "alloc-util.h"
30 #include "macro.h"
31 #include "nsflags.h"
32 #include "process-util.h"
33 #include "seccomp-util.h"
34 #include "set.h"
35 #include "string-util.h"
36 #include "strv.h"
37 #include "util.h"
38 #include "errno-list.h"
39
40 const uint32_t seccomp_local_archs[] = {
41
42 /* Note: always list the native arch we are compiled as last, so that users can blacklist seccomp(), but our own calls to it still succeed */
43
44 #if defined(__x86_64__) && defined(__ILP32__)
45 SCMP_ARCH_X86,
46 SCMP_ARCH_X86_64,
47 SCMP_ARCH_X32, /* native */
48 #elif defined(__x86_64__) && !defined(__ILP32__)
49 SCMP_ARCH_X86,
50 SCMP_ARCH_X32,
51 SCMP_ARCH_X86_64, /* native */
52 #elif defined(__i386__)
53 SCMP_ARCH_X86,
54 #elif defined(__aarch64__)
55 SCMP_ARCH_ARM,
56 SCMP_ARCH_AARCH64, /* native */
57 #elif defined(__arm__)
58 SCMP_ARCH_ARM,
59 #elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
60 SCMP_ARCH_MIPSEL,
61 SCMP_ARCH_MIPS, /* native */
62 #elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
63 SCMP_ARCH_MIPS,
64 SCMP_ARCH_MIPSEL, /* native */
65 #elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
66 SCMP_ARCH_MIPSEL,
67 SCMP_ARCH_MIPS,
68 SCMP_ARCH_MIPSEL64N32,
69 SCMP_ARCH_MIPS64N32,
70 SCMP_ARCH_MIPSEL64,
71 SCMP_ARCH_MIPS64, /* native */
72 #elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
73 SCMP_ARCH_MIPS,
74 SCMP_ARCH_MIPSEL,
75 SCMP_ARCH_MIPS64N32,
76 SCMP_ARCH_MIPSEL64N32,
77 SCMP_ARCH_MIPS64,
78 SCMP_ARCH_MIPSEL64, /* native */
79 #elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
80 SCMP_ARCH_MIPSEL,
81 SCMP_ARCH_MIPS,
82 SCMP_ARCH_MIPSEL64,
83 SCMP_ARCH_MIPS64,
84 SCMP_ARCH_MIPSEL64N32,
85 SCMP_ARCH_MIPS64N32, /* native */
86 #elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
87 SCMP_ARCH_MIPS,
88 SCMP_ARCH_MIPSEL,
89 SCMP_ARCH_MIPS64,
90 SCMP_ARCH_MIPSEL64,
91 SCMP_ARCH_MIPS64N32,
92 SCMP_ARCH_MIPSEL64N32, /* native */
93 #elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN
94 SCMP_ARCH_PPC,
95 SCMP_ARCH_PPC64LE,
96 SCMP_ARCH_PPC64, /* native */
97 #elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN
98 SCMP_ARCH_PPC,
99 SCMP_ARCH_PPC64,
100 SCMP_ARCH_PPC64LE, /* native */
101 #elif defined(__powerpc__)
102 SCMP_ARCH_PPC,
103 #elif defined(__s390x__)
104 SCMP_ARCH_S390,
105 SCMP_ARCH_S390X, /* native */
106 #elif defined(__s390__)
107 SCMP_ARCH_S390,
108 #endif
109 (uint32_t) -1
110 };
111
112 const char* seccomp_arch_to_string(uint32_t c) {
113 /* Maintain order used in <seccomp.h>.
114 *
115 * Names used here should be the same as those used for ConditionArchitecture=,
116 * except for "subarchitectures" like x32. */
117
118 switch(c) {
119 case SCMP_ARCH_NATIVE:
120 return "native";
121 case SCMP_ARCH_X86:
122 return "x86";
123 case SCMP_ARCH_X86_64:
124 return "x86-64";
125 case SCMP_ARCH_X32:
126 return "x32";
127 case SCMP_ARCH_ARM:
128 return "arm";
129 case SCMP_ARCH_AARCH64:
130 return "arm64";
131 case SCMP_ARCH_MIPS:
132 return "mips";
133 case SCMP_ARCH_MIPS64:
134 return "mips64";
135 case SCMP_ARCH_MIPS64N32:
136 return "mips64-n32";
137 case SCMP_ARCH_MIPSEL:
138 return "mips-le";
139 case SCMP_ARCH_MIPSEL64:
140 return "mips64-le";
141 case SCMP_ARCH_MIPSEL64N32:
142 return "mips64-le-n32";
143 case SCMP_ARCH_PPC:
144 return "ppc";
145 case SCMP_ARCH_PPC64:
146 return "ppc64";
147 case SCMP_ARCH_PPC64LE:
148 return "ppc64-le";
149 case SCMP_ARCH_S390:
150 return "s390";
151 case SCMP_ARCH_S390X:
152 return "s390x";
153 default:
154 return NULL;
155 }
156 }
157
158 int seccomp_arch_from_string(const char *n, uint32_t *ret) {
159 if (!n)
160 return -EINVAL;
161
162 assert(ret);
163
164 if (streq(n, "native"))
165 *ret = SCMP_ARCH_NATIVE;
166 else if (streq(n, "x86"))
167 *ret = SCMP_ARCH_X86;
168 else if (streq(n, "x86-64"))
169 *ret = SCMP_ARCH_X86_64;
170 else if (streq(n, "x32"))
171 *ret = SCMP_ARCH_X32;
172 else if (streq(n, "arm"))
173 *ret = SCMP_ARCH_ARM;
174 else if (streq(n, "arm64"))
175 *ret = SCMP_ARCH_AARCH64;
176 else if (streq(n, "mips"))
177 *ret = SCMP_ARCH_MIPS;
178 else if (streq(n, "mips64"))
179 *ret = SCMP_ARCH_MIPS64;
180 else if (streq(n, "mips64-n32"))
181 *ret = SCMP_ARCH_MIPS64N32;
182 else if (streq(n, "mips-le"))
183 *ret = SCMP_ARCH_MIPSEL;
184 else if (streq(n, "mips64-le"))
185 *ret = SCMP_ARCH_MIPSEL64;
186 else if (streq(n, "mips64-le-n32"))
187 *ret = SCMP_ARCH_MIPSEL64N32;
188 else if (streq(n, "ppc"))
189 *ret = SCMP_ARCH_PPC;
190 else if (streq(n, "ppc64"))
191 *ret = SCMP_ARCH_PPC64;
192 else if (streq(n, "ppc64-le"))
193 *ret = SCMP_ARCH_PPC64LE;
194 else if (streq(n, "s390"))
195 *ret = SCMP_ARCH_S390;
196 else if (streq(n, "s390x"))
197 *ret = SCMP_ARCH_S390X;
198 else
199 return -EINVAL;
200
201 return 0;
202 }
203
204 int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action) {
205 scmp_filter_ctx seccomp;
206 int r;
207
208 /* Much like seccomp_init(), but initializes the filter for one specific architecture only, without affecting
209 * any others. Also, turns off the NNP fiddling. */
210
211 seccomp = seccomp_init(default_action);
212 if (!seccomp)
213 return -ENOMEM;
214
215 if (arch != SCMP_ARCH_NATIVE &&
216 arch != seccomp_arch_native()) {
217
218 r = seccomp_arch_remove(seccomp, seccomp_arch_native());
219 if (r < 0)
220 goto finish;
221
222 r = seccomp_arch_add(seccomp, arch);
223 if (r < 0)
224 goto finish;
225
226 assert(seccomp_arch_exist(seccomp, arch) >= 0);
227 assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) == -EEXIST);
228 assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) == -EEXIST);
229 } else {
230 assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) >= 0);
231 assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) >= 0);
232 }
233
234 r = seccomp_attr_set(seccomp, SCMP_FLTATR_ACT_BADARCH, SCMP_ACT_ALLOW);
235 if (r < 0)
236 goto finish;
237
238 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
239 if (r < 0)
240 goto finish;
241
242 *ret = seccomp;
243 return 0;
244
245 finish:
246 seccomp_release(seccomp);
247 return r;
248 }
249
/* Returns true if querying the current seccomp mode via prctl(PR_GET_SECCOMP) does not fail. */
static bool is_basic_seccomp_available(void) {
        int mode;

        mode = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);

        return mode >= 0;
}
253
/* Probes for seccomp filter mode by requesting it with a NULL filter program. Returns true only if that
 * request fails and errno is EFAULT. */
static bool is_seccomp_filter_available(void) {
        if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) >= 0)
                return false;

        return errno == EFAULT;
}
258
/* Returns true if both the basic seccomp check and the seccomp filter check succeed. The checks are only run
 * on the first call; the result is kept in a function-local static and returned directly afterwards. */
bool is_seccomp_available(void) {
        static int cache = -1;

        if (cache >= 0)
                return cache;

        cache = is_basic_seccomp_available() &&
                is_seccomp_filter_available();

        return cache;
}
269
270 const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
271 [SYSCALL_FILTER_SET_DEFAULT] = {
272 .name = "@default",
273 .help = "System calls that are always permitted",
274 .value =
275 "clock_getres\0"
276 "clock_gettime\0"
277 "clock_nanosleep\0"
278 "execve\0"
279 "exit\0"
280 "exit_group\0"
281 "getrlimit\0" /* make sure processes can query stack size and such */
282 "gettimeofday\0"
283 "nanosleep\0"
284 "pause\0"
285 "rt_sigreturn\0"
286 "sigreturn\0"
287 "time\0"
288 },
289 [SYSCALL_FILTER_SET_BASIC_IO] = {
290 .name = "@basic-io",
291 .help = "Basic IO",
292 .value =
293 "close\0"
294 "dup2\0"
295 "dup3\0"
296 "dup\0"
297 "lseek\0"
298 "pread64\0"
299 "preadv\0"
300 "pwrite64\0"
301 "pwritev\0"
302 "read\0"
303 "readv\0"
304 "write\0"
305 "writev\0"
306 },
307 [SYSCALL_FILTER_SET_CLOCK] = {
308 .name = "@clock",
309 .help = "Change the system time",
310 .value =
311 "adjtimex\0"
312 "clock_adjtime\0"
313 "clock_settime\0"
314 "settimeofday\0"
315 "stime\0"
316 },
317 [SYSCALL_FILTER_SET_CPU_EMULATION] = {
318 .name = "@cpu-emulation",
319 .help = "System calls for CPU emulation functionality",
320 .value =
321 "modify_ldt\0"
322 "subpage_prot\0"
323 "switch_endian\0"
324 "vm86\0"
325 "vm86old\0"
326 },
327 [SYSCALL_FILTER_SET_DEBUG] = {
328 .name = "@debug",
329 .help = "Debugging, performance monitoring and tracing functionality",
330 .value =
331 "lookup_dcookie\0"
332 "perf_event_open\0"
333 "process_vm_readv\0"
334 "process_vm_writev\0"
335 "ptrace\0"
336 "rtas\0"
337 #ifdef __NR_s390_runtime_instr
338 "s390_runtime_instr\0"
339 #endif
340 "sys_debug_setcontext\0"
341 },
342 [SYSCALL_FILTER_SET_FILE_SYSTEM] = {
343 .name = "@file-system",
344 .help = "File system operations",
345 .value =
346 "access\0"
347 "chdir\0"
348 "chmod\0"
349 "close\0"
350 "creat\0"
351 "faccessat\0"
352 "fallocate\0"
353 "fchdir\0"
354 "fchmod\0"
355 "fchmodat\0"
356 "fcntl64\0"
357 "fcntl\0"
358 "fgetxattr\0"
359 "flistxattr\0"
360 "fsetxattr\0"
361 "fstat64\0"
362 "fstat\0"
363 "fstatat64\0"
364 "fstatfs64\0"
365 "fstatfs\0"
366 "ftruncate64\0"
367 "ftruncate\0"
368 "futimesat\0"
369 "getcwd\0"
370 "getdents64\0"
371 "getdents\0"
372 "getxattr\0"
373 "inotify_add_watch\0"
374 "inotify_init1\0"
375 "inotify_rm_watch\0"
376 "lgetxattr\0"
377 "link\0"
378 "linkat\0"
379 "listxattr\0"
380 "llistxattr\0"
381 "lremovexattr\0"
382 "lsetxattr\0"
383 "lstat64\0"
384 "lstat\0"
385 "mkdir\0"
386 "mkdirat\0"
387 "mknod\0"
388 "mknodat\0"
389 "mmap2\0"
390 "mmap\0"
391 "munmap\0"
392 "newfstatat\0"
393 "open\0"
394 "openat\0"
395 "readlink\0"
396 "readlinkat\0"
397 "removexattr\0"
398 "rename\0"
399 "renameat2\0"
400 "renameat\0"
401 "rmdir\0"
402 "setxattr\0"
403 "stat64\0"
404 "stat\0"
405 "statfs\0"
406 "statx\0"
407 "symlink\0"
408 "symlinkat\0"
409 "truncate64\0"
410 "truncate\0"
411 "unlink\0"
412 "unlinkat\0"
413 "utimensat\0"
414 "utimes\0"
415 },
416 [SYSCALL_FILTER_SET_IO_EVENT] = {
417 .name = "@io-event",
418 .help = "Event loop system calls",
419 .value =
420 "_newselect\0"
421 "epoll_create1\0"
422 "epoll_create\0"
423 "epoll_ctl\0"
424 "epoll_ctl_old\0"
425 "epoll_pwait\0"
426 "epoll_wait\0"
427 "epoll_wait_old\0"
428 "eventfd2\0"
429 "eventfd\0"
430 "poll\0"
431 "ppoll\0"
432 "pselect6\0"
433 "select\0"
434 },
435 [SYSCALL_FILTER_SET_IPC] = {
436 .name = "@ipc",
437 .help = "SysV IPC, POSIX Message Queues or other IPC",
438 .value =
439 "ipc\0"
440 "memfd_create\0"
441 "mq_getsetattr\0"
442 "mq_notify\0"
443 "mq_open\0"
444 "mq_timedreceive\0"
445 "mq_timedsend\0"
446 "mq_unlink\0"
447 "msgctl\0"
448 "msgget\0"
449 "msgrcv\0"
450 "msgsnd\0"
451 "pipe2\0"
452 "pipe\0"
453 "process_vm_readv\0"
454 "process_vm_writev\0"
455 "semctl\0"
456 "semget\0"
457 "semop\0"
458 "semtimedop\0"
459 "shmat\0"
460 "shmctl\0"
461 "shmdt\0"
462 "shmget\0"
463 },
464 [SYSCALL_FILTER_SET_KEYRING] = {
465 .name = "@keyring",
466 .help = "Kernel keyring access",
467 .value =
468 "add_key\0"
469 "keyctl\0"
470 "request_key\0"
471 },
472 [SYSCALL_FILTER_SET_MODULE] = {
473 .name = "@module",
474 .help = "Loading and unloading of kernel modules",
475 .value =
476 "delete_module\0"
477 "finit_module\0"
478 "init_module\0"
479 },
480 [SYSCALL_FILTER_SET_MOUNT] = {
481 .name = "@mount",
482 .help = "Mounting and unmounting of file systems",
483 .value =
484 "chroot\0"
485 "mount\0"
486 "pivot_root\0"
487 "umount2\0"
488 "umount\0"
489 },
490 [SYSCALL_FILTER_SET_NETWORK_IO] = {
491 .name = "@network-io",
492 .help = "Network or Unix socket IO, should not be needed if not network facing",
493 .value =
494 "accept4\0"
495 "accept\0"
496 "bind\0"
497 "connect\0"
498 "getpeername\0"
499 "getsockname\0"
500 "getsockopt\0"
501 "listen\0"
502 "recv\0"
503 "recvfrom\0"
504 "recvmmsg\0"
505 "recvmsg\0"
506 "send\0"
507 "sendmmsg\0"
508 "sendmsg\0"
509 "sendto\0"
510 "setsockopt\0"
511 "shutdown\0"
512 "socket\0"
513 "socketcall\0"
514 "socketpair\0"
515 },
516 [SYSCALL_FILTER_SET_OBSOLETE] = {
517 /* some unknown even to libseccomp */
518 .name = "@obsolete",
519 .help = "Unusual, obsolete or unimplemented system calls",
520 .value =
521 "_sysctl\0"
522 "afs_syscall\0"
523 "bdflush\0"
524 "break\0"
525 "create_module\0"
526 "ftime\0"
527 "get_kernel_syms\0"
528 "getpmsg\0"
529 "gtty\0"
530 "lock\0"
531 "mpx\0"
532 "prof\0"
533 "profil\0"
534 "putpmsg\0"
535 "query_module\0"
536 "security\0"
537 "sgetmask\0"
538 "ssetmask\0"
539 "stty\0"
540 "sysfs\0"
541 "tuxcall\0"
542 "ulimit\0"
543 "uselib\0"
544 "ustat\0"
545 "vserver\0"
546 },
547 [SYSCALL_FILTER_SET_PRIVILEGED] = {
548 .name = "@privileged",
549 .help = "All system calls which need super-user capabilities",
550 .value =
551 "@clock\0"
552 "@module\0"
553 "@raw-io\0"
554 "acct\0"
555 "bpf\0"
556 "capset\0"
557 "chown32\0"
558 "chown\0"
559 "chroot\0"
560 "fchown32\0"
561 "fchown\0"
562 "fchownat\0"
563 "kexec_file_load\0"
564 "kexec_load\0"
565 "lchown32\0"
566 "lchown\0"
567 "nfsservctl\0"
568 "pivot_root\0"
569 "quotactl\0"
570 "reboot\0"
571 "setdomainname\0"
572 "setfsuid32\0"
573 "setfsuid\0"
574 "setgroups32\0"
575 "setgroups\0"
576 "sethostname\0"
577 "setresuid32\0"
578 "setresuid\0"
579 "setreuid32\0"
580 "setreuid\0"
581 "setuid32\0"
582 "setuid\0"
583 "swapoff\0"
584 "swapon\0"
585 "_sysctl\0"
586 "vhangup\0"
587 },
588 [SYSCALL_FILTER_SET_PROCESS] = {
589 .name = "@process",
590 .help = "Process control, execution, namespaceing operations",
591 .value =
592 "arch_prctl\0"
593 "clone\0"
594 "execveat\0"
595 "fork\0"
596 "kill\0"
597 "prctl\0"
598 "setns\0"
599 "tgkill\0"
600 "tkill\0"
601 "unshare\0"
602 "vfork\0"
603 },
604 [SYSCALL_FILTER_SET_RAW_IO] = {
605 .name = "@raw-io",
606 .help = "Raw I/O port access",
607 .value =
608 "ioperm\0"
609 "iopl\0"
610 "pciconfig_iobase\0"
611 "pciconfig_read\0"
612 "pciconfig_write\0"
613 #ifdef __NR_s390_pci_mmio_read
614 "s390_pci_mmio_read\0"
615 #endif
616 #ifdef __NR_s390_pci_mmio_write
617 "s390_pci_mmio_write\0"
618 #endif
619 },
620 [SYSCALL_FILTER_SET_REBOOT] = {
621 .name = "@reboot",
622 .help = "Reboot and reboot preparation/kexec",
623 .value =
624 "kexec\0"
625 "kexec_file_load\0"
626 "reboot\0"
627 },
628 [SYSCALL_FILTER_SET_RESOURCES] = {
629 .name = "@resources",
630 .help = "Alter resource settings",
631 .value =
632 "sched_setparam\0"
633 "sched_setscheduler\0"
634 "sched_setaffinity\0"
635 "setpriority\0"
636 "setrlimit\0"
637 "set_mempolicy\0"
638 "migrate_pages\0"
639 "move_pages\0"
640 "mbind\0"
641 "sched_setattr\0"
642 "prlimit64\0"
643 },
644 [SYSCALL_FILTER_SET_SETUID] = {
645 .name = "@setuid",
646 .help = "Operations for changing user/group credentials",
647 .value =
648 "setgid32\0"
649 "setgid\0"
650 "setgroups32\0"
651 "setgroups\0"
652 "setregid32\0"
653 "setregid\0"
654 "setresgid32\0"
655 "setresgid\0"
656 "setresuid32\0"
657 "setresuid\0"
658 "setreuid32\0"
659 "setreuid\0"
660 "setuid32\0"
661 "setuid\0"
662 },
663 [SYSCALL_FILTER_SET_SWAP] = {
664 .name = "@swap",
665 .help = "Enable/disable swap devices",
666 .value =
667 "swapoff\0"
668 "swapon\0"
669 },
670 };
671
672 const SyscallFilterSet *syscall_filter_set_find(const char *name) {
673 unsigned i;
674
675 if (isempty(name) || name[0] != '@')
676 return NULL;
677
678 for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
679 if (streq(syscall_filter_sets[i].name, name))
680 return syscall_filter_sets + i;
681
682 return NULL;
683 }
684
685 static int seccomp_add_syscall_filter_set(
686 scmp_filter_ctx seccomp,
687 uint32_t default_action,
688 const SyscallFilterSet *set,
689 uint32_t action) {
690
691 const char *sys;
692 int r;
693
694 assert(seccomp);
695 assert(set);
696
697 NULSTR_FOREACH(sys, set->value) {
698 int id;
699
700 if (sys[0] == '@') {
701 const SyscallFilterSet *other;
702
703 other = syscall_filter_set_find(sys);
704 if (!other)
705 return -EINVAL;
706
707 r = seccomp_add_syscall_filter_set(seccomp, default_action, other, action);
708 if (r < 0)
709 return r;
710 } else {
711 id = seccomp_syscall_resolve_name(sys);
712 if (id == __NR_SCMP_ERROR)
713 return -EINVAL; /* Not known at all? Then that's a real error */
714
715 r = seccomp_rule_add_exact(seccomp, action, id, 0);
716 if (r < 0)
717 /* If the system call is not known on this architecture, then that's fine, let's ignore it */
718 log_debug_errno(r, "Failed to add rule for system call %s() / %d, ignoring: %m", sys, id);
719 }
720 }
721
722 return 0;
723 }
724
725 int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action) {
726 uint32_t arch;
727 int r;
728
729 assert(set);
730
731 /* The one-stop solution: allocate a seccomp object, add the specified filter to it, and apply it. Once for
732 * earch local arch. */
733
734 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
735 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
736
737 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
738
739 r = seccomp_init_for_arch(&seccomp, arch, default_action);
740 if (r < 0)
741 return r;
742
743 r = seccomp_add_syscall_filter_set(seccomp, default_action, set, action);
744 if (r < 0) {
745 log_debug_errno(r, "Failed to add filter set, ignoring: %m");
746 continue;
747 }
748
749 r = seccomp_load(seccomp);
750 if (IN_SET(r, -EPERM, -EACCES))
751 return r;
752 if (r < 0)
753 log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
754 }
755
756 return 0;
757 }
758
759 int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Set* set, uint32_t action) {
760 uint32_t arch;
761 int r;
762
763 /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Set* of syscalls, instead of a
764 * SyscallFilterSet* table. */
765
766 if (set_isempty(set) && default_action == SCMP_ACT_ALLOW)
767 return 0;
768
769 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
770 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
771 Iterator i;
772 void *id;
773
774 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
775
776 r = seccomp_init_for_arch(&seccomp, arch, default_action);
777 if (r < 0)
778 return r;
779
780 SET_FOREACH(id, set, i) {
781 r = seccomp_rule_add_exact(seccomp, action, PTR_TO_INT(id) - 1, 0);
782 if (r < 0) {
783 /* If the system call is not known on this architecture, then that's fine, let's ignore it */
784 _cleanup_free_ char *n = NULL;
785
786 n = seccomp_syscall_resolve_num_arch(arch, PTR_TO_INT(id) - 1);
787 log_debug_errno(r, "Failed to add rule for system call %s() / %d, ignoring: %m", strna(n), PTR_TO_INT(id) - 1);
788 }
789 }
790
791 r = seccomp_load(seccomp);
792 if (IN_SET(r, -EPERM, -EACCES))
793 return r;
794 if (r < 0)
795 log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
796 }
797
798 return 0;
799 }
800
801 int seccomp_restrict_namespaces(unsigned long retain) {
802 uint32_t arch;
803 int r;
804
805 if (log_get_max_level() >= LOG_DEBUG) {
806 _cleanup_free_ char *s = NULL;
807
808 (void) namespace_flag_to_string_many(retain, &s);
809 log_debug("Restricting namespace to: %s.", strna(s));
810 }
811
812 /* NOOP? */
813 if ((retain & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL)
814 return 0;
815
816 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
817 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
818 unsigned i;
819
820 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
821
822 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
823 if (r < 0)
824 return r;
825
826 if ((retain & NAMESPACE_FLAGS_ALL) == 0)
827 /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
828 * altogether. */
829 r = seccomp_rule_add_exact(
830 seccomp,
831 SCMP_ACT_ERRNO(EPERM),
832 SCMP_SYS(setns),
833 0);
834 else
835 /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the
836 * special invocation with a zero flags argument, right here. */
837 r = seccomp_rule_add_exact(
838 seccomp,
839 SCMP_ACT_ERRNO(EPERM),
840 SCMP_SYS(setns),
841 1,
842 SCMP_A1(SCMP_CMP_EQ, 0));
843 if (r < 0) {
844 log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
845 continue;
846 }
847
848 for (i = 0; namespace_flag_map[i].name; i++) {
849 unsigned long f;
850
851 f = namespace_flag_map[i].flag;
852 if ((retain & f) == f) {
853 log_debug("Permitting %s.", namespace_flag_map[i].name);
854 continue;
855 }
856
857 log_debug("Blocking %s.", namespace_flag_map[i].name);
858
859 r = seccomp_rule_add_exact(
860 seccomp,
861 SCMP_ACT_ERRNO(EPERM),
862 SCMP_SYS(unshare),
863 1,
864 SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
865 if (r < 0) {
866 log_debug_errno(r, "Failed to add unshare() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
867 break;
868 }
869
870 /* On s390/s390x the first two parameters to clone are switched */
871 if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X))
872 r = seccomp_rule_add_exact(
873 seccomp,
874 SCMP_ACT_ERRNO(EPERM),
875 SCMP_SYS(clone),
876 1,
877 SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
878 else
879 r = seccomp_rule_add_exact(
880 seccomp,
881 SCMP_ACT_ERRNO(EPERM),
882 SCMP_SYS(clone),
883 1,
884 SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
885 if (r < 0) {
886 log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
887 break;
888 }
889
890 if ((retain & NAMESPACE_FLAGS_ALL) != 0) {
891 r = seccomp_rule_add_exact(
892 seccomp,
893 SCMP_ACT_ERRNO(EPERM),
894 SCMP_SYS(setns),
895 1,
896 SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
897 if (r < 0) {
898 log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
899 break;
900 }
901 }
902 }
903 if (r < 0)
904 continue;
905
906 r = seccomp_load(seccomp);
907 if (IN_SET(r, -EPERM, -EACCES))
908 return r;
909 if (r < 0)
910 log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
911 }
912
913 return 0;
914 }
915
916 int seccomp_protect_sysctl(void) {
917 uint32_t arch;
918 int r;
919
920 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
921 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
922
923 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
924
925 if (IN_SET(arch, SCMP_ARCH_X32, SCMP_ARCH_AARCH64))
926 /* No _sysctl syscall */
927 continue;
928
929 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
930 if (r < 0)
931 return r;
932
933 r = seccomp_rule_add_exact(
934 seccomp,
935 SCMP_ACT_ERRNO(EPERM),
936 SCMP_SYS(_sysctl),
937 0);
938 if (r < 0) {
939 log_debug_errno(r, "Failed to add _sysctl() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
940 continue;
941 }
942
943 r = seccomp_load(seccomp);
944 if (IN_SET(r, -EPERM, -EACCES))
945 return r;
946 if (r < 0)
947 log_debug_errno(r, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
948 }
949
950 return 0;
951 }
952
953 int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
954 uint32_t arch;
955 int r;
956
957 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
958 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
959 bool supported;
960 Iterator i;
961
962 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
963
964 switch (arch) {
965
966 case SCMP_ARCH_X86_64:
967 case SCMP_ARCH_X32:
968 case SCMP_ARCH_ARM:
969 case SCMP_ARCH_AARCH64:
970 case SCMP_ARCH_PPC64:
971 case SCMP_ARCH_PPC64LE:
972 /* These we know we support (i.e. are the ones that do not use socketcall()) */
973 supported = true;
974 break;
975
976 case SCMP_ARCH_S390:
977 case SCMP_ARCH_S390X:
978 case SCMP_ARCH_PPC:
979 case SCMP_ARCH_X86:
980 default:
981 /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
982 * don't know */
983 supported = false;
984 break;
985 }
986
987 if (!supported)
988 continue;
989
990 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
991 if (r < 0)
992 return r;
993
994 if (whitelist) {
995 int af, first = 0, last = 0;
996 void *afp;
997
998 /* If this is a whitelist, we first block the address families that are out of range and then
999 * everything that is not in the set. First, we find the lowest and highest address family in
1000 * the set. */
1001
1002 SET_FOREACH(afp, address_families, i) {
1003 af = PTR_TO_INT(afp);
1004
1005 if (af <= 0 || af >= af_max())
1006 continue;
1007
1008 if (first == 0 || af < first)
1009 first = af;
1010
1011 if (last == 0 || af > last)
1012 last = af;
1013 }
1014
1015 assert((first == 0) == (last == 0));
1016
1017 if (first == 0) {
1018
1019 /* No entries in the valid range, block everything */
1020 r = seccomp_rule_add_exact(
1021 seccomp,
1022 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1023 SCMP_SYS(socket),
1024 0);
1025 if (r < 0) {
1026 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1027 continue;
1028 }
1029
1030 } else {
1031
1032 /* Block everything below the first entry */
1033 r = seccomp_rule_add_exact(
1034 seccomp,
1035 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1036 SCMP_SYS(socket),
1037 1,
1038 SCMP_A0(SCMP_CMP_LT, first));
1039 if (r < 0) {
1040 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1041 continue;
1042 }
1043
1044 /* Block everything above the last entry */
1045 r = seccomp_rule_add_exact(
1046 seccomp,
1047 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1048 SCMP_SYS(socket),
1049 1,
1050 SCMP_A0(SCMP_CMP_GT, last));
1051 if (r < 0) {
1052 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1053 continue;
1054 }
1055
1056 /* Block everything between the first and last entry */
1057 for (af = 1; af < af_max(); af++) {
1058
1059 if (set_contains(address_families, INT_TO_PTR(af)))
1060 continue;
1061
1062 r = seccomp_rule_add_exact(
1063 seccomp,
1064 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1065 SCMP_SYS(socket),
1066 1,
1067 SCMP_A0(SCMP_CMP_EQ, af));
1068 if (r < 0)
1069 break;
1070 }
1071
1072 if (r < 0) {
1073 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1074 continue;
1075 }
1076 }
1077
1078 } else {
1079 void *af;
1080
1081 /* If this is a blacklist, then generate one rule for
1082 * each address family that are then combined in OR
1083 * checks. */
1084
1085 SET_FOREACH(af, address_families, i) {
1086
1087 r = seccomp_rule_add_exact(
1088 seccomp,
1089 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1090 SCMP_SYS(socket),
1091 1,
1092 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1093 if (r < 0)
1094 break;
1095 }
1096
1097 if (r < 0) {
1098 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1099 continue;
1100 }
1101 }
1102
1103 r = seccomp_load(seccomp);
1104 if (IN_SET(r, -EPERM, -EACCES))
1105 return r;
1106 if (r < 0)
1107 log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1108 }
1109
1110 return 0;
1111 }
1112
1113 int seccomp_restrict_realtime(void) {
1114 static const int permitted_policies[] = {
1115 SCHED_OTHER,
1116 SCHED_BATCH,
1117 SCHED_IDLE,
1118 };
1119
1120 int r, max_policy = 0;
1121 uint32_t arch;
1122 unsigned i;
1123
1124 /* Determine the highest policy constant we want to allow */
1125 for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
1126 if (permitted_policies[i] > max_policy)
1127 max_policy = permitted_policies[i];
1128
1129 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1130 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1131 int p;
1132
1133 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
1134
1135 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1136 if (r < 0)
1137 return r;
1138
1139 /* Go through all policies with lower values than that, and block them -- unless they appear in the
1140 * whitelist. */
1141 for (p = 0; p < max_policy; p++) {
1142 bool good = false;
1143
1144 /* Check if this is in the whitelist. */
1145 for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
1146 if (permitted_policies[i] == p) {
1147 good = true;
1148 break;
1149 }
1150
1151 if (good)
1152 continue;
1153
1154 /* Deny this policy */
1155 r = seccomp_rule_add_exact(
1156 seccomp,
1157 SCMP_ACT_ERRNO(EPERM),
1158 SCMP_SYS(sched_setscheduler),
1159 1,
1160 SCMP_A1(SCMP_CMP_EQ, p));
1161 if (r < 0) {
1162 log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1163 continue;
1164 }
1165 }
1166
1167 /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are
1168 * unsigned here, hence no need no check for < 0 values. */
1169 r = seccomp_rule_add_exact(
1170 seccomp,
1171 SCMP_ACT_ERRNO(EPERM),
1172 SCMP_SYS(sched_setscheduler),
1173 1,
1174 SCMP_A1(SCMP_CMP_GT, max_policy));
1175 if (r < 0) {
1176 log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1177 continue;
1178 }
1179
1180 r = seccomp_load(seccomp);
1181 if (IN_SET(r, -EPERM, -EACCES))
1182 return r;
1183 if (r < 0)
1184 log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1185 }
1186
1187 return 0;
1188 }
1189
1190 static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp,
1191 uint32_t arch,
1192 int nr,
1193 unsigned int arg_cnt,
1194 const struct scmp_arg_cmp arg) {
1195 int r;
1196
1197 r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg);
1198 if (r < 0) {
1199 _cleanup_free_ char *n = NULL;
1200
1201 n = seccomp_syscall_resolve_num_arch(arch, nr);
1202 log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
1203 strna(n),
1204 seccomp_arch_to_string(arch));
1205 }
1206
1207 return r;
1208 }
1209
1210 /* For known architectures, check that syscalls are indeed defined or not. */
1211 #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
1212 assert_cc(SCMP_SYS(shmget) > 0);
1213 assert_cc(SCMP_SYS(shmat) > 0);
1214 assert_cc(SCMP_SYS(shmdt) > 0);
1215 #elif defined(__i386__) || defined(__powerpc64__)
1216 assert_cc(SCMP_SYS(shmget) < 0);
1217 assert_cc(SCMP_SYS(shmat) < 0);
1218 assert_cc(SCMP_SYS(shmdt) < 0);
1219 #endif
1220
1221 int seccomp_memory_deny_write_execute(void) {
1222
1223 uint32_t arch;
1224 int r;
1225
1226 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1227 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1228 int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0;
1229
1230 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
1231
1232 switch (arch) {
1233
1234 case SCMP_ARCH_X86:
1235 filter_syscall = SCMP_SYS(mmap2);
1236 block_syscall = SCMP_SYS(mmap);
1237 break;
1238
1239 case SCMP_ARCH_PPC64:
1240 case SCMP_ARCH_PPC64LE:
1241 filter_syscall = SCMP_SYS(mmap);
1242
1243 /* Note that shmat() isn't available, and the call is multiplexed through ipc().
1244 * We ignore that here, which means there's still a way to get writable/executable
1245 * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
1246
1247 break;
1248
1249 case SCMP_ARCH_ARM:
1250 filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */
1251 shmat_syscall = SCMP_SYS(shmat);
1252 break;
1253
1254 case SCMP_ARCH_X86_64:
1255 case SCMP_ARCH_X32:
1256 case SCMP_ARCH_AARCH64:
1257 filter_syscall = SCMP_SYS(mmap); /* amd64, x32, and arm64 have only mmap */
1258 shmat_syscall = SCMP_SYS(shmat);
1259 break;
1260
1261 /* Please add more definitions here, if you port systemd to other architectures! */
1262
1263 #if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__)
1264 #warning "Consider adding the right mmap() syscall definitions here!"
1265 #endif
1266 }
1267
1268 /* Can't filter mmap() on this arch, then skip it */
1269 if (filter_syscall == 0)
1270 continue;
1271
1272 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1273 if (r < 0)
1274 return r;
1275
1276 r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall,
1277 1,
1278 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
1279 if (r < 0)
1280 continue;
1281
1282 if (block_syscall != 0) {
1283 r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} );
1284 if (r < 0)
1285 continue;
1286 }
1287
1288 r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect),
1289 1,
1290 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
1291 if (r < 0)
1292 continue;
1293
1294 if (shmat_syscall != 0) {
1295 r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(shmat),
1296 1,
1297 SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
1298 if (r < 0)
1299 continue;
1300 }
1301
1302 r = seccomp_load(seccomp);
1303 if (IN_SET(r, -EPERM, -EACCES))
1304 return r;
1305 if (r < 0)
1306 log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1307 }
1308
1309 return 0;
1310 }
1311
1312 int seccomp_restrict_archs(Set *archs) {
1313 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1314 Iterator i;
1315 void *id;
1316 int r;
1317
1318 /* This installs a filter with no rules, but that restricts the system call architectures to the specified
1319 * list. */
1320
1321 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1322 if (!seccomp)
1323 return -ENOMEM;
1324
1325 SET_FOREACH(id, archs, i) {
1326 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1327 if (r == -EEXIST)
1328 continue;
1329 if (r < 0)
1330 return r;
1331 }
1332
1333 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1334 if (r < 0)
1335 return r;
1336
1337 return seccomp_load(seccomp);
1338 }
1339
1340 int parse_syscall_archs(char **l, Set **archs) {
1341 _cleanup_set_free_ Set *_archs;
1342 char **s;
1343 int r;
1344
1345 assert(l);
1346 assert(archs);
1347
1348 r = set_ensure_allocated(&_archs, NULL);
1349 if (r < 0)
1350 return r;
1351
1352 STRV_FOREACH(s, l) {
1353 uint32_t a;
1354
1355 r = seccomp_arch_from_string(*s, &a);
1356 if (r < 0)
1357 return -EINVAL;
1358
1359 r = set_put(_archs, UINT32_TO_PTR(a + 1));
1360 if (r < 0)
1361 return -ENOMEM;
1362 }
1363
1364 *archs = _archs;
1365 _archs = NULL;
1366
1367 return 0;
1368 }
1369
1370 int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) {
1371 const char *i;
1372 int r;
1373
1374 assert(set);
1375
1376 NULSTR_FOREACH(i, set->value) {
1377
1378 if (i[0] == '@') {
1379 const SyscallFilterSet *more;
1380
1381 more = syscall_filter_set_find(i);
1382 if (!more)
1383 return -ENXIO;
1384
1385
1386 r = seccomp_filter_set_add(filter, add, more);
1387 if (r < 0)
1388 return r;
1389 } else {
1390 int id;
1391
1392 id = seccomp_syscall_resolve_name(i);
1393 if (id == __NR_SCMP_ERROR)
1394 return -ENXIO;
1395
1396 if (add) {
1397 r = set_put(filter, INT_TO_PTR(id + 1));
1398 if (r < 0)
1399 return r;
1400 } else
1401 (void) set_remove(filter, INT_TO_PTR(id + 1));
1402 }
1403 }
1404
1405 return 0;
1406 }
1407
1408 int seccomp_lock_personality(unsigned long personality) {
1409 uint32_t arch;
1410 int r;
1411
1412 if (personality >= PERSONALITY_INVALID)
1413 return -EINVAL;
1414
1415 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1416 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1417
1418 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1419 if (r < 0)
1420 return r;
1421
1422 r = seccomp_rule_add_exact(
1423 seccomp,
1424 SCMP_ACT_ERRNO(EPERM),
1425 SCMP_SYS(personality),
1426 1,
1427 SCMP_A0(SCMP_CMP_NE, personality));
1428 if (r < 0)
1429 return r;
1430
1431 r = seccomp_load(seccomp);
1432 if (IN_SET(r, -EPERM, -EACCES))
1433 return r;
1434 if (r < 0)
1435 log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1436 }
1437
1438 return 0;
1439 }