]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/seccomp-util.c
analyze-security: always save syscall name
[thirdparty/systemd.git] / src / shared / seccomp-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
57183d11 2
a8fbdf54 3#include <errno.h>
3c27973b 4#include <fcntl.h>
469830d1 5#include <linux/seccomp.h>
a8fbdf54 6#include <stddef.h>
469830d1 7#include <sys/mman.h>
d347d902 8#include <sys/prctl.h>
469830d1 9#include <sys/shm.h>
3c27973b 10#include <sys/stat.h>
57183d11 11
e83156c2
YW
12/* include missing_syscall_def.h earlier to make __SNR_foo mapped to __NR_foo. */
13#include "missing_syscall_def.h"
14#include <seccomp.h>
15
469830d1 16#include "af-list.h"
add00535 17#include "alloc-util.h"
44aaddad 18#include "env-util.h"
d8b4d14d 19#include "errno-list.h"
a8fbdf54 20#include "macro.h"
add00535 21#include "nsflags.h"
d8b4d14d 22#include "nulstr-util.h"
78e864e5 23#include "process-util.h"
cf0fbc49 24#include "seccomp-util.h"
b16bd535 25#include "set.h"
07630cea 26#include "string-util.h"
b16bd535 27#include "strv.h"
469830d1 28
65976868
GDF
29/* This array will be modified at runtime as seccomp_restrict_archs is called. */
30uint32_t seccomp_local_archs[] = {
469830d1 31
6b000af4 32 /* Note: always list the native arch we are compiled as last, so that users can deny-list seccomp(), but our own calls to it still succeed */
f2d9751c
LP
33
34#if defined(__x86_64__) && defined(__ILP32__)
469830d1
LP
35 SCMP_ARCH_X86,
36 SCMP_ARCH_X86_64,
f2d9751c
LP
37 SCMP_ARCH_X32, /* native */
38#elif defined(__x86_64__) && !defined(__ILP32__)
39 SCMP_ARCH_X86,
469830d1 40 SCMP_ARCH_X32,
f2d9751c
LP
41 SCMP_ARCH_X86_64, /* native */
42#elif defined(__i386__)
43 SCMP_ARCH_X86,
44#elif defined(__aarch64__)
469830d1 45 SCMP_ARCH_ARM,
f2d9751c
LP
46 SCMP_ARCH_AARCH64, /* native */
47#elif defined(__arm__)
48 SCMP_ARCH_ARM,
49#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
50 SCMP_ARCH_MIPSEL,
51 SCMP_ARCH_MIPS, /* native */
52#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
469830d1 53 SCMP_ARCH_MIPS,
f2d9751c
LP
54 SCMP_ARCH_MIPSEL, /* native */
55#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
56 SCMP_ARCH_MIPSEL,
57 SCMP_ARCH_MIPS,
58 SCMP_ARCH_MIPSEL64N32,
469830d1 59 SCMP_ARCH_MIPS64N32,
f2d9751c
LP
60 SCMP_ARCH_MIPSEL64,
61 SCMP_ARCH_MIPS64, /* native */
62#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
63 SCMP_ARCH_MIPS,
469830d1 64 SCMP_ARCH_MIPSEL,
f2d9751c
LP
65 SCMP_ARCH_MIPS64N32,
66 SCMP_ARCH_MIPSEL64N32,
67 SCMP_ARCH_MIPS64,
68 SCMP_ARCH_MIPSEL64, /* native */
69#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
70 SCMP_ARCH_MIPSEL,
71 SCMP_ARCH_MIPS,
469830d1 72 SCMP_ARCH_MIPSEL64,
f2d9751c 73 SCMP_ARCH_MIPS64,
469830d1 74 SCMP_ARCH_MIPSEL64N32,
f2d9751c
LP
75 SCMP_ARCH_MIPS64N32, /* native */
76#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
77 SCMP_ARCH_MIPS,
78 SCMP_ARCH_MIPSEL,
79 SCMP_ARCH_MIPS64,
80 SCMP_ARCH_MIPSEL64,
81 SCMP_ARCH_MIPS64N32,
82 SCMP_ARCH_MIPSEL64N32, /* native */
83#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN
469830d1 84 SCMP_ARCH_PPC,
469830d1 85 SCMP_ARCH_PPC64LE,
f2d9751c
LP
86 SCMP_ARCH_PPC64, /* native */
87#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN
88 SCMP_ARCH_PPC,
89 SCMP_ARCH_PPC64,
90 SCMP_ARCH_PPC64LE, /* native */
91#elif defined(__powerpc__)
92 SCMP_ARCH_PPC,
f9252236
AJ
93#elif defined(__riscv) && __riscv_xlen == 64 && defined(SCMP_ARCH_RISCV64)
94 SCMP_ARCH_RISCV64,
f2d9751c
LP
95#elif defined(__s390x__)
96 SCMP_ARCH_S390,
97 SCMP_ARCH_S390X, /* native */
98#elif defined(__s390__)
469830d1 99 SCMP_ARCH_S390,
469830d1 100#endif
65976868 101 SECCOMP_LOCAL_ARCH_END
469830d1 102 };
57183d11
LP
103
104const char* seccomp_arch_to_string(uint32_t c) {
aa34055f
ZJS
105 /* Maintain order used in <seccomp.h>.
106 *
107 * Names used here should be the same as those used for ConditionArchitecture=,
108 * except for "subarchitectures" like x32. */
57183d11 109
79893116 110 switch (c) {
aa34055f 111 case SCMP_ARCH_NATIVE:
57183d11 112 return "native";
aa34055f 113 case SCMP_ARCH_X86:
57183d11 114 return "x86";
aa34055f 115 case SCMP_ARCH_X86_64:
57183d11 116 return "x86-64";
aa34055f 117 case SCMP_ARCH_X32:
57183d11 118 return "x32";
aa34055f 119 case SCMP_ARCH_ARM:
57183d11 120 return "arm";
aa34055f
ZJS
121 case SCMP_ARCH_AARCH64:
122 return "arm64";
123 case SCMP_ARCH_MIPS:
124 return "mips";
125 case SCMP_ARCH_MIPS64:
126 return "mips64";
127 case SCMP_ARCH_MIPS64N32:
128 return "mips64-n32";
129 case SCMP_ARCH_MIPSEL:
130 return "mips-le";
131 case SCMP_ARCH_MIPSEL64:
132 return "mips64-le";
133 case SCMP_ARCH_MIPSEL64N32:
134 return "mips64-le-n32";
135 case SCMP_ARCH_PPC:
136 return "ppc";
137 case SCMP_ARCH_PPC64:
138 return "ppc64";
139 case SCMP_ARCH_PPC64LE:
140 return "ppc64-le";
f9252236
AJ
141#ifdef SCMP_ARCH_RISCV64
142 case SCMP_ARCH_RISCV64:
143 return "riscv64";
144#endif
aa34055f 145 case SCMP_ARCH_S390:
6abfd303 146 return "s390";
aa34055f 147 case SCMP_ARCH_S390X:
6abfd303 148 return "s390x";
aa34055f
ZJS
149 default:
150 return NULL;
151 }
57183d11
LP
152}
153
154int seccomp_arch_from_string(const char *n, uint32_t *ret) {
155 if (!n)
156 return -EINVAL;
157
158 assert(ret);
159
160 if (streq(n, "native"))
161 *ret = SCMP_ARCH_NATIVE;
162 else if (streq(n, "x86"))
163 *ret = SCMP_ARCH_X86;
164 else if (streq(n, "x86-64"))
165 *ret = SCMP_ARCH_X86_64;
166 else if (streq(n, "x32"))
167 *ret = SCMP_ARCH_X32;
168 else if (streq(n, "arm"))
169 *ret = SCMP_ARCH_ARM;
aa34055f
ZJS
170 else if (streq(n, "arm64"))
171 *ret = SCMP_ARCH_AARCH64;
172 else if (streq(n, "mips"))
173 *ret = SCMP_ARCH_MIPS;
174 else if (streq(n, "mips64"))
175 *ret = SCMP_ARCH_MIPS64;
176 else if (streq(n, "mips64-n32"))
177 *ret = SCMP_ARCH_MIPS64N32;
178 else if (streq(n, "mips-le"))
179 *ret = SCMP_ARCH_MIPSEL;
180 else if (streq(n, "mips64-le"))
181 *ret = SCMP_ARCH_MIPSEL64;
182 else if (streq(n, "mips64-le-n32"))
183 *ret = SCMP_ARCH_MIPSEL64N32;
184 else if (streq(n, "ppc"))
185 *ret = SCMP_ARCH_PPC;
186 else if (streq(n, "ppc64"))
187 *ret = SCMP_ARCH_PPC64;
188 else if (streq(n, "ppc64-le"))
189 *ret = SCMP_ARCH_PPC64LE;
f9252236
AJ
190#ifdef SCMP_ARCH_RISCV64
191 else if (streq(n, "riscv64"))
192 *ret = SCMP_ARCH_RISCV64;
193#endif
6abfd303
HB
194 else if (streq(n, "s390"))
195 *ret = SCMP_ARCH_S390;
196 else if (streq(n, "s390x"))
197 *ret = SCMP_ARCH_S390X;
57183d11
LP
198 else
199 return -EINVAL;
200
201 return 0;
202}
e9642be2 203
469830d1 204int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action) {
b4eaa6cc 205 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
8d7b0c8f
LP
206 int r;
207
469830d1
LP
208 /* Much like seccomp_init(), but initializes the filter for one specific architecture only, without affecting
209 * any others. Also, turns off the NNP fiddling. */
8d7b0c8f
LP
210
211 seccomp = seccomp_init(default_action);
212 if (!seccomp)
213 return -ENOMEM;
214
469830d1
LP
215 if (arch != SCMP_ARCH_NATIVE &&
216 arch != seccomp_arch_native()) {
217
1b52793d 218 r = seccomp_arch_remove(seccomp, seccomp_arch_native());
469830d1 219 if (r < 0)
b4eaa6cc 220 return r;
469830d1 221
1b52793d 222 r = seccomp_arch_add(seccomp, arch);
469830d1 223 if (r < 0)
b4eaa6cc 224 return r;
469830d1
LP
225
226 assert(seccomp_arch_exist(seccomp, arch) >= 0);
227 assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) == -EEXIST);
228 assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) == -EEXIST);
229 } else {
230 assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) >= 0);
231 assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) >= 0);
232 }
233
234 r = seccomp_attr_set(seccomp, SCMP_FLTATR_ACT_BADARCH, SCMP_ACT_ALLOW);
8d7b0c8f 235 if (r < 0)
b4eaa6cc 236 return r;
8d7b0c8f
LP
237
238 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
239 if (r < 0)
b4eaa6cc 240 return r;
8d7b0c8f 241
44aaddad
SD
242#if SCMP_VER_MAJOR >= 3 || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 4)
243 if (getenv_bool("SYSTEMD_LOG_SECCOMP") > 0) {
244 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_LOG, 1);
245 if (r < 0)
246 log_debug_errno(r, "Failed to enable seccomp event logging: %m");
247 }
248#endif
249
b4eaa6cc 250 *ret = TAKE_PTR(seccomp);
8d7b0c8f 251 return 0;
8d7b0c8f
LP
252}
253
static bool is_basic_seccomp_available(void) {
        /* Asks for the current seccomp mode via prctl(); any non-negative answer counts as
         * "available". */
        int mode;

        mode = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
        return mode >= 0;
}
257
static bool is_seccomp_filter_available(void) {
        /* Probes for filter mode by requesting it with a NULL filter pointer. Only a failure with
         * EFAULT is taken as a sign that filter mode is supported; success or any other error
         * means it is not. */

        if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) >= 0)
                return false;

        return errno == EFAULT;
}
262
bool is_seccomp_available(void) {
        /* Reports whether seccomp filtering can be used. The answer is computed once and then
         * served from a static cache (negative means "not determined yet"). */
        static int cached_enabled = -1;
        int b;

        if (cached_enabled >= 0)
                return cached_enabled;

        b = getenv_bool_secure("SYSTEMD_SECCOMP");
        if (b == 0) {
                /* Explicitly switched off through the environment. */
                cached_enabled = false;
                return cached_enabled;
        }

        if (b < 0 && b != -ENXIO) /* ENXIO: env var unset */
                log_debug_errno(b, "Failed to parse $SYSTEMD_SECCOMP value, ignoring.");

        /* Enabled, unset or unparsable: probe for the basic and the filter interface. */
        cached_enabled =
                is_basic_seccomp_available() &&
                is_seccomp_filter_available();

        return cached_enabled;
}
283
8130926d 284const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
40eb6a80 285 [SYSCALL_FILTER_SET_DEFAULT] = {
40eb6a80 286 .name = "@default",
d5efc18b 287 .help = "System calls that are always permitted",
40eb6a80 288 .value =
5f02870a 289 "arch_prctl\0" /* Used during platform-specific initialization by ld-linux.so. */
5abede32 290 "brk\0"
8e24b1d2 291 "cacheflush\0"
40eb6a80 292 "clock_getres\0"
6ca67710 293 "clock_getres_time64\0"
40eb6a80 294 "clock_gettime\0"
6ca67710 295 "clock_gettime64\0"
40eb6a80 296 "clock_nanosleep\0"
6ca67710 297 "clock_nanosleep_time64\0"
40eb6a80
ZJS
298 "execve\0"
299 "exit\0"
300 "exit_group\0"
e41b0f42 301 "futex\0"
6ca67710 302 "futex_time64\0"
e41b0f42
LP
303 "get_robust_list\0"
304 "get_thread_area\0"
09d3020b
DH
305 "getegid\0"
306 "getegid32\0"
307 "geteuid\0"
308 "geteuid32\0"
309 "getgid\0"
310 "getgid32\0"
311 "getgroups\0"
312 "getgroups32\0"
313 "getpgid\0"
314 "getpgrp\0"
315 "getpid\0"
316 "getppid\0"
14f4b1b5 317 "getrandom\0"
09d3020b
DH
318 "getresgid\0"
319 "getresgid32\0"
320 "getresuid\0"
321 "getresuid32\0"
40eb6a80 322 "getrlimit\0" /* make sure processes can query stack size and such */
09d3020b
DH
323 "getsid\0"
324 "gettid\0"
40eb6a80 325 "gettimeofday\0"
09d3020b
DH
326 "getuid\0"
327 "getuid32\0"
e41b0f42 328 "membarrier\0"
5abede32
LP
329 "mmap\0"
330 "mmap2\0"
47286254 331 "mprotect\0"
11b9105d 332 "munmap\0"
40eb6a80
ZJS
333 "nanosleep\0"
334 "pause\0"
4c3a9176 335 "prlimit64\0"
e41b0f42 336 "restart_syscall\0"
6fee3be0 337 "rseq\0"
40eb6a80 338 "rt_sigreturn\0"
7df660e4 339 "sched_getaffinity\0"
8f44de08 340 "sched_yield\0"
e41b0f42
LP
341 "set_robust_list\0"
342 "set_thread_area\0"
343 "set_tid_address\0"
ce5faeac 344 "set_tls\0"
40eb6a80
ZJS
345 "sigreturn\0"
346 "time\0"
4c3a9176 347 "ugetrlimit\0"
40eb6a80 348 },
44898c53
LP
349 [SYSCALL_FILTER_SET_AIO] = {
350 .name = "@aio",
351 .help = "Asynchronous IO",
352 .value =
353 "io_cancel\0"
354 "io_destroy\0"
355 "io_getevents\0"
a05cfe23 356 "io_pgetevents\0"
6ca67710 357 "io_pgetevents_time64\0"
44898c53
LP
358 "io_setup\0"
359 "io_submit\0"
9e486265
LP
360 "io_uring_enter\0"
361 "io_uring_register\0"
362 "io_uring_setup\0"
44898c53 363 },
133ddbbe 364 [SYSCALL_FILTER_SET_BASIC_IO] = {
133ddbbe 365 .name = "@basic-io",
d5efc18b 366 .help = "Basic IO",
133ddbbe 367 .value =
648a0ed0 368 "_llseek\0"
133ddbbe 369 "close\0"
6ea0d25c 370 "close_range\0"
648a0ed0 371 "dup\0"
133ddbbe
LP
372 "dup2\0"
373 "dup3\0"
133ddbbe
LP
374 "lseek\0"
375 "pread64\0"
376 "preadv\0"
44898c53 377 "preadv2\0"
133ddbbe
LP
378 "pwrite64\0"
379 "pwritev\0"
44898c53 380 "pwritev2\0"
133ddbbe
LP
381 "read\0"
382 "readv\0"
383 "write\0"
384 "writev\0"
385 },
44898c53
LP
386 [SYSCALL_FILTER_SET_CHOWN] = {
387 .name = "@chown",
388 .help = "Change ownership of files and directories",
389 .value =
390 "chown\0"
391 "chown32\0"
392 "fchown\0"
393 "fchown32\0"
394 "fchownat\0"
395 "lchown\0"
396 "lchown32\0"
397 },
8130926d 398 [SYSCALL_FILTER_SET_CLOCK] = {
8130926d 399 .name = "@clock",
d5efc18b 400 .help = "Change the system time",
201c1cc2
TM
401 .value =
402 "adjtimex\0"
1f9ac68b 403 "clock_adjtime\0"
6ca67710 404 "clock_adjtime64\0"
1f9ac68b 405 "clock_settime\0"
6ca67710 406 "clock_settime64\0"
201c1cc2 407 "settimeofday\0"
8130926d
LP
408 },
409 [SYSCALL_FILTER_SET_CPU_EMULATION] = {
8130926d 410 .name = "@cpu-emulation",
d5efc18b 411 .help = "System calls for CPU emulation functionality",
1f9ac68b
LP
412 .value =
413 "modify_ldt\0"
414 "subpage_prot\0"
415 "switch_endian\0"
416 "vm86\0"
417 "vm86old\0"
8130926d
LP
418 },
419 [SYSCALL_FILTER_SET_DEBUG] = {
8130926d 420 .name = "@debug",
d5efc18b 421 .help = "Debugging, performance monitoring and tracing functionality",
1f9ac68b
LP
422 .value =
423 "lookup_dcookie\0"
424 "perf_event_open\0"
8270e3d8 425 "pidfd_getfd\0"
1f9ac68b
LP
426 "ptrace\0"
427 "rtas\0"
6da432fd 428#if defined __s390__ || defined __s390x__
1f9ac68b 429 "s390_runtime_instr\0"
8130926d 430#endif
1f9ac68b 431 "sys_debug_setcontext\0"
8130926d 432 },
1a1b13c9
LP
433 [SYSCALL_FILTER_SET_FILE_SYSTEM] = {
434 .name = "@file-system",
435 .help = "File system operations",
436 .value =
437 "access\0"
438 "chdir\0"
439 "chmod\0"
440 "close\0"
441 "creat\0"
442 "faccessat\0"
bcf08acb 443 "faccessat2\0"
1a1b13c9
LP
444 "fallocate\0"
445 "fchdir\0"
446 "fchmod\0"
447 "fchmodat\0"
1a1b13c9 448 "fcntl\0"
ceaa6aa7 449 "fcntl64\0"
1a1b13c9
LP
450 "fgetxattr\0"
451 "flistxattr\0"
ceaa6aa7 452 "fremovexattr\0"
1a1b13c9 453 "fsetxattr\0"
1a1b13c9 454 "fstat\0"
ceaa6aa7 455 "fstat64\0"
1a1b13c9 456 "fstatat64\0"
1a1b13c9 457 "fstatfs\0"
ceaa6aa7 458 "fstatfs64\0"
1a1b13c9 459 "ftruncate\0"
ceaa6aa7 460 "ftruncate64\0"
1a1b13c9
LP
461 "futimesat\0"
462 "getcwd\0"
1a1b13c9 463 "getdents\0"
ceaa6aa7 464 "getdents64\0"
1a1b13c9
LP
465 "getxattr\0"
466 "inotify_add_watch\0"
ceaa6aa7 467 "inotify_init\0"
1a1b13c9
LP
468 "inotify_init1\0"
469 "inotify_rm_watch\0"
470 "lgetxattr\0"
471 "link\0"
472 "linkat\0"
473 "listxattr\0"
474 "llistxattr\0"
475 "lremovexattr\0"
476 "lsetxattr\0"
1a1b13c9 477 "lstat\0"
ceaa6aa7 478 "lstat64\0"
1a1b13c9
LP
479 "mkdir\0"
480 "mkdirat\0"
481 "mknod\0"
482 "mknodat\0"
1a1b13c9 483 "newfstatat\0"
ceaa6aa7
LP
484 "oldfstat\0"
485 "oldlstat\0"
486 "oldstat\0"
1a1b13c9
LP
487 "open\0"
488 "openat\0"
8270e3d8 489 "openat2\0"
1a1b13c9
LP
490 "readlink\0"
491 "readlinkat\0"
492 "removexattr\0"
493 "rename\0"
1a1b13c9 494 "renameat\0"
ceaa6aa7 495 "renameat2\0"
1a1b13c9
LP
496 "rmdir\0"
497 "setxattr\0"
1a1b13c9 498 "stat\0"
ceaa6aa7 499 "stat64\0"
1a1b13c9 500 "statfs\0"
ceaa6aa7 501 "statfs64\0"
a4135a74 502 "statx\0"
1a1b13c9
LP
503 "symlink\0"
504 "symlinkat\0"
1a1b13c9 505 "truncate\0"
ceaa6aa7 506 "truncate64\0"
1a1b13c9
LP
507 "unlink\0"
508 "unlinkat\0"
ceaa6aa7 509 "utime\0"
1a1b13c9 510 "utimensat\0"
6ca67710 511 "utimensat_time64\0"
1a1b13c9
LP
512 "utimes\0"
513 },
8130926d 514 [SYSCALL_FILTER_SET_IO_EVENT] = {
8130926d 515 .name = "@io-event",
d5efc18b 516 .help = "Event loop system calls",
201c1cc2
TM
517 .value =
518 "_newselect\0"
201c1cc2 519 "epoll_create\0"
215728ff 520 "epoll_create1\0"
201c1cc2
TM
521 "epoll_ctl\0"
522 "epoll_ctl_old\0"
523 "epoll_pwait\0"
34254e59 524 "epoll_pwait2\0"
201c1cc2
TM
525 "epoll_wait\0"
526 "epoll_wait_old\0"
201c1cc2 527 "eventfd\0"
215728ff 528 "eventfd2\0"
201c1cc2
TM
529 "poll\0"
530 "ppoll\0"
6ca67710 531 "ppoll_time64\0"
201c1cc2 532 "pselect6\0"
6ca67710 533 "pselect6_time64\0"
201c1cc2 534 "select\0"
8130926d
LP
535 },
536 [SYSCALL_FILTER_SET_IPC] = {
8130926d 537 .name = "@ipc",
d5efc18b
ZJS
538 .help = "SysV IPC, POSIX Message Queues or other IPC",
539 .value =
540 "ipc\0"
cd5bfd7e 541 "memfd_create\0"
201c1cc2
TM
542 "mq_getsetattr\0"
543 "mq_notify\0"
544 "mq_open\0"
545 "mq_timedreceive\0"
6ca67710 546 "mq_timedreceive_time64\0"
201c1cc2 547 "mq_timedsend\0"
6ca67710 548 "mq_timedsend_time64\0"
201c1cc2
TM
549 "mq_unlink\0"
550 "msgctl\0"
551 "msgget\0"
552 "msgrcv\0"
553 "msgsnd\0"
cd5bfd7e 554 "pipe\0"
215728ff 555 "pipe2\0"
34254e59 556 "process_madvise\0"
201c1cc2
TM
557 "process_vm_readv\0"
558 "process_vm_writev\0"
559 "semctl\0"
560 "semget\0"
561 "semop\0"
562 "semtimedop\0"
6ca67710 563 "semtimedop_time64\0"
201c1cc2
TM
564 "shmat\0"
565 "shmctl\0"
566 "shmdt\0"
567 "shmget\0"
8130926d
LP
568 },
569 [SYSCALL_FILTER_SET_KEYRING] = {
8130926d 570 .name = "@keyring",
d5efc18b 571 .help = "Kernel keyring access",
1f9ac68b
LP
572 .value =
573 "add_key\0"
574 "keyctl\0"
575 "request_key\0"
8130926d 576 },
cd0ddf6f
LP
577 [SYSCALL_FILTER_SET_MEMLOCK] = {
578 .name = "@memlock",
579 .help = "Memory locking control",
580 .value =
581 "mlock\0"
582 "mlock2\0"
583 "mlockall\0"
584 "munlock\0"
585 "munlockall\0"
586 },
8130926d 587 [SYSCALL_FILTER_SET_MODULE] = {
8130926d 588 .name = "@module",
d5efc18b 589 .help = "Loading and unloading of kernel modules",
201c1cc2 590 .value =
201c1cc2
TM
591 "delete_module\0"
592 "finit_module\0"
593 "init_module\0"
8130926d
LP
594 },
595 [SYSCALL_FILTER_SET_MOUNT] = {
8130926d 596 .name = "@mount",
d5efc18b 597 .help = "Mounting and unmounting of file systems",
201c1cc2
TM
598 .value =
599 "chroot\0"
9e486265
LP
600 "fsconfig\0"
601 "fsmount\0"
602 "fsopen\0"
603 "fspick\0"
201c1cc2 604 "mount\0"
34254e59 605 "mount_setattr\0"
9e486265
LP
606 "move_mount\0"
607 "open_tree\0"
201c1cc2 608 "pivot_root\0"
201c1cc2 609 "umount\0"
215728ff 610 "umount2\0"
8130926d
LP
611 },
612 [SYSCALL_FILTER_SET_NETWORK_IO] = {
8130926d 613 .name = "@network-io",
d5efc18b 614 .help = "Network or Unix socket IO, should not be needed if not network facing",
201c1cc2 615 .value =
201c1cc2 616 "accept\0"
215728ff 617 "accept4\0"
201c1cc2
TM
618 "bind\0"
619 "connect\0"
620 "getpeername\0"
621 "getsockname\0"
622 "getsockopt\0"
623 "listen\0"
624 "recv\0"
625 "recvfrom\0"
626 "recvmmsg\0"
6ca67710 627 "recvmmsg_time64\0"
201c1cc2
TM
628 "recvmsg\0"
629 "send\0"
630 "sendmmsg\0"
631 "sendmsg\0"
632 "sendto\0"
633 "setsockopt\0"
634 "shutdown\0"
635 "socket\0"
636 "socketcall\0"
637 "socketpair\0"
8130926d
LP
638 },
639 [SYSCALL_FILTER_SET_OBSOLETE] = {
d5efc18b 640 /* some unknown even to libseccomp */
8130926d 641 .name = "@obsolete",
d5efc18b 642 .help = "Unusual, obsolete or unimplemented system calls",
201c1cc2
TM
643 .value =
644 "_sysctl\0"
645 "afs_syscall\0"
802fa07a 646 "bdflush\0"
201c1cc2 647 "break\0"
1f9ac68b 648 "create_module\0"
201c1cc2
TM
649 "ftime\0"
650 "get_kernel_syms\0"
201c1cc2
TM
651 "getpmsg\0"
652 "gtty\0"
7e0c3b8f 653 "idle\0"
201c1cc2 654 "lock\0"
201c1cc2 655 "mpx\0"
201c1cc2
TM
656 "prof\0"
657 "profil\0"
201c1cc2
TM
658 "putpmsg\0"
659 "query_module\0"
201c1cc2
TM
660 "security\0"
661 "sgetmask\0"
662 "ssetmask\0"
ae5e9bf4 663 "stime\0"
201c1cc2 664 "stty\0"
1f9ac68b 665 "sysfs\0"
201c1cc2
TM
666 "tuxcall\0"
667 "ulimit\0"
668 "uselib\0"
1f9ac68b 669 "ustat\0"
201c1cc2 670 "vserver\0"
8130926d 671 },
9493b168
ZJS
672 [SYSCALL_FILTER_SET_PKEY] = {
673 .name = "@pkey",
674 .help = "System calls used for memory protection keys",
675 .value =
676 "pkey_alloc\0"
677 "pkey_free\0"
678 "pkey_mprotect\0"
679 },
8130926d 680 [SYSCALL_FILTER_SET_PRIVILEGED] = {
8130926d 681 .name = "@privileged",
d5efc18b 682 .help = "All system calls which need super-user capabilities",
201c1cc2 683 .value =
44898c53 684 "@chown\0"
201c1cc2
TM
685 "@clock\0"
686 "@module\0"
687 "@raw-io\0"
af0f047b
LP
688 "@reboot\0"
689 "@swap\0"
215728ff 690 "_sysctl\0"
201c1cc2 691 "acct\0"
201c1cc2 692 "bpf\0"
1f9ac68b 693 "capset\0"
201c1cc2 694 "chroot\0"
a05cfe23 695 "fanotify_init\0"
9e486265 696 "fanotify_mark\0"
201c1cc2 697 "nfsservctl\0"
a05cfe23 698 "open_by_handle_at\0"
201c1cc2
TM
699 "pivot_root\0"
700 "quotactl\0"
201c1cc2 701 "setdomainname\0"
201c1cc2 702 "setfsuid\0"
215728ff 703 "setfsuid32\0"
201c1cc2 704 "setgroups\0"
215728ff 705 "setgroups32\0"
201c1cc2 706 "sethostname\0"
201c1cc2 707 "setresuid\0"
215728ff 708 "setresuid32\0"
201c1cc2 709 "setreuid\0"
215728ff 710 "setreuid32\0"
e05ee49b 711 "setuid\0" /* We list the explicit system calls here, as @setuid also includes setgid() which is not necessarily privileged */
215728ff 712 "setuid32\0"
201c1cc2 713 "vhangup\0"
8130926d
LP
714 },
715 [SYSCALL_FILTER_SET_PROCESS] = {
8130926d 716 .name = "@process",
7b121df6 717 .help = "Process control, execution, namespacing operations",
201c1cc2 718 .value =
09d3020b 719 "capget\0" /* Able to query arbitrary processes */
201c1cc2 720 "clone\0"
c5503601
ZJS
721 /* ia64 as the only architecture has clone2, a replacement for clone, but ia64 doesn't
722 * implement seccomp, so we don't need to list it at all. C.f.
723 * acce2f71779c54086962fefce3833d886c655f62 in the kernel. */
9e486265 724 "clone3\0"
201c1cc2
TM
725 "execveat\0"
726 "fork\0"
b887d2eb 727 "getrusage\0"
201c1cc2 728 "kill\0"
9e486265 729 "pidfd_open\0"
46fcf95d 730 "pidfd_send_signal\0"
201c1cc2 731 "prctl\0"
b887d2eb
LP
732 "rt_sigqueueinfo\0"
733 "rt_tgsigqueueinfo\0"
201c1cc2 734 "setns\0"
a9518dc3 735 "swapcontext\0" /* Some archs e.g. powerpc32 are using it to do userspace context switches */
201c1cc2 736 "tgkill\0"
b887d2eb 737 "times\0"
201c1cc2
TM
738 "tkill\0"
739 "unshare\0"
740 "vfork\0"
b887d2eb
LP
741 "wait4\0"
742 "waitid\0"
743 "waitpid\0"
8130926d
LP
744 },
745 [SYSCALL_FILTER_SET_RAW_IO] = {
8130926d 746 .name = "@raw-io",
d5efc18b 747 .help = "Raw I/O port access",
201c1cc2
TM
748 .value =
749 "ioperm\0"
750 "iopl\0"
1f9ac68b 751 "pciconfig_iobase\0"
201c1cc2
TM
752 "pciconfig_read\0"
753 "pciconfig_write\0"
6da432fd 754#if defined __s390__ || defined __s390x__
201c1cc2
TM
755 "s390_pci_mmio_read\0"
756 "s390_pci_mmio_write\0"
8130926d
LP
757#endif
758 },
bd2ab3f4
LP
759 [SYSCALL_FILTER_SET_REBOOT] = {
760 .name = "@reboot",
761 .help = "Reboot and reboot preparation/kexec",
762 .value =
bd2ab3f4 763 "kexec_file_load\0"
e59608fa 764 "kexec_load\0"
bd2ab3f4
LP
765 "reboot\0"
766 },
133ddbbe 767 [SYSCALL_FILTER_SET_RESOURCES] = {
133ddbbe 768 .name = "@resources",
58a8f68b 769 .help = "Alter resource settings",
133ddbbe 770 .value =
0963c053
LP
771 "ioprio_set\0"
772 "mbind\0"
773 "migrate_pages\0"
774 "move_pages\0"
775 "nice\0"
0963c053
LP
776 "sched_setaffinity\0"
777 "sched_setattr\0"
133ddbbe
LP
778 "sched_setparam\0"
779 "sched_setscheduler\0"
0963c053 780 "set_mempolicy\0"
133ddbbe
LP
781 "setpriority\0"
782 "setrlimit\0"
133ddbbe 783 },
6eaaeee9
LP
784 [SYSCALL_FILTER_SET_SETUID] = {
785 .name = "@setuid",
786 .help = "Operations for changing user/group credentials",
787 .value =
6eaaeee9 788 "setgid\0"
215728ff 789 "setgid32\0"
6eaaeee9 790 "setgroups\0"
215728ff 791 "setgroups32\0"
6eaaeee9 792 "setregid\0"
215728ff 793 "setregid32\0"
6eaaeee9 794 "setresgid\0"
215728ff 795 "setresgid32\0"
6eaaeee9 796 "setresuid\0"
215728ff 797 "setresuid32\0"
6eaaeee9 798 "setreuid\0"
215728ff 799 "setreuid32\0"
6eaaeee9 800 "setuid\0"
215728ff 801 "setuid32\0"
6eaaeee9 802 },
cd0ddf6f
LP
803 [SYSCALL_FILTER_SET_SIGNAL] = {
804 .name = "@signal",
805 .help = "Process signal handling",
806 .value =
807 "rt_sigaction\0"
808 "rt_sigpending\0"
809 "rt_sigprocmask\0"
810 "rt_sigsuspend\0"
811 "rt_sigtimedwait\0"
6ca67710 812 "rt_sigtimedwait_time64\0"
cd0ddf6f
LP
813 "sigaction\0"
814 "sigaltstack\0"
815 "signal\0"
816 "signalfd\0"
817 "signalfd4\0"
818 "sigpending\0"
819 "sigprocmask\0"
820 "sigsuspend\0"
821 },
bd2ab3f4
LP
822 [SYSCALL_FILTER_SET_SWAP] = {
823 .name = "@swap",
824 .help = "Enable/disable swap devices",
825 .value =
826 "swapoff\0"
827 "swapon\0"
828 },
44898c53
LP
829 [SYSCALL_FILTER_SET_SYNC] = {
830 .name = "@sync",
831 .help = "Synchronize files and memory to storage",
832 .value =
833 "fdatasync\0"
834 "fsync\0"
835 "msync\0"
836 "sync\0"
837 "sync_file_range\0"
a8fb09f5 838 "sync_file_range2\0"
44898c53
LP
839 "syncfs\0"
840 },
70526841
LP
841 [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = {
842 .name = "@system-service",
843 .help = "General system service operations",
844 .value =
845 "@aio\0"
846 "@basic-io\0"
847 "@chown\0"
848 "@default\0"
849 "@file-system\0"
850 "@io-event\0"
851 "@ipc\0"
852 "@keyring\0"
853 "@memlock\0"
854 "@network-io\0"
855 "@process\0"
856 "@resources\0"
857 "@setuid\0"
858 "@signal\0"
859 "@sync\0"
860 "@timer\0"
70526841
LP
861 "capget\0"
862 "capset\0"
863 "copy_file_range\0"
864 "fadvise64\0"
865 "fadvise64_64\0"
866 "flock\0"
867 "get_mempolicy\0"
868 "getcpu\0"
869 "getpriority\0"
70526841
LP
870 "ioctl\0"
871 "ioprio_get\0"
872 "kcmp\0"
873 "madvise\0"
70526841
LP
874 "mremap\0"
875 "name_to_handle_at\0"
876 "oldolduname\0"
877 "olduname\0"
878 "personality\0"
879 "readahead\0"
880 "readdir\0"
881 "remap_file_pages\0"
882 "sched_get_priority_max\0"
883 "sched_get_priority_min\0"
70526841
LP
884 "sched_getattr\0"
885 "sched_getparam\0"
886 "sched_getscheduler\0"
887 "sched_rr_get_interval\0"
6ca67710 888 "sched_rr_get_interval_time64\0"
70526841
LP
889 "sched_yield\0"
890 "sendfile\0"
891 "sendfile64\0"
892 "setfsgid\0"
893 "setfsgid32\0"
894 "setfsuid\0"
895 "setfsuid32\0"
896 "setpgid\0"
897 "setsid\0"
898 "splice\0"
899 "sysinfo\0"
900 "tee\0"
901 "umask\0"
902 "uname\0"
903 "userfaultfd\0"
904 "vmsplice\0"
905 },
cd0ddf6f
LP
906 [SYSCALL_FILTER_SET_TIMER] = {
907 .name = "@timer",
908 .help = "Schedule operations by time",
909 .value =
910 "alarm\0"
911 "getitimer\0"
912 "setitimer\0"
913 "timer_create\0"
914 "timer_delete\0"
915 "timer_getoverrun\0"
916 "timer_gettime\0"
6ca67710 917 "timer_gettime64\0"
cd0ddf6f 918 "timer_settime\0"
6ca67710 919 "timer_settime64\0"
cd0ddf6f
LP
920 "timerfd_create\0"
921 "timerfd_gettime\0"
6ca67710 922 "timerfd_gettime64\0"
cd0ddf6f 923 "timerfd_settime\0"
6ca67710 924 "timerfd_settime64\0"
cd0ddf6f
LP
925 "times\0"
926 },
95aac012
ZJS
927 [SYSCALL_FILTER_SET_KNOWN] = {
928 .name = "@known",
929 .help = "All known syscalls declared in the kernel",
930 .value =
931#include "syscall-list.h"
932 },
201c1cc2 933};
8130926d
LP
934
935const SyscallFilterSet *syscall_filter_set_find(const char *name) {
8130926d
LP
936 if (isempty(name) || name[0] != '@')
937 return NULL;
938
077e8fc0 939 for (unsigned i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
8130926d
LP
940 if (streq(syscall_filter_sets[i].name, name))
941 return syscall_filter_sets + i;
942
943 return NULL;
944}
945
000c0520
ZJS
946static int add_syscall_filter_set(
947 scmp_filter_ctx seccomp,
948 const SyscallFilterSet *set,
949 uint32_t action,
950 char **exclude,
951 bool log_missing,
952 char ***added);
953
954int seccomp_add_syscall_filter_item(
955 scmp_filter_ctx *seccomp,
956 const char *name,
957 uint32_t action,
958 char **exclude,
959 bool log_missing,
960 char ***added) {
69b1b241
LP
961
962 assert(seccomp);
963 assert(name);
964
960e4569
LP
965 if (strv_contains(exclude, name))
966 return 0;
967
000c0520
ZJS
968 /* Any syscalls that are handled are added to the *added strv. The pointer
969 * must be either NULL or point to a valid pre-initialized possibly-empty strv. */
970
69b1b241
LP
971 if (name[0] == '@') {
972 const SyscallFilterSet *other;
973
974 other = syscall_filter_set_find(name);
baaa35ad
ZJS
975 if (!other)
976 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
977 "Filter set %s is not known!",
978 name);
69b1b241 979
000c0520 980 return add_syscall_filter_set(seccomp, other, action, exclude, log_missing, added);
b54f36c6 981
69b1b241 982 } else {
b54f36c6 983 int id, r;
69b1b241
LP
984
985 id = seccomp_syscall_resolve_name(name);
cff7bff8 986 if (id == __NR_SCMP_ERROR) {
b54f36c6
ZJS
987 if (log_missing)
988 log_debug("System call %s is not known, ignoring.", name);
ff217dc3 989 return 0;
cff7bff8 990 }
69b1b241
LP
991
992 r = seccomp_rule_add_exact(seccomp, action, id, 0);
b54f36c6 993 if (r < 0) {
69b1b241 994 /* If the system call is not known on this architecture, then that's fine, let's ignore it */
7e86bd73
ZJS
995 bool ignore = r == -EDOM;
996
997 if (!ignore || log_missing)
998 log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
999 name, id, ignore ? ", ignoring" : "");
1000 if (!ignore)
1001 return r;
b54f36c6 1002 }
69b1b241 1003
000c0520
ZJS
1004 if (added) {
1005 r = strv_extend(added, name);
1006 if (r < 0)
1007 return r;
1008 }
1009
b54f36c6
ZJS
1010 return 0;
1011 }
69b1b241
LP
1012}
1013
000c0520 1014static int add_syscall_filter_set(
469830d1 1015 scmp_filter_ctx seccomp,
469830d1 1016 const SyscallFilterSet *set,
960e4569 1017 uint32_t action,
b54f36c6 1018 char **exclude,
000c0520
ZJS
1019 bool log_missing,
1020 char ***added) {
469830d1 1021
8130926d
LP
1022 const char *sys;
1023 int r;
1024
000c0520
ZJS
1025 /* Any syscalls that are handled are added to the *added strv. It needs to be initialized. */
1026
8130926d
LP
1027 assert(seccomp);
1028 assert(set);
1029
1030 NULSTR_FOREACH(sys, set->value) {
000c0520 1031 r = seccomp_add_syscall_filter_item(seccomp, sys, action, exclude, log_missing, added);
69b1b241
LP
1032 if (r < 0)
1033 return r;
469830d1
LP
1034 }
1035
1036 return 0;
1037}
1038
b54f36c6 1039int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing) {
469830d1
LP
1040 uint32_t arch;
1041 int r;
1042
1043 assert(set);
1044
1045 /* The one-stop solution: allocate a seccomp object, add the specified filter to it, and apply it. Once for
a90db619 1046 * each local arch. */
469830d1
LP
1047
1048 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1049 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1050
1051 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
1052
1053 r = seccomp_init_for_arch(&seccomp, arch, default_action);
8130926d
LP
1054 if (r < 0)
1055 return r;
469830d1 1056
000c0520 1057 r = add_syscall_filter_set(seccomp, set, action, NULL, log_missing, NULL);
7e86bd73
ZJS
1058 if (r < 0)
1059 return log_debug_errno(r, "Failed to add filter set: %m");
469830d1
LP
1060
1061 r = seccomp_load(seccomp);
7bc5e0b1 1062 if (ERRNO_IS_SECCOMP_FATAL(r))
469830d1
LP
1063 return r;
1064 if (r < 0)
1065 log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
8130926d
LP
1066 }
1067
1068 return 0;
1069}
a3be2849 1070
1862b310 1071int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* filter, uint32_t action, bool log_missing) {
469830d1 1072 uint32_t arch;
a3be2849
LP
1073 int r;
1074
1862b310
YW
1075 /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Hashmap* of syscalls, instead
1076 * of a SyscallFilterSet* table. */
a3be2849 1077
1862b310 1078 if (hashmap_isempty(filter) && default_action == SCMP_ACT_ALLOW)
469830d1 1079 return 0;
a3be2849 1080
469830d1
LP
1081 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1082 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
b54f36c6 1083 void *syscall_id, *val;
a3be2849 1084
469830d1 1085 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
a3be2849 1086
469830d1
LP
1087 r = seccomp_init_for_arch(&seccomp, arch, default_action);
1088 if (r < 0)
1089 return r;
a3be2849 1090
1862b310 1091 HASHMAP_FOREACH_KEY(val, syscall_id, filter) {
8cfa775f 1092 uint32_t a = action;
b54f36c6
ZJS
1093 int id = PTR_TO_INT(syscall_id) - 1;
1094 int error = PTR_TO_INT(val);
8cfa775f 1095
005bfaf1
TM
1096 if (error == SECCOMP_ERROR_NUMBER_KILL)
1097 a = scmp_act_kill_process();
9df2cdd8
TM
1098#ifdef SCMP_ACT_LOG
1099 else if (action == SCMP_ACT_LOG)
1100 a = SCMP_ACT_LOG;
1101#endif
68acc1af 1102 else if (error >= 0)
b54f36c6 1103 a = SCMP_ACT_ERRNO(error);
8cfa775f 1104
b54f36c6 1105 r = seccomp_rule_add_exact(seccomp, a, id, 0);
469830d1 1106 if (r < 0) {
1862b310
YW
1107 /* If the system call is not known on this architecture, then that's
1108 * fine, let's ignore it */
469830d1 1109 _cleanup_free_ char *n = NULL;
7e86bd73 1110 bool ignore;
469830d1 1111
b54f36c6 1112 n = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, id);
7e86bd73
ZJS
1113 ignore = r == -EDOM;
1114 if (!ignore || log_missing)
1115 log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
1116 strna(n), id, ignore ? ", ignoring" : "");
1117 if (!ignore)
1118 return r;
469830d1
LP
1119 }
1120 }
1121
1122 r = seccomp_load(seccomp);
7bc5e0b1 1123 if (ERRNO_IS_SECCOMP_FATAL(r))
469830d1
LP
1124 return r;
1125 if (r < 0)
a52765a5 1126 log_debug_errno(r, "Failed to install system call filter for architecture %s, skipping: %m",
1862b310 1127 seccomp_arch_to_string(arch));
469830d1
LP
1128 }
1129
1130 return 0;
add00535
LP
1131}
1132
58f6ab44 1133int seccomp_parse_syscall_filter(
898748d8
YW
1134 const char *name,
1135 int errno_num,
1136 Hashmap *filter,
13d92c63 1137 SeccompParseFlags flags,
898748d8
YW
1138 const char *unit,
1139 const char *filename,
1140 unsigned line) {
1141
1142 int r;
1143
1144 assert(name);
1145 assert(filter);
1146
084a46d7
YW
1147 if (!FLAGS_SET(flags, SECCOMP_PARSE_INVERT) && errno_num >= 0)
1148 return -EINVAL;
1149
898748d8
YW
1150 if (name[0] == '@') {
1151 const SyscallFilterSet *set;
1152 const char *i;
1153
1154 set = syscall_filter_set_find(name);
1155 if (!set) {
9e29ee40 1156 if (!FLAGS_SET(flags, SECCOMP_PARSE_PERMISSIVE))
898748d8 1157 return -EINVAL;
13d92c63 1158
9e29ee40 1159 log_syntax(unit, FLAGS_SET(flags, SECCOMP_PARSE_LOG) ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
13d92c63
LP
1160 "Unknown system call group, ignoring: %s", name);
1161 return 0;
898748d8
YW
1162 }
1163
1164 NULSTR_FOREACH(i, set->value) {
13d92c63
LP
1165 /* Call ourselves again, for the group to parse. Note that we downgrade logging here (i.e. take
1166 * away the SECCOMP_PARSE_LOG flag) since any issues in the group table are our own problem,
1167 * not a problem in user configuration data and we shouldn't pretend otherwise by complaining
1168 * about them. */
58f6ab44 1169 r = seccomp_parse_syscall_filter(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line);
898748d8
YW
1170 if (r < 0)
1171 return r;
1172 }
1173 } else {
1174 int id;
1175
1176 id = seccomp_syscall_resolve_name(name);
1177 if (id == __NR_SCMP_ERROR) {
9e29ee40 1178 if (!FLAGS_SET(flags, SECCOMP_PARSE_PERMISSIVE))
898748d8 1179 return -EINVAL;
13d92c63 1180
9e29ee40 1181 log_syntax(unit, FLAGS_SET(flags, SECCOMP_PARSE_LOG) ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
13d92c63
LP
1182 "Failed to parse system call, ignoring: %s", name);
1183 return 0;
898748d8
YW
1184 }
1185
68acc1af
YW
1186 /* If we previously wanted to forbid a syscall and now we want to allow it, then remove
1187 * it from the list. The entries in allow-list with non-negative error value will be
1188 * handled with SCMP_ACT_ERRNO() instead of the default action. */
1189 if (!FLAGS_SET(flags, SECCOMP_PARSE_INVERT) == FLAGS_SET(flags, SECCOMP_PARSE_ALLOW_LIST) ||
1190 (FLAGS_SET(flags, SECCOMP_PARSE_INVERT | SECCOMP_PARSE_ALLOW_LIST) && errno_num >= 0)) {
898748d8
YW
1191 r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num));
1192 if (r < 0)
851ee70a
LW
1193 switch (r) {
1194 case -ENOMEM:
9e29ee40 1195 return FLAGS_SET(flags, SECCOMP_PARSE_LOG) ? log_oom() : -ENOMEM;
851ee70a 1196 case -EEXIST:
9d7fe7c6
LW
1197 assert_se(hashmap_update(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)) == 0);
1198 break;
851ee70a
LW
1199 default:
1200 return r;
1201 }
898748d8
YW
1202 } else
1203 (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
1204 }
1205
1206 return 0;
1207}
1208
add00535 1209int seccomp_restrict_namespaces(unsigned long retain) {
469830d1 1210 uint32_t arch;
add00535
LP
1211 int r;
1212
f1d34068 1213 if (DEBUG_LOGGING) {
add00535
LP
1214 _cleanup_free_ char *s = NULL;
1215
86c2a9f1 1216 (void) namespace_flags_to_string(retain, &s);
add00535
LP
1217 log_debug("Restricting namespace to: %s.", strna(s));
1218 }
1219
1220 /* NOOP? */
d7a0f1f4 1221 if (FLAGS_SET(retain, NAMESPACE_FLAGS_ALL))
add00535
LP
1222 return 0;
1223
469830d1
LP
1224 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1225 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
add00535 1226
469830d1
LP
1227 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
1228
1229 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1230 if (r < 0)
1231 return r;
1232
30193fe8
ZJS
1233 /* We cannot filter on individual flags to clone3(), and we need to disable the
1234 * syscall altogether. ENOSYS is used instead of EPERM, so that glibc and other
1235 * users shall fall back to clone(), as if on an older kernel.
1236 *
1237 * C.f. https://github.com/flatpak/flatpak/commit/a10f52a7565c549612c92b8e736a6698a53db330,
1238 * https://github.com/moby/moby/issues/42680. */
1239
1240 r = seccomp_rule_add_exact(
1241 seccomp,
1242 SCMP_ACT_ERRNO(ENOSYS),
1243 SCMP_SYS(clone3),
1244 0);
1245 if (r < 0)
1246 log_debug_errno(r, "Failed to add clone3() rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
1247
469830d1
LP
1248 if ((retain & NAMESPACE_FLAGS_ALL) == 0)
1249 /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
1250 * altogether. */
1251 r = seccomp_rule_add_exact(
1252 seccomp,
1253 SCMP_ACT_ERRNO(EPERM),
1254 SCMP_SYS(setns),
1255 0);
1256 else
1257 /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the
1258 * special invocation with a zero flags argument, right here. */
1259 r = seccomp_rule_add_exact(
1260 seccomp,
1261 SCMP_ACT_ERRNO(EPERM),
1262 SCMP_SYS(setns),
1263 1,
1264 SCMP_A1(SCMP_CMP_EQ, 0));
1265 if (r < 0) {
1266 log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1267 continue;
1268 }
1269
077e8fc0 1270 for (unsigned i = 0; namespace_flag_map[i].name; i++) {
469830d1
LP
1271 unsigned long f;
1272
1273 f = namespace_flag_map[i].flag;
d7a0f1f4 1274 if (FLAGS_SET(retain, f)) {
469830d1
LP
1275 log_debug("Permitting %s.", namespace_flag_map[i].name);
1276 continue;
1277 }
1278
1279 log_debug("Blocking %s.", namespace_flag_map[i].name);
1280
1281 r = seccomp_rule_add_exact(
1282 seccomp,
1283 SCMP_ACT_ERRNO(EPERM),
1284 SCMP_SYS(unshare),
1285 1,
1286 SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
1287 if (r < 0) {
1288 log_debug_errno(r, "Failed to add unshare() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1289 break;
1290 }
1291
511ceb1f
ZJS
1292 /* On s390/s390x the first two parameters to clone are switched */
1293 if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X))
ae9d60ce
LP
1294 r = seccomp_rule_add_exact(
1295 seccomp,
1296 SCMP_ACT_ERRNO(EPERM),
1297 SCMP_SYS(clone),
1298 1,
1299 SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
1300 else
1301 r = seccomp_rule_add_exact(
1302 seccomp,
1303 SCMP_ACT_ERRNO(EPERM),
1304 SCMP_SYS(clone),
1305 1,
1306 SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
469830d1
LP
1307 if (r < 0) {
1308 log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1309 break;
1310 }
1311
1312 if ((retain & NAMESPACE_FLAGS_ALL) != 0) {
1313 r = seccomp_rule_add_exact(
1314 seccomp,
1315 SCMP_ACT_ERRNO(EPERM),
1316 SCMP_SYS(setns),
1317 1,
1318 SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
1319 if (r < 0) {
1320 log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1321 break;
1322 }
1323 }
1324 }
1325 if (r < 0)
1326 continue;
1327
1328 r = seccomp_load(seccomp);
7bc5e0b1 1329 if (ERRNO_IS_SECCOMP_FATAL(r))
469830d1
LP
1330 return r;
1331 if (r < 0)
1332 log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1333 }
1334
1335 return 0;
1336}
1337
1338int seccomp_protect_sysctl(void) {
1339 uint32_t arch;
1340 int r;
1341
1342 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1343 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1344
1345 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
1346
f9252236
AJ
1347 if (IN_SET(arch,
1348 SCMP_ARCH_AARCH64,
1349#ifdef SCMP_ARCH_RISCV64
1350 SCMP_ARCH_RISCV64,
1351#endif
1352 SCMP_ARCH_X32
1353 ))
2e64e8f4
ZJS
1354 /* No _sysctl syscall */
1355 continue;
1356
469830d1
LP
1357 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1358 if (r < 0)
1359 return r;
1360
1361 r = seccomp_rule_add_exact(
add00535
LP
1362 seccomp,
1363 SCMP_ACT_ERRNO(EPERM),
469830d1 1364 SCMP_SYS(_sysctl),
add00535 1365 0);
469830d1
LP
1366 if (r < 0) {
1367 log_debug_errno(r, "Failed to add _sysctl() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1368 continue;
1369 }
1370
1371 r = seccomp_load(seccomp);
7bc5e0b1 1372 if (ERRNO_IS_SECCOMP_FATAL(r))
469830d1
LP
1373 return r;
1374 if (r < 0)
1375 log_debug_errno(r, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1376 }
1377
1378 return 0;
1379}
1380
620dbdd2
KK
1381int seccomp_protect_syslog(void) {
1382 uint32_t arch;
1383 int r;
1384
1385 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1386 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1387
1388 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1389 if (r < 0)
1390 return r;
1391
1392 r = seccomp_rule_add_exact(
1393 seccomp,
1394 SCMP_ACT_ERRNO(EPERM),
1395 SCMP_SYS(syslog),
1396 0);
1397
1398 if (r < 0) {
1399 log_debug_errno(r, "Failed to add syslog() rule for architecture %s, skipping %m", seccomp_arch_to_string(arch));
1400 continue;
1401 }
1402
1403 r = seccomp_load(seccomp);
1404 if (ERRNO_IS_SECCOMP_FATAL(r))
1405 return r;
1406 if (r < 0)
1407 log_debug_errno(r, "Failed to install syslog protection rules for architecture %s, skipping %m", seccomp_arch_to_string(arch));
1408 }
1409
1410 return 0;
1411}
1412
6b000af4 1413int seccomp_restrict_address_families(Set *address_families, bool allow_list) {
469830d1
LP
1414 uint32_t arch;
1415 int r;
1416
1417 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1418 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
9606bc4b 1419 bool supported;
469830d1
LP
1420
1421 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
1422
9606bc4b
LP
1423 switch (arch) {
1424
1425 case SCMP_ARCH_X86_64:
1426 case SCMP_ARCH_X32:
1427 case SCMP_ARCH_ARM:
1428 case SCMP_ARCH_AARCH64:
f5aeac14
JC
1429 case SCMP_ARCH_MIPSEL64N32:
1430 case SCMP_ARCH_MIPS64N32:
1431 case SCMP_ARCH_MIPSEL64:
1432 case SCMP_ARCH_MIPS64:
f9252236
AJ
1433#ifdef SCMP_ARCH_RISCV64
1434 case SCMP_ARCH_RISCV64:
1435#endif
9606bc4b
LP
1436 /* These we know we support (i.e. are the ones that do not use socketcall()) */
1437 supported = true;
1438 break;
1439
9606bc4b
LP
1440 case SCMP_ARCH_S390:
1441 case SCMP_ARCH_S390X:
da1921a5 1442 case SCMP_ARCH_X86:
f5aeac14
JC
1443 case SCMP_ARCH_MIPSEL:
1444 case SCMP_ARCH_MIPS:
d5923e38
ZJS
1445 case SCMP_ARCH_PPC:
1446 case SCMP_ARCH_PPC64:
1447 case SCMP_ARCH_PPC64LE:
9606bc4b
LP
1448 default:
1449 /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
1450 * don't know */
1451 supported = false;
1452 break;
1453 }
1454
1455 if (!supported)
1456 continue;
1457
469830d1
LP
1458 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1459 if (r < 0)
1460 return r;
1461
6b000af4 1462 if (allow_list) {
077e8fc0 1463 int first = 0, last = 0;
469830d1
LP
1464 void *afp;
1465
6b000af4
LP
1466 /* If this is an allow list, we first block the address families that are out of
1467 * range and then everything that is not in the set. First, we find the lowest and
1468 * highest address family in the set. */
469830d1 1469
90e74a66 1470 SET_FOREACH(afp, address_families) {
077e8fc0 1471 int af = PTR_TO_INT(afp);
469830d1
LP
1472
1473 if (af <= 0 || af >= af_max())
1474 continue;
1475
1476 if (first == 0 || af < first)
1477 first = af;
1478
1479 if (last == 0 || af > last)
1480 last = af;
1481 }
1482
1483 assert((first == 0) == (last == 0));
1484
1485 if (first == 0) {
1486
1487 /* No entries in the valid range, block everything */
1488 r = seccomp_rule_add_exact(
1489 seccomp,
1490 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1491 SCMP_SYS(socket),
1492 0);
1493 if (r < 0) {
1494 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1495 continue;
1496 }
1497
1498 } else {
1499
1500 /* Block everything below the first entry */
1501 r = seccomp_rule_add_exact(
1502 seccomp,
1503 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1504 SCMP_SYS(socket),
1505 1,
1506 SCMP_A0(SCMP_CMP_LT, first));
1507 if (r < 0) {
1508 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1509 continue;
1510 }
1511
1512 /* Block everything above the last entry */
1513 r = seccomp_rule_add_exact(
1514 seccomp,
1515 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1516 SCMP_SYS(socket),
1517 1,
1518 SCMP_A0(SCMP_CMP_GT, last));
1519 if (r < 0) {
1520 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1521 continue;
1522 }
1523
1524 /* Block everything between the first and last entry */
077e8fc0 1525 for (int af = 1; af < af_max(); af++) {
469830d1
LP
1526
1527 if (set_contains(address_families, INT_TO_PTR(af)))
1528 continue;
1529
1530 r = seccomp_rule_add_exact(
1531 seccomp,
1532 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1533 SCMP_SYS(socket),
1534 1,
1535 SCMP_A0(SCMP_CMP_EQ, af));
1536 if (r < 0)
1537 break;
1538 }
469830d1
LP
1539 if (r < 0) {
1540 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1541 continue;
1542 }
1543 }
1544
1545 } else {
1546 void *af;
1547
6b000af4
LP
1548 /* If this is a deny list, then generate one rule for each address family that are
1549 * then combined in OR checks. */
469830d1 1550
90e74a66 1551 SET_FOREACH(af, address_families) {
469830d1
LP
1552 r = seccomp_rule_add_exact(
1553 seccomp,
1554 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1555 SCMP_SYS(socket),
1556 1,
1557 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1558 if (r < 0)
1559 break;
1560 }
469830d1
LP
1561 if (r < 0) {
1562 log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1563 continue;
1564 }
1565 }
1566
1567 r = seccomp_load(seccomp);
7bc5e0b1 1568 if (ERRNO_IS_SECCOMP_FATAL(r))
469830d1
LP
1569 return r;
1570 if (r < 0)
1571 log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1572 }
1573
1574 return 0;
1575}
1576
1577int seccomp_restrict_realtime(void) {
1578 static const int permitted_policies[] = {
1579 SCHED_OTHER,
1580 SCHED_BATCH,
1581 SCHED_IDLE,
1582 };
1583
1584 int r, max_policy = 0;
1585 uint32_t arch;
1586 unsigned i;
1587
1588 /* Determine the highest policy constant we want to allow */
1589 for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
1590 if (permitted_policies[i] > max_policy)
1591 max_policy = permitted_policies[i];
1592
1593 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1594 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1595 int p;
1596
1597 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
1598
1599 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1600 if (r < 0)
1601 return r;
1602
1603 /* Go through all policies with lower values than that, and block them -- unless they appear in the
6b000af4 1604 * allow list. */
469830d1
LP
1605 for (p = 0; p < max_policy; p++) {
1606 bool good = false;
1607
6b000af4 1608 /* Check if this is in the allow list. */
469830d1
LP
1609 for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
1610 if (permitted_policies[i] == p) {
1611 good = true;
1612 break;
1613 }
1614
1615 if (good)
1616 continue;
1617
1618 /* Deny this policy */
1619 r = seccomp_rule_add_exact(
1620 seccomp,
1621 SCMP_ACT_ERRNO(EPERM),
1622 SCMP_SYS(sched_setscheduler),
1623 1,
1624 SCMP_A1(SCMP_CMP_EQ, p));
1625 if (r < 0) {
1626 log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1627 continue;
1628 }
1629 }
1630
6b000af4
LP
1631 /* Deny-list all other policies, i.e. the ones with higher values. Note that all comparisons
1632 * are unsigned here, hence no need no check for < 0 values. */
469830d1 1633 r = seccomp_rule_add_exact(
add00535
LP
1634 seccomp,
1635 SCMP_ACT_ERRNO(EPERM),
469830d1 1636 SCMP_SYS(sched_setscheduler),
add00535 1637 1,
469830d1
LP
1638 SCMP_A1(SCMP_CMP_GT, max_policy));
1639 if (r < 0) {
1640 log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1641 continue;
1642 }
add00535 1643
469830d1 1644 r = seccomp_load(seccomp);
7bc5e0b1 1645 if (ERRNO_IS_SECCOMP_FATAL(r))
469830d1
LP
1646 return r;
1647 if (r < 0)
1648 log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1649 }
1650
1651 return 0;
1652}
1653
6dc66688
ZJS
1654static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp,
1655 uint32_t arch,
1656 int nr,
14cb109d 1657 unsigned arg_cnt,
6dc66688
ZJS
1658 const struct scmp_arg_cmp arg) {
1659 int r;
1660
1661 r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg);
1662 if (r < 0) {
1663 _cleanup_free_ char *n = NULL;
1664
1665 n = seccomp_syscall_resolve_num_arch(arch, nr);
1666 log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
1667 strna(n),
1668 seccomp_arch_to_string(arch));
1669 }
1670
1671 return r;
1672}
1673
2a8d6e63 1674/* For known architectures, check that syscalls are indeed defined or not. */
f9252236 1675#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64)
2a8d6e63
ZJS
1676assert_cc(SCMP_SYS(shmget) > 0);
1677assert_cc(SCMP_SYS(shmat) > 0);
1678assert_cc(SCMP_SYS(shmdt) > 0);
2a8d6e63 1679#endif
6dc66688 1680
469830d1
LP
1681int seccomp_memory_deny_write_execute(void) {
1682 uint32_t arch;
b069c2a3 1683 unsigned loaded = 0;
469830d1
LP
1684
1685 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1686 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
b069c2a3 1687 int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0, r;
add00535 1688
469830d1
LP
1689 log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
1690
8a50cf69
LP
1691 switch (arch) {
1692
bed4668d
CE
1693 /* Note that on some architectures shmat() isn't available, and the call is multiplexed through ipc().
1694 * We ignore that here, which means there's still a way to get writable/executable
1695 * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
1696
8a50cf69 1697 case SCMP_ARCH_X86:
57311925 1698 case SCMP_ARCH_S390:
8a50cf69
LP
1699 filter_syscall = SCMP_SYS(mmap2);
1700 block_syscall = SCMP_SYS(mmap);
bed4668d 1701 /* shmat multiplexed, see above */
2a8d6e63
ZJS
1702 break;
1703
63d00dfb 1704 case SCMP_ARCH_PPC:
2a8d6e63
ZJS
1705 case SCMP_ARCH_PPC64:
1706 case SCMP_ARCH_PPC64LE:
bed4668d 1707 case SCMP_ARCH_S390X:
2a8d6e63 1708 filter_syscall = SCMP_SYS(mmap);
bed4668d 1709 /* shmat multiplexed, see above */
8a50cf69
LP
1710 break;
1711
4278d1f5
ZJS
1712 case SCMP_ARCH_ARM:
1713 filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */
1714 shmat_syscall = SCMP_SYS(shmat);
1715 break;
1716
8a50cf69
LP
1717 case SCMP_ARCH_X86_64:
1718 case SCMP_ARCH_X32:
79873bc8 1719 case SCMP_ARCH_AARCH64:
f9252236
AJ
1720#ifdef SCMP_ARCH_RISCV64
1721 case SCMP_ARCH_RISCV64:
1722#endif
1723 filter_syscall = SCMP_SYS(mmap); /* amd64, x32, arm64 and riscv64 have only mmap */
8a50cf69
LP
1724 shmat_syscall = SCMP_SYS(shmat);
1725 break;
1726
1727 /* Please add more definitions here, if you port systemd to other architectures! */
1728
f9252236 1729#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__) && !defined(__s390__) && !defined(__s390x__) && !(defined(__riscv) && __riscv_xlen == 64)
8a50cf69
LP
1730#warning "Consider adding the right mmap() syscall definitions here!"
1731#endif
1732 }
1733
1734 /* Can't filter mmap() on this arch, then skip it */
1735 if (filter_syscall == 0)
1736 continue;
1737
469830d1
LP
1738 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1739 if (r < 0)
1740 return r;
1741
6dc66688
ZJS
1742 r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall,
1743 1,
1744 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
1745 if (r < 0)
1746 continue;
8a50cf69
LP
1747
1748 if (block_syscall != 0) {
6dc66688
ZJS
1749 r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} );
1750 if (r < 0)
8a50cf69 1751 continue;
add00535 1752 }
a3be2849 1753
6dc66688
ZJS
1754 r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect),
1755 1,
b835eeb4
ZJS
1756 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
1757 if (r < 0)
1758 continue;
1759
1760 r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(pkey_mprotect),
1761 1,
6dc66688
ZJS
1762 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
1763 if (r < 0)
469830d1 1764 continue;
add00535 1765
67fb5f33 1766 if (shmat_syscall > 0) {
5ef3ed97 1767 r = add_seccomp_syscall_filter(seccomp, arch, shmat_syscall,
6dc66688
ZJS
1768 1,
1769 SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
1770 if (r < 0)
8a50cf69 1771 continue;
469830d1
LP
1772 }
1773
1774 r = seccomp_load(seccomp);
7bc5e0b1 1775 if (ERRNO_IS_SECCOMP_FATAL(r))
469830d1 1776 return r;
add00535 1777 if (r < 0)
b069c2a3
ZJS
1778 log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m",
1779 seccomp_arch_to_string(arch));
903659e7 1780 loaded++;
469830d1 1781 }
add00535 1782
903659e7 1783 if (loaded == 0)
b069c2a3 1784 log_debug("Failed to install any seccomp rules for MemoryDenyWriteExecute=.");
903659e7
CE
1785
1786 return loaded;
469830d1
LP
1787}
1788
1789int seccomp_restrict_archs(Set *archs) {
1790 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
469830d1 1791 int r;
65976868 1792 bool blocked_new = false;
469830d1
LP
1793
1794 /* This installs a filter with no rules, but that restricts the system call architectures to the specified
2428aaf8
AJ
1795 * list.
1796 *
1797 * There are some qualifications. However the most important use is to stop processes from bypassing
1798 * system call restrictions, in case they used a broader (multiplexing) syscall which is only available
1799 * in a non-native architecture. There are no holes in this use case, at least so far. */
469830d1 1800
2428aaf8
AJ
1801 /* Note libseccomp includes our "native" (current) architecture in the filter by default.
1802 * We do not remove it. For example, our callers expect to be able to call execve() afterwards
1803 * to run a program with the restrictions applied. */
469830d1
LP
1804 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1805 if (!seccomp)
1806 return -ENOMEM;
1807
65976868
GDF
1808 for (unsigned i = 0; seccomp_local_archs[i] != SECCOMP_LOCAL_ARCH_END; ++i) {
1809 uint32_t arch = seccomp_local_archs[i];
2428aaf8 1810
f833df38
BB
1811 /* See above comment, our "native" architecture is never blocked. */
1812 if (arch == seccomp_arch_native())
1813 continue;
1814
65976868
GDF
1815 /* That architecture might have already been blocked by a previous call to seccomp_restrict_archs. */
1816 if (arch == SECCOMP_LOCAL_ARCH_BLOCKED)
1817 continue;
2428aaf8 1818
65976868 1819 bool block = !set_contains(archs, UINT32_TO_PTR(arch + 1));
2428aaf8 1820
65976868
GDF
1821 /* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32
1822 * x32 syscalls should basically match x86-64 for everything except the pointer type.
1823 * The important thing is that you can block the old 32-bit x86 syscalls.
1824 * https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */
1825 if (block && arch == SCMP_ARCH_X86_64 && seccomp_arch_native() == SCMP_ARCH_X32)
1826 block = !set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1));
1827
1828 if (block) {
1829 seccomp_local_archs[i] = SECCOMP_LOCAL_ARCH_BLOCKED;
1830 blocked_new = true;
1831 } else {
1832 r = seccomp_arch_add(seccomp, arch);
1833 if (r < 0 && r != -EEXIST)
1834 return r;
1835 }
add00535
LP
1836 }
1837
65976868
GDF
1838 /* All architectures that will be blocked by the seccomp program were
1839 * already blocked. */
1840 if (!blocked_new)
1841 return 0;
1842
469830d1
LP
1843 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1844 if (r < 0)
1845 return r;
add00535 1846
1c6af69b 1847 r = seccomp_load(seccomp);
7bc5e0b1 1848 if (ERRNO_IS_SECCOMP_FATAL(r))
1c6af69b
LP
1849 return r;
1850 if (r < 0)
1851 log_debug_errno(r, "Failed to restrict system call architectures, skipping: %m");
1852
1853 return 0;
a3be2849 1854}
b16bd535 1855
de7fef4b
ZJS
1856int parse_syscall_archs(char **l, Set **ret_archs) {
1857 _cleanup_set_free_ Set *archs = NULL;
b16bd535
YW
1858 int r;
1859
1860 assert(l);
de7fef4b 1861 assert(ret_archs);
b16bd535
YW
1862
1863 STRV_FOREACH(s, l) {
1864 uint32_t a;
1865
1866 r = seccomp_arch_from_string(*s, &a);
1867 if (r < 0)
1868 return -EINVAL;
1869
de7fef4b 1870 r = set_ensure_put(&archs, NULL, UINT32_TO_PTR(a + 1));
b16bd535
YW
1871 if (r < 0)
1872 return -ENOMEM;
1873 }
1874
de7fef4b 1875 *ret_archs = TAKE_PTR(archs);
b16bd535
YW
1876 return 0;
1877}
165a31c0 1878
8cfa775f 1879int seccomp_filter_set_add(Hashmap *filter, bool add, const SyscallFilterSet *set) {
165a31c0
LP
1880 const char *i;
1881 int r;
1882
1883 assert(set);
1884
1885 NULSTR_FOREACH(i, set->value) {
1886
1887 if (i[0] == '@') {
1888 const SyscallFilterSet *more;
1889
1890 more = syscall_filter_set_find(i);
1891 if (!more)
1892 return -ENXIO;
1893
165a31c0
LP
1894 r = seccomp_filter_set_add(filter, add, more);
1895 if (r < 0)
1896 return r;
1897 } else {
1898 int id;
1899
1900 id = seccomp_syscall_resolve_name(i);
ff217dc3
LP
1901 if (id == __NR_SCMP_ERROR) {
1902 log_debug("Couldn't resolve system call, ignoring: %s", i);
1903 continue;
1904 }
165a31c0
LP
1905
1906 if (add) {
8cfa775f 1907 r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(-1));
165a31c0
LP
1908 if (r < 0)
1909 return r;
1910 } else
8cfa775f 1911 (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
165a31c0
LP
1912 }
1913 }
1914
1915 return 0;
1916}
78e864e5
TM
1917
1918int seccomp_lock_personality(unsigned long personality) {
72eafe71 1919 uint32_t arch;
78e864e5
TM
1920 int r;
1921
72eafe71
LP
1922 if (personality >= PERSONALITY_INVALID)
1923 return -EINVAL;
78e864e5 1924
72eafe71
LP
1925 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1926 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
78e864e5 1927
72eafe71
LP
1928 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1929 if (r < 0)
1930 return r;
1931
1932 r = seccomp_rule_add_exact(
1933 seccomp,
1934 SCMP_ACT_ERRNO(EPERM),
1935 SCMP_SYS(personality),
1936 1,
1937 SCMP_A0(SCMP_CMP_NE, personality));
448ac526
LP
1938 if (r < 0) {
1939 log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1940 continue;
1941 }
72eafe71
LP
1942
1943 r = seccomp_load(seccomp);
7bc5e0b1 1944 if (ERRNO_IS_SECCOMP_FATAL(r))
72eafe71
LP
1945 return r;
1946 if (r < 0)
1947 log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1948 }
1949
1950 return 0;
78e864e5 1951}
aecd5ac6
TM
1952
1953int seccomp_protect_hostname(void) {
1954 uint32_t arch;
1955 int r;
1956
1957 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
1958 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
1959
1960 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
1961 if (r < 0)
1962 return r;
1963
1964 r = seccomp_rule_add_exact(
1965 seccomp,
1966 SCMP_ACT_ERRNO(EPERM),
1967 SCMP_SYS(sethostname),
1968 0);
9e6e543c
LP
1969 if (r < 0) {
1970 log_debug_errno(r, "Failed to add sethostname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
aecd5ac6 1971 continue;
9e6e543c 1972 }
aecd5ac6
TM
1973
1974 r = seccomp_rule_add_exact(
1975 seccomp,
1976 SCMP_ACT_ERRNO(EPERM),
1977 SCMP_SYS(setdomainname),
1978 0);
9e6e543c
LP
1979 if (r < 0) {
1980 log_debug_errno(r, "Failed to add setdomainname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
aecd5ac6 1981 continue;
9e6e543c 1982 }
aecd5ac6
TM
1983
1984 r = seccomp_load(seccomp);
7bc5e0b1 1985 if (ERRNO_IS_SECCOMP_FATAL(r))
aecd5ac6
TM
1986 return r;
1987 if (r < 0)
1988 log_debug_errno(r, "Failed to apply hostname restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
1989 }
1990
1991 return 0;
1992}
3c27973b 1993
da4dc9a6
ZJS
1994static int seccomp_restrict_sxid(scmp_filter_ctx seccomp, mode_t m) {
1995 /* Checks the mode_t parameter of the following system calls:
1996 *
1997 * → chmod() + fchmod() + fchmodat()
1998 * → open() + creat() + openat()
1999 * → mkdir() + mkdirat()
2000 * → mknod() + mknodat()
2001 *
2002 * Returns error if *everything* failed, and 0 otherwise.
2003 */
6d95e7d9 2004 int r;
da4dc9a6
ZJS
2005 bool any = false;
2006
2007 r = seccomp_rule_add_exact(
2008 seccomp,
2009 SCMP_ACT_ERRNO(EPERM),
2010 SCMP_SYS(chmod),
2011 1,
2012 SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
2013 if (r < 0)
2014 log_debug_errno(r, "Failed to add filter for chmod: %m");
2015 else
2016 any = true;
2017
2018 r = seccomp_rule_add_exact(
2019 seccomp,
2020 SCMP_ACT_ERRNO(EPERM),
2021 SCMP_SYS(fchmod),
2022 1,
2023 SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
2024 if (r < 0)
2025 log_debug_errno(r, "Failed to add filter for fchmod: %m");
2026 else
2027 any = true;
2028
2029 r = seccomp_rule_add_exact(
2030 seccomp,
2031 SCMP_ACT_ERRNO(EPERM),
2032 SCMP_SYS(fchmodat),
2033 1,
2034 SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
2035 if (r < 0)
2036 log_debug_errno(r, "Failed to add filter for fchmodat: %m");
2037 else
2038 any = true;
2039
2040 r = seccomp_rule_add_exact(
2041 seccomp,
2042 SCMP_ACT_ERRNO(EPERM),
2043 SCMP_SYS(mkdir),
2044 1,
2045 SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
2046 if (r < 0)
2047 log_debug_errno(r, "Failed to add filter for mkdir: %m");
2048 else
2049 any = true;
2050
2051 r = seccomp_rule_add_exact(
2052 seccomp,
2053 SCMP_ACT_ERRNO(EPERM),
2054 SCMP_SYS(mkdirat),
2055 1,
2056 SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
2057 if (r < 0)
2058 log_debug_errno(r, "Failed to add filter for mkdirat: %m");
2059 else
2060 any = true;
2061
2062 r = seccomp_rule_add_exact(
2063 seccomp,
2064 SCMP_ACT_ERRNO(EPERM),
2065 SCMP_SYS(mknod),
2066 1,
2067 SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
2068 if (r < 0)
2069 log_debug_errno(r, "Failed to add filter for mknod: %m");
2070 else
2071 any = true;
2072
2073 r = seccomp_rule_add_exact(
2074 seccomp,
2075 SCMP_ACT_ERRNO(EPERM),
2076 SCMP_SYS(mknodat),
2077 1,
2078 SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
2079 if (r < 0)
2080 log_debug_errno(r, "Failed to add filter for mknodat: %m");
2081 else
2082 any = true;
2083
da4dc9a6
ZJS
2084 r = seccomp_rule_add_exact(
2085 seccomp,
2086 SCMP_ACT_ERRNO(EPERM),
2087 SCMP_SYS(open),
2088 2,
2089 SCMP_A1(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
2090 SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
2091 if (r < 0)
2092 log_debug_errno(r, "Failed to add filter for open: %m");
2093 else
2094 any = true;
da4dc9a6
ZJS
2095
2096 r = seccomp_rule_add_exact(
2097 seccomp,
2098 SCMP_ACT_ERRNO(EPERM),
2099 SCMP_SYS(openat),
2100 2,
2101 SCMP_A2(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
2102 SCMP_A3(SCMP_CMP_MASKED_EQ, m, m));
2103 if (r < 0)
2104 log_debug_errno(r, "Failed to add filter for openat: %m");
2105 else
2106 any = true;
2107
ecc04067
LP
2108#if defined(__SNR_openat2)
2109 /* The new openat2() system call can't be filtered sensibly, since it moves the flags parameter into
2110 * an indirect structure. Let's block it entirely for now. That should be a reasonably OK thing to do
2111 * for now, since openat2() is very new and code generally needs fallback logic anyway to be
57353d29
MG
2112 * compatible with kernels that are not absolutely recent. We would normally return EPERM for a
2113 * policy check, but this isn't strictly a policy check. Instead, we return ENOSYS to force programs
2114 * to call open() or openat() instead. We can properly enforce policy for those functions. */
ecc04067
LP
2115 r = seccomp_rule_add_exact(
2116 seccomp,
57353d29 2117 SCMP_ACT_ERRNO(ENOSYS),
ecc04067
LP
2118 SCMP_SYS(openat2),
2119 0);
2120 if (r < 0)
2121 log_debug_errno(r, "Failed to add filter for openat2: %m");
2122 else
2123 any = true;
2124#endif
2125
da4dc9a6
ZJS
2126 r = seccomp_rule_add_exact(
2127 seccomp,
2128 SCMP_ACT_ERRNO(EPERM),
2129 SCMP_SYS(creat),
2130 1,
2131 SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
2132 if (r < 0)
2133 log_debug_errno(r, "Failed to add filter for creat: %m");
2134 else
2135 any = true;
2136
2137 return any ? 0 : r;
2138}
2139
3c27973b
LP
2140int seccomp_restrict_suid_sgid(void) {
2141 uint32_t arch;
da4dc9a6 2142 int r, k;
3c27973b
LP
2143
2144 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
2145 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
2146
2147 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
2148 if (r < 0)
2149 return r;
2150
da4dc9a6
ZJS
2151 r = seccomp_restrict_sxid(seccomp, S_ISUID);
2152 if (r < 0)
2153 log_debug_errno(r, "Failed to add suid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
3c27973b 2154
da4dc9a6
ZJS
2155 k = seccomp_restrict_sxid(seccomp, S_ISGID);
2156 if (k < 0)
2157 log_debug_errno(r, "Failed to add sgid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
3c27973b 2158
da4dc9a6 2159 if (r < 0 && k < 0)
3c27973b 2160 continue;
3c27973b
LP
2161
2162 r = seccomp_load(seccomp);
7bc5e0b1 2163 if (ERRNO_IS_SECCOMP_FATAL(r))
3c27973b
LP
2164 return r;
2165 if (r < 0)
2166 log_debug_errno(r, "Failed to apply suid/sgid restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
2167 }
2168
2169 return 0;
2170}
915fb324
LP
2171
2172uint32_t scmp_act_kill_process(void) {
2173
2174 /* Returns SCMP_ACT_KILL_PROCESS if it's supported, and SCMP_ACT_KILL_THREAD otherwise. We never
2175 * actually want to use SCMP_ACT_KILL_THREAD as its semantics are nuts (killing arbitrary threads of
2176 * a program is just a bad idea), but on old kernels/old libseccomp it is all we have, and at least
2177 * for single-threaded apps does the right thing. */
2178
2179#ifdef SCMP_ACT_KILL_PROCESS
2180 if (seccomp_api_get() >= 3)
2181 return SCMP_ACT_KILL_PROCESS;
2182#endif
2183
2184 return SCMP_ACT_KILL; /* same as SCMP_ACT_KILL_THREAD */
2185}
22eadc28
YW
2186
2187int parse_syscall_and_errno(const char *in, char **name, int *error) {
2188 _cleanup_free_ char *n = NULL;
2189 char *p;
2190 int e = -1;
2191
2192 assert(in);
2193 assert(name);
2194 assert(error);
2195
2196 /*
2197 * This parse "syscall:errno" like "uname:EILSEQ", "@sync:255".
2198 * If errno is omitted, then error is set to -1.
2199 * Empty syscall name is not allowed.
2200 * Here, we do not check that the syscall name is valid or not.
2201 */
2202
2203 p = strchr(in, ':');
2204 if (p) {
2205 e = seccomp_parse_errno_or_action(p + 1);
2206 if (e < 0)
2207 return e;
2208
2209 n = strndup(in, p - in);
2210 } else
2211 n = strdup(in);
2212
2213 if (!n)
2214 return -ENOMEM;
2215
2216 if (isempty(n))
2217 return -EINVAL;
2218
2219 *error = e;
2220 *name = TAKE_PTR(n);
2221
2222 return 0;
2223}
4a4654e0
LP
2224
2225static int block_open_flag(scmp_filter_ctx seccomp, int flag) {
2226 bool any = false;
2227 int r;
2228
2229 /* Blocks open() with the specified flag, where flag is O_SYNC or so. This makes these calls return
2230 * EINVAL, in the hope the client code will retry without O_SYNC then. */
2231
4a4654e0
LP
2232 r = seccomp_rule_add_exact(
2233 seccomp,
2234 SCMP_ACT_ERRNO(EINVAL),
2235 SCMP_SYS(open),
2236 1,
2237 SCMP_A1(SCMP_CMP_MASKED_EQ, flag, flag));
2238 if (r < 0)
2239 log_debug_errno(r, "Failed to add filter for open: %m");
2240 else
2241 any = true;
4a4654e0
LP
2242
2243 r = seccomp_rule_add_exact(
2244 seccomp,
2245 SCMP_ACT_ERRNO(EINVAL),
2246 SCMP_SYS(openat),
2247 1,
2248 SCMP_A2(SCMP_CMP_MASKED_EQ, flag, flag));
2249 if (r < 0)
2250 log_debug_errno(r, "Failed to add filter for openat: %m");
2251 else
2252 any = true;
2253
2254#if defined(__SNR_openat2)
2255 /* The new openat2() system call can't be filtered sensibly, see above. */
2256 r = seccomp_rule_add_exact(
2257 seccomp,
2258 SCMP_ACT_ERRNO(ENOSYS),
2259 SCMP_SYS(openat2),
2260 0);
2261 if (r < 0)
2262 log_debug_errno(r, "Failed to add filter for openat2: %m");
2263 else
2264 any = true;
2265#endif
2266
2267 return any ? 0 : r;
2268}
2269
2270int seccomp_suppress_sync(void) {
2271 uint32_t arch;
2272 int r;
2273
2274 /* This is mostly identical to SystemCallFilter=~@sync:0, but simpler to use, and separately
2275 * manageable, and also masks O_SYNC/O_DSYNC */
2276
2277 SECCOMP_FOREACH_LOCAL_ARCH(arch) {
2278 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
2279 const char *c;
2280
2281 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
2282 if (r < 0)
2283 return r;
2284
2285 NULSTR_FOREACH(c, syscall_filter_sets[SYSCALL_FILTER_SET_SYNC].value) {
2286 int id;
2287
2288 id = seccomp_syscall_resolve_name(c);
2289 if (id == __NR_SCMP_ERROR) {
2290 log_debug("System call %s is not known, ignoring.", c);
2291 continue;
2292 }
2293
2294 r = seccomp_rule_add_exact(
2295 seccomp,
2296 SCMP_ACT_ERRNO(0), /* success → we want this to be a NOP after all */
2297 id,
2298 0);
2299 if (r < 0)
2300 log_debug_errno(r, "Failed to add filter for system call %s, ignoring: %m", c);
2301 }
2302
2303 (void) block_open_flag(seccomp, O_SYNC);
2304#if O_DSYNC != O_SYNC
2305 (void) block_open_flag(seccomp, O_DSYNC);
2306#endif
2307
2308 r = seccomp_load(seccomp);
2309 if (ERRNO_IS_SECCOMP_FATAL(r))
2310 return r;
2311 if (r < 0)
2312 log_debug_errno(r, "Failed to apply sync() suppression for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
2313 }
2314
2315 return 0;
2316}