]>
Commit | Line | Data |
---|---|---|
f011b0b8 DH |
1 | /*** |
2 | This file is part of systemd. | |
3 | ||
4 | Copyright 2016 Lennart Poettering | |
5 | ||
6 | systemd is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU Lesser General Public License as published by | |
8 | the Free Software Foundation; either version 2.1 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | systemd is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public License | |
17 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
18 | ***/ | |
19 | ||
20 | #include <errno.h> | |
21 | #include <linux/netlink.h> | |
22 | #include <sys/capability.h> | |
23 | #include <sys/types.h> | |
24 | ||
25 | #ifdef HAVE_SECCOMP | |
26 | #include <seccomp.h> | |
27 | #endif | |
28 | ||
469830d1 | 29 | #include "alloc-util.h" |
f011b0b8 | 30 | #include "log.h" |
469830d1 | 31 | #include "nspawn-seccomp.h" |
f011b0b8 DH |
32 | #ifdef HAVE_SECCOMP |
33 | #include "seccomp-util.h" | |
34 | #endif | |
469830d1 | 35 | #include "string-util.h" |
960e4569 | 36 | #include "strv.h" |
f011b0b8 DH |
37 | |
38 | #ifdef HAVE_SECCOMP | |
39 | ||
469830d1 LP |
40 | static int seccomp_add_default_syscall_filter( |
41 | scmp_filter_ctx ctx, | |
42 | uint32_t arch, | |
960e4569 LP |
43 | uint64_t cap_list_retain, |
44 | char **syscall_whitelist, | |
45 | char **syscall_blacklist) { | |
469830d1 | 46 | |
f011b0b8 DH |
47 | static const struct { |
48 | uint64_t capability; | |
402530d9 | 49 | const char* name; |
f011b0b8 | 50 | } blacklist[] = { |
7609340e LP |
51 | { 0, "@obsolete" }, |
52 | { 0, "@keyring" }, /* keyring is not namespaced */ | |
402530d9 | 53 | { 0, "bpf" }, |
402530d9 | 54 | { 0, "kexec_file_load" }, |
402530d9 | 55 | { 0, "kexec_load" }, |
402530d9 | 56 | { 0, "lookup_dcookie" }, |
402530d9 LP |
57 | { 0, "open_by_handle_at" }, |
58 | { 0, "perf_event_open" }, | |
402530d9 | 59 | { 0, "quotactl" }, |
7609340e | 60 | { 0, "@swap" }, |
402530d9 | 61 | { CAP_SYSLOG, "syslog" }, |
7609340e | 62 | { CAP_SYS_MODULE, "@module" }, |
402530d9 LP |
63 | { CAP_SYS_PACCT, "acct" }, |
64 | { CAP_SYS_PTRACE, "process_vm_readv" }, | |
65 | { CAP_SYS_PTRACE, "process_vm_writev" }, | |
66 | { CAP_SYS_PTRACE, "ptrace" }, | |
7609340e LP |
67 | { CAP_SYS_RAWIO, "@raw-io" }, |
68 | { CAP_SYS_TIME, "@clock" }, | |
f011b0b8 | 69 | }; |
402530d9 | 70 | |
469830d1 | 71 | int r, c = 0; |
402530d9 | 72 | size_t i; |
960e4569 | 73 | char **p; |
f011b0b8 DH |
74 | |
75 | for (i = 0; i < ELEMENTSOF(blacklist); i++) { | |
54a17e01 | 76 | if (blacklist[i].capability != 0 && (cap_list_retain & (1ULL << blacklist[i].capability))) |
f011b0b8 DH |
77 | continue; |
78 | ||
960e4569 | 79 | r = seccomp_add_syscall_filter_item(ctx, blacklist[i].name, SCMP_ACT_ERRNO(EPERM), syscall_whitelist); |
402530d9 | 80 | if (r < 0) |
469830d1 | 81 | /* If the system call is not known on this architecture, then that's fine, let's ignore it */ |
402530d9 LP |
82 | log_debug_errno(r, "Failed to add rule for system call %s, ignoring: %m", blacklist[i].name); |
83 | else | |
469830d1 | 84 | c++; |
f011b0b8 DH |
85 | } |
86 | ||
960e4569 LP |
87 | STRV_FOREACH(p, syscall_blacklist) { |
88 | r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ERRNO(EPERM), syscall_whitelist); | |
89 | if (r < 0) | |
90 | log_debug_errno(r, "Failed to add rule for system call %s, ignoring: %m", *p); | |
91 | else | |
92 | c++; | |
93 | } | |
94 | ||
469830d1 | 95 | return c; |
f011b0b8 DH |
96 | } |
97 | ||
960e4569 | 98 | int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **syscall_blacklist) { |
469830d1 | 99 | uint32_t arch; |
f011b0b8 DH |
100 | int r; |
101 | ||
1cec406d | 102 | if (!is_seccomp_available()) { |
960e4569 | 103 | log_debug("SECCOMP features not detected in the kernel, disabling SECCOMP filterering"); |
1cec406d FS |
104 | return 0; |
105 | } | |
106 | ||
469830d1 LP |
107 | SECCOMP_FOREACH_LOCAL_ARCH(arch) { |
108 | _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; | |
109 | int n; | |
110 | ||
111 | log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); | |
112 | ||
113 | r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); | |
114 | if (r < 0) | |
115 | return log_error_errno(r, "Failed to allocate seccomp object: %m"); | |
116 | ||
960e4569 | 117 | n = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist); |
469830d1 LP |
118 | if (n < 0) |
119 | return n; | |
120 | ||
121 | /* | |
122 | Audit is broken in containers, much of the userspace audit hookup will fail if running inside a | |
123 | container. We don't care and just turn off creation of audit sockets. | |
124 | ||
125 | This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail with EAFNOSUPPORT which audit userspace uses | |
126 | as indication that audit is disabled in the kernel. | |
127 | */ | |
128 | ||
129 | r = seccomp_rule_add_exact( | |
130 | seccomp, | |
131 | SCMP_ACT_ERRNO(EAFNOSUPPORT), | |
132 | SCMP_SYS(socket), | |
133 | 2, | |
134 | SCMP_A0(SCMP_CMP_EQ, AF_NETLINK), | |
135 | SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT)); | |
136 | if (r < 0) | |
137 | log_debug_errno(r, "Failed to add audit seccomp rule, ignoring: %m"); | |
138 | else | |
139 | n++; | |
140 | ||
141 | if (n <= 0) /* no rule added? then skip this architecture */ | |
142 | continue; | |
f011b0b8 | 143 | |
469830d1 LP |
144 | r = seccomp_load(seccomp); |
145 | if (IN_SET(r, -EPERM, -EACCES)) | |
146 | return log_error_errno(r, "Failed to install seccomp audit filter: %m"); | |
147 | if (r < 0) | |
148 | log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); | |
f011b0b8 DH |
149 | } |
150 | ||
469830d1 | 151 | return 0; |
f011b0b8 DH |
152 | } |
153 | ||
154 | #else | |
155 | ||
/*
 * Variant used when HAVE_SECCOMP is not defined: there is no filter to install,
 * so all arguments are ignored and success (0) is reported.
 */
int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **syscall_blacklist) {
        /* Nothing to configure here; discard the arguments explicitly. */
        (void) cap_list_retain;
        (void) syscall_whitelist;
        (void) syscall_blacklist;

        return 0;
}
159 | ||
160 | #endif |