]>
Commit | Line | Data |
---|---|---|
f011b0b8 DH |
1 | /*** |
2 | This file is part of systemd. | |
3 | ||
4 | Copyright 2016 Lennart Poettering | |
5 | ||
6 | systemd is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU Lesser General Public License as published by | |
8 | the Free Software Foundation; either version 2.1 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | systemd is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public License | |
17 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
18 | ***/ | |
19 | ||
20 | #include <errno.h> | |
21 | #include <linux/netlink.h> | |
22 | #include <sys/capability.h> | |
23 | #include <sys/types.h> | |
24 | ||
25 | #ifdef HAVE_SECCOMP | |
26 | #include <seccomp.h> | |
27 | #endif | |
28 | ||
29 | #include "log.h" | |
30 | ||
31 | #ifdef HAVE_SECCOMP | |
32 | #include "seccomp-util.h" | |
33 | #endif | |
34 | ||
35 | #include "nspawn-seccomp.h" | |
36 | ||
37 | #ifdef HAVE_SECCOMP | |
38 | ||
39 | static int seccomp_add_default_syscall_filter(scmp_filter_ctx ctx, | |
40 | uint64_t cap_list_retain) { | |
41 | unsigned i; | |
42 | int r; | |
43 | static const struct { | |
44 | uint64_t capability; | |
45 | int syscall_num; | |
46 | } blacklist[] = { | |
47 | { CAP_SYS_RAWIO, SCMP_SYS(iopl) }, | |
48 | { CAP_SYS_RAWIO, SCMP_SYS(ioperm) }, | |
49 | { CAP_SYS_BOOT, SCMP_SYS(kexec_load) }, | |
50 | { CAP_SYS_ADMIN, SCMP_SYS(swapon) }, | |
51 | { CAP_SYS_ADMIN, SCMP_SYS(swapoff) }, | |
52 | { CAP_SYS_ADMIN, SCMP_SYS(open_by_handle_at) }, | |
53 | { CAP_SYS_MODULE, SCMP_SYS(init_module) }, | |
54 | { CAP_SYS_MODULE, SCMP_SYS(finit_module) }, | |
55 | { CAP_SYS_MODULE, SCMP_SYS(delete_module) }, | |
56 | { CAP_SYSLOG, SCMP_SYS(syslog) }, | |
57 | }; | |
58 | ||
59 | for (i = 0; i < ELEMENTSOF(blacklist); i++) { | |
60 | if (cap_list_retain & (1ULL << blacklist[i].capability)) | |
61 | continue; | |
62 | ||
63 | r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); | |
64 | if (r == -EFAULT) | |
65 | continue; /* unknown syscall */ | |
66 | if (r < 0) { | |
67 | log_error_errno(r, "Failed to block syscall: %m"); | |
68 | return r; | |
69 | } | |
70 | } | |
71 | ||
72 | return 0; | |
73 | } | |
74 | ||
75 | int setup_seccomp(uint64_t cap_list_retain) { | |
76 | scmp_filter_ctx seccomp; | |
77 | int r; | |
78 | ||
79 | seccomp = seccomp_init(SCMP_ACT_ALLOW); | |
80 | if (!seccomp) | |
81 | return log_oom(); | |
82 | ||
83 | r = seccomp_add_secondary_archs(seccomp); | |
84 | if (r < 0) { | |
85 | log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m"); | |
86 | goto finish; | |
87 | } | |
88 | ||
89 | r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain); | |
90 | if (r < 0) | |
91 | goto finish; | |
92 | ||
93 | /* | |
94 | Audit is broken in containers, much of the userspace audit | |
95 | hookup will fail if running inside a container. We don't | |
96 | care and just turn off creation of audit sockets. | |
97 | ||
98 | This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail | |
99 | with EAFNOSUPPORT which audit userspace uses as indication | |
100 | that audit is disabled in the kernel. | |
101 | */ | |
102 | ||
103 | r = seccomp_rule_add( | |
104 | seccomp, | |
105 | SCMP_ACT_ERRNO(EAFNOSUPPORT), | |
106 | SCMP_SYS(socket), | |
107 | 2, | |
108 | SCMP_A0(SCMP_CMP_EQ, AF_NETLINK), | |
109 | SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT)); | |
110 | if (r < 0) { | |
111 | log_error_errno(r, "Failed to add audit seccomp rule: %m"); | |
112 | goto finish; | |
113 | } | |
114 | ||
115 | r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); | |
116 | if (r < 0) { | |
117 | log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m"); | |
118 | goto finish; | |
119 | } | |
120 | ||
121 | r = seccomp_load(seccomp); | |
122 | if (r == -EINVAL) { | |
123 | log_debug_errno(r, "Kernel is probably not configured with CONFIG_SECCOMP. Disabling seccomp audit filter: %m"); | |
124 | r = 0; | |
125 | goto finish; | |
126 | } | |
127 | if (r < 0) { | |
128 | log_error_errno(r, "Failed to install seccomp audit filter: %m"); | |
129 | goto finish; | |
130 | } | |
131 | ||
132 | finish: | |
133 | seccomp_release(seccomp); | |
134 | return r; | |
135 | } | |
136 | ||
137 | #else | |
138 | ||
/*
 * Variant compiled when HAVE_SECCOMP is not defined: there is no filter to
 * install, so the argument is ignored and success (0) is always returned.
 */
int setup_seccomp(uint64_t cap_list_retain) {
        (void) cap_list_retain; /* intentionally unused in this variant */

        return 0;
}
142 | ||
143 | #endif |