]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/capability-util.c
core: Record ExecMainStartTimestamp before forking
[thirdparty/systemd.git] / src / basic / capability-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <stdatomic.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <sys/prctl.h>
8 #include <unistd.h>
9
10 #include "alloc-util.h"
11 #include "capability-util.h"
12 #include "cap-list.h"
13 #include "fileio.h"
14 #include "log.h"
15 #include "logarithm.h"
16 #include "macro.h"
17 #include "missing_prctl.h"
18 #include "missing_threads.h"
19 #include "parse-util.h"
20 #include "user-util.h"
21
22 int have_effective_cap(int value) {
23 _cleanup_cap_free_ cap_t cap = NULL;
24 cap_flag_value_t fv = CAP_CLEAR; /* To avoid false-positive use-of-uninitialized-value error reported
25 * by fuzzers. */
26
27 cap = cap_get_proc();
28 if (!cap)
29 return -errno;
30
31 if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
32 return -errno;
33
34 return fv == CAP_SET;
35 }
36
37 unsigned cap_last_cap(void) {
38 static atomic_int saved = INT_MAX;
39 int r, c;
40
41 c = saved;
42 if (c != INT_MAX)
43 return c;
44
45 /* Available since linux-3.2 */
46 _cleanup_free_ char *content = NULL;
47 r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
48 if (r < 0)
49 log_debug_errno(r, "Failed to read /proc/sys/kernel/cap_last_cap, ignoring: %m");
50 else {
51 r = safe_atoi(content, &c);
52 if (r < 0)
53 log_debug_errno(r, "Failed to parse /proc/sys/kernel/cap_last_cap, ignoring: %m");
54 else {
55 if (c > CAP_LIMIT) /* Safety for the future: if one day the kernel learns more than
56 * 64 caps, then we are in trouble (since we, as much userspace
57 * and kernel space store capability masks in uint64_t types). We
58 * also want to use UINT64_MAX as marker for "unset". Hence let's
59 * hence protect ourselves against that and always cap at 62 for
60 * now. */
61 c = CAP_LIMIT;
62
63 saved = c;
64 return c;
65 }
66 }
67
68 /* Fall back to syscall-probing for pre linux-3.2, or where /proc/ is not mounted */
69 unsigned long p = (unsigned long) MIN(CAP_LAST_CAP, CAP_LIMIT);
70
71 if (prctl(PR_CAPBSET_READ, p) < 0) {
72
73 /* Hmm, look downwards, until we find one that works */
74 for (p--; p > 0; p--)
75 if (prctl(PR_CAPBSET_READ, p) >= 0)
76 break;
77
78 } else {
79
80 /* Hmm, look upwards, until we find one that doesn't work */
81 for (; p < CAP_LIMIT; p++)
82 if (prctl(PR_CAPBSET_READ, p+1) < 0)
83 break;
84 }
85
86 c = (int) p;
87 saved = c;
88 return c;
89 }
90
91 int capability_update_inherited_set(cap_t caps, uint64_t set) {
92 /* Add capabilities in the set to the inherited caps, drops capabilities not in the set.
93 * Do not apply them yet. */
94
95 for (unsigned i = 0; i <= cap_last_cap(); i++) {
96 cap_flag_value_t flag = set & (UINT64_C(1) << i) ? CAP_SET : CAP_CLEAR;
97 cap_value_t v;
98
99 v = (cap_value_t) i;
100
101 if (cap_set_flag(caps, CAP_INHERITABLE, 1, &v, flag) < 0)
102 return -errno;
103 }
104
105 return 0;
106 }
107
108 int capability_ambient_set_apply(uint64_t set, bool also_inherit) {
109 _cleanup_cap_free_ cap_t caps = NULL;
110 int r;
111
112 /* Remove capabilities requested in ambient set, but not in the bounding set */
113 for (unsigned i = 0; i <= cap_last_cap(); i++) {
114 if (set == 0)
115 break;
116
117 if (FLAGS_SET(set, (UINT64_C(1) << i)) && prctl(PR_CAPBSET_READ, i) != 1) {
118 log_debug("Ambient capability %s requested but missing from bounding set,"
119 " suppressing automatically.", capability_to_name(i));
120 set &= ~(UINT64_C(1) << i);
121 }
122 }
123
124 /* Add the capabilities to the ambient set (an possibly also the inheritable set) */
125
126 /* Check that we can use PR_CAP_AMBIENT or quit early. */
127 if (!ambient_capabilities_supported())
128 return (set & all_capabilities()) == 0 ?
129 0 : -EOPNOTSUPP; /* if actually no ambient caps are to be set, be silent,
130 * otherwise fail recognizably */
131
132 if (also_inherit) {
133 caps = cap_get_proc();
134 if (!caps)
135 return -errno;
136
137 r = capability_update_inherited_set(caps, set);
138 if (r < 0)
139 return -errno;
140
141 if (cap_set_proc(caps) < 0)
142 return -errno;
143 }
144
145 for (unsigned i = 0; i <= cap_last_cap(); i++) {
146
147 if (set & (UINT64_C(1) << i)) {
148
149 /* Add the capability to the ambient set. */
150 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0)
151 return -errno;
152 } else {
153
154 /* Drop the capability so we don't inherit capabilities we didn't ask for. */
155 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i, 0, 0);
156 if (r < 0)
157 return -errno;
158
159 if (r)
160 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, i, 0, 0) < 0)
161 return -errno;
162
163 }
164 }
165
166 return 0;
167 }
168
169 int capability_gain_cap_setpcap(cap_t *ret_before_caps) {
170 _cleanup_cap_free_ cap_t caps = NULL;
171 cap_flag_value_t fv;
172 caps = cap_get_proc();
173 if (!caps)
174 return -errno;
175
176 if (cap_get_flag(caps, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0)
177 return -errno;
178
179 if (fv != CAP_SET) {
180 _cleanup_cap_free_ cap_t temp_cap = NULL;
181 static const cap_value_t v = CAP_SETPCAP;
182
183 temp_cap = cap_dup(caps);
184 if (!temp_cap)
185 return -errno;
186
187 if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0)
188 return -errno;
189
190 if (cap_set_proc(temp_cap) < 0)
191 log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m");
192
193 /* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all this just means
194 * we'll fail later, when we actually intend to drop some capabilities or try to set securebits. */
195 }
196 if (ret_before_caps)
197 /* Return the capabilities as they have been before setting CAP_SETPCAP */
198 *ret_before_caps = TAKE_PTR(caps);
199
200 return 0;
201 }
202
203 int capability_bounding_set_drop(uint64_t keep, bool right_now) {
204 _cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL;
205 int r;
206
207 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
208 * in the effective set (yes, the kernel drops that when
209 * executing init!), so get it back temporarily so that we can
210 * call PR_CAPBSET_DROP. */
211
212 r = capability_gain_cap_setpcap(&before_cap);
213 if (r < 0)
214 return r;
215
216 after_cap = cap_dup(before_cap);
217 if (!after_cap)
218 return -errno;
219
220 for (unsigned i = 0; i <= cap_last_cap(); i++) {
221 cap_value_t v;
222
223 if ((keep & (UINT64_C(1) << i)))
224 continue;
225
226 /* Drop it from the bounding set */
227 if (prctl(PR_CAPBSET_DROP, i) < 0) {
228 r = -errno;
229
230 /* If dropping the capability failed, let's see if we didn't have it in the first place. If so,
231 * continue anyway, as dropping a capability we didn't have in the first place doesn't really
232 * matter anyway. */
233 if (prctl(PR_CAPBSET_READ, i) != 0)
234 goto finish;
235 }
236 v = (cap_value_t) i;
237
238 /* Also drop it from the inheritable set, so
239 * that anything we exec() loses the
240 * capability for good. */
241 if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
242 r = -errno;
243 goto finish;
244 }
245
246 /* If we shall apply this right now drop it
247 * also from our own capability sets. */
248 if (right_now) {
249 if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
250 cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
251 r = -errno;
252 goto finish;
253 }
254 }
255 }
256
257 r = 0;
258
259 finish:
260 if (cap_set_proc(after_cap) < 0) {
261 /* If there are no actual changes anyway then let's ignore this error. */
262 if (cap_compare(before_cap, after_cap) != 0)
263 r = -errno;
264 }
265
266 return r;
267 }
268
269 static int drop_from_file(const char *fn, uint64_t keep) {
270 _cleanup_free_ char *p = NULL;
271 uint64_t current, after;
272 uint32_t hi, lo;
273 int r, k;
274
275 r = read_one_line_file(fn, &p);
276 if (r < 0)
277 return r;
278
279 k = sscanf(p, "%" PRIu32 " %" PRIu32, &lo, &hi);
280 if (k != 2)
281 return -EIO;
282
283 current = (uint64_t) lo | ((uint64_t) hi << 32);
284 after = current & keep;
285
286 if (current == after)
287 return 0;
288
289 lo = after & UINT32_MAX;
290 hi = (after >> 32) & UINT32_MAX;
291
292 return write_string_filef(fn, 0, "%" PRIu32 " %" PRIu32, lo, hi);
293 }
294
/* Restricts the capability masks stored in the two usermodehelper sysctl files to 'keep'. The
 * inheritable mask is processed first; the bset mask is only touched if that succeeded. Returns the
 * result of the last drop_from_file() call that was made. */
int capability_bounding_set_drop_usermode(uint64_t keep) {
        int r;

        r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep);
        if (r >= 0)
                r = drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep);

        return r;
}
308
309 int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
310 int r;
311
312 /* Unfortunately we cannot leave privilege dropping to PID 1 here, since we want to run as user but
313 * want to keep some capabilities. Since file capabilities have been introduced this cannot be done
314 * across exec() anymore, unless our binary has the capability configured in the file system, which
315 * we want to avoid. */
316
317 if (setresgid(gid, gid, gid) < 0)
318 return log_error_errno(errno, "Failed to change group ID: %m");
319
320 r = maybe_setgroups(0, NULL);
321 if (r < 0)
322 return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
323
324 /* Ensure we keep the permitted caps across the setresuid(). Note that we do this even if we actually
325 * don't want to keep any capabilities, since we want to be able to drop them from the bounding set
326 * too, and we can only do that if we have capabilities. */
327 if (prctl(PR_SET_KEEPCAPS, 1) < 0)
328 return log_error_errno(errno, "Failed to enable keep capabilities flag: %m");
329
330 if (setresuid(uid, uid, uid) < 0)
331 return log_error_errno(errno, "Failed to change user ID: %m");
332
333 if (prctl(PR_SET_KEEPCAPS, 0) < 0)
334 return log_error_errno(errno, "Failed to disable keep capabilities flag: %m");
335
336 /* Drop all caps from the bounding set (as well as the inheritable/permitted/effective sets), except
337 * the ones we want to keep */
338 r = capability_bounding_set_drop(keep_capabilities, true);
339 if (r < 0)
340 return log_error_errno(r, "Failed to drop capabilities: %m");
341
342 /* Now upgrade the permitted caps we still kept to effective caps */
343 if (keep_capabilities != 0) {
344 cap_value_t bits[log2u64(keep_capabilities) + 1];
345 _cleanup_cap_free_ cap_t d = NULL;
346 unsigned i, j = 0;
347
348 d = cap_init();
349 if (!d)
350 return log_oom();
351
352 for (i = 0; i < ELEMENTSOF(bits); i++)
353 if (keep_capabilities & (1ULL << i))
354 bits[j++] = i;
355
356 /* use enough bits */
357 assert(i == 64 || (keep_capabilities >> i) == 0);
358 /* don't use too many bits */
359 assert(keep_capabilities & (UINT64_C(1) << (i - 1)));
360
361 if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 ||
362 cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0)
363 return log_error_errno(errno, "Failed to enable capabilities bits: %m");
364
365 if (cap_set_proc(d) < 0)
366 return log_error_errno(errno, "Failed to increase capabilities: %m");
367 }
368
369 return 0;
370 }
371
372 static int change_capability(cap_value_t cv, cap_flag_value_t flag) {
373 _cleanup_cap_free_ cap_t tmp_cap = NULL;
374
375 tmp_cap = cap_get_proc();
376 if (!tmp_cap)
377 return -errno;
378
379 if ((cap_set_flag(tmp_cap, CAP_INHERITABLE, 1, &cv, flag) < 0) ||
380 (cap_set_flag(tmp_cap, CAP_PERMITTED, 1, &cv, flag) < 0) ||
381 (cap_set_flag(tmp_cap, CAP_EFFECTIVE, 1, &cv, flag) < 0))
382 return -errno;
383
384 if (cap_set_proc(tmp_cap) < 0)
385 return -errno;
386
387 return 0;
388 }
389
390 int drop_capability(cap_value_t cv) {
391 return change_capability(cv, CAP_CLEAR);
392 }
393
394 int keep_capability(cap_value_t cv) {
395 return change_capability(cv, CAP_SET);
396 }
397
398 bool ambient_capabilities_supported(void) {
399 static int cache = -1;
400
401 if (cache >= 0)
402 return cache;
403
404 /* If PR_CAP_AMBIENT returns something valid, or an unexpected error code we assume that ambient caps are
405 * available. */
406
407 cache = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_KILL, 0, 0) >= 0 ||
408 !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS);
409
410 return cache;
411 }
412
413 bool capability_quintet_mangle(CapabilityQuintet *q) {
414 uint64_t combined, drop = 0;
415 bool ambient_supported;
416
417 assert(q);
418
419 combined = q->effective | q->bounding | q->inheritable | q->permitted;
420
421 ambient_supported = q->ambient != CAP_MASK_UNSET;
422 if (ambient_supported)
423 combined |= q->ambient;
424
425 for (unsigned i = 0; i <= cap_last_cap(); i++) {
426 unsigned long bit = UINT64_C(1) << i;
427 if (!FLAGS_SET(combined, bit))
428 continue;
429
430 if (prctl(PR_CAPBSET_READ, i) > 0)
431 continue;
432
433 drop |= bit;
434
435 log_debug("Not in the current bounding set: %s", capability_to_name(i));
436 }
437
438 q->effective &= ~drop;
439 q->bounding &= ~drop;
440 q->inheritable &= ~drop;
441 q->permitted &= ~drop;
442
443 if (ambient_supported)
444 q->ambient &= ~drop;
445
446 return drop != 0; /* Let the caller know we changed something */
447 }
448
449 int capability_quintet_enforce(const CapabilityQuintet *q) {
450 _cleanup_cap_free_ cap_t c = NULL, modified = NULL;
451 int r;
452
453 if (q->ambient != CAP_MASK_UNSET) {
454 bool changed = false;
455
456 c = cap_get_proc();
457 if (!c)
458 return -errno;
459
460 /* In order to raise the ambient caps set we first need to raise the matching
461 * inheritable + permitted cap */
462 for (unsigned i = 0; i <= cap_last_cap(); i++) {
463 uint64_t m = UINT64_C(1) << i;
464 cap_value_t cv = (cap_value_t) i;
465 cap_flag_value_t old_value_inheritable, old_value_permitted;
466
467 if ((q->ambient & m) == 0)
468 continue;
469
470 if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0)
471 return -errno;
472 if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0)
473 return -errno;
474
475 if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET)
476 continue;
477
478 if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0)
479 return -errno;
480 if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
481 return -errno;
482
483 changed = true;
484 }
485
486 if (changed)
487 if (cap_set_proc(c) < 0)
488 return -errno;
489
490 r = capability_ambient_set_apply(q->ambient, false);
491 if (r < 0)
492 return r;
493 }
494
495 if (q->inheritable != CAP_MASK_UNSET || q->permitted != CAP_MASK_UNSET || q->effective != CAP_MASK_UNSET) {
496 bool changed = false;
497
498 if (!c) {
499 c = cap_get_proc();
500 if (!c)
501 return -errno;
502 }
503
504 for (unsigned i = 0; i <= cap_last_cap(); i++) {
505 uint64_t m = UINT64_C(1) << i;
506 cap_value_t cv = (cap_value_t) i;
507
508 if (q->inheritable != CAP_MASK_UNSET) {
509 cap_flag_value_t old_value, new_value;
510
511 if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0) {
512 if (errno == EINVAL) /* If the kernel knows more caps than this
513 * version of libcap, then this will return
514 * EINVAL. In that case, simply ignore it,
515 * pretend it doesn't exist. */
516 continue;
517
518 return -errno;
519 }
520
521 new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR;
522
523 if (old_value != new_value) {
524 changed = true;
525
526 if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0)
527 return -errno;
528 }
529 }
530
531 if (q->permitted != CAP_MASK_UNSET) {
532 cap_flag_value_t old_value, new_value;
533
534 if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0) {
535 if (errno == EINVAL)
536 continue;
537
538 return -errno;
539 }
540
541 new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR;
542
543 if (old_value != new_value) {
544 changed = true;
545
546 if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0)
547 return -errno;
548 }
549 }
550
551 if (q->effective != CAP_MASK_UNSET) {
552 cap_flag_value_t old_value, new_value;
553
554 if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0) {
555 if (errno == EINVAL)
556 continue;
557
558 return -errno;
559 }
560
561 new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR;
562
563 if (old_value != new_value) {
564 changed = true;
565
566 if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0)
567 return -errno;
568 }
569 }
570 }
571
572 if (changed) {
573 /* In order to change the bounding caps, we need to keep CAP_SETPCAP for a bit
574 * longer. Let's add it to our list hence for now. */
575 if (q->bounding != CAP_MASK_UNSET) {
576 cap_value_t cv = CAP_SETPCAP;
577
578 modified = cap_dup(c);
579 if (!modified)
580 return -ENOMEM;
581
582 if (cap_set_flag(modified, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
583 return -errno;
584 if (cap_set_flag(modified, CAP_EFFECTIVE, 1, &cv, CAP_SET) < 0)
585 return -errno;
586
587 if (cap_compare(modified, c) == 0) {
588 /* No change? then drop this nonsense again */
589 cap_free(modified);
590 modified = NULL;
591 }
592 }
593
594 /* Now, let's enforce the caps for the first time. Note that this is where we acquire
595 * caps in any of the sets we currently don't have. We have to do this before
596 * dropping the bounding caps below, since at that point we can never acquire new
597 * caps in inherited/permitted/effective anymore, but only lose them. */
598 if (cap_set_proc(modified ?: c) < 0)
599 return -errno;
600 }
601 }
602
603 if (q->bounding != CAP_MASK_UNSET) {
604 r = capability_bounding_set_drop(q->bounding, false);
605 if (r < 0)
606 return r;
607 }
608
609 /* If needed, let's now set the caps again, this time in the final version, which differs from what
610 * we have already set only in the CAP_SETPCAP bit, which we needed for dropping the bounding
611 * bits. This call only undoes bits and doesn't acquire any which means the bounding caps don't
612 * matter. */
613 if (modified)
614 if (cap_set_proc(c) < 0)
615 return -errno;
616
617 return 0;
618 }
619
/* Queries the ambient capability set of the calling process and stores it as a bit mask in '*ret'.
 * Returns 1 if the set was queried, 0 if ambient capabilities are not supported (in which case '*ret'
 * is set to 0), or a negative errno-style value if prctl() fails. */
int capability_get_ambient(uint64_t *ret) {
        uint64_t mask = 0;

        assert(ret);

        if (!ambient_capabilities_supported()) {
                *ret = 0;
                return 0;
        }

        for (unsigned cap = 0; cap <= cap_last_cap(); cap++) {
                int is_set = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, cap, 0, 0);

                if (is_set < 0)
                        return -errno;

                if (is_set > 0)
                        mask |= UINT64_C(1) << cap;
        }

        *ret = mask;
        return 1;
}