]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/capability-util.c
Two follow-ups for recent PRs (#38062)
[thirdparty/systemd.git] / src / basic / capability-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <linux/prctl.h>
4 #include <stdatomic.h>
5 #include <stdio.h>
6 #include <sys/prctl.h>
7 #include <unistd.h>
8
9 #include "alloc-util.h"
10 #include "bitfield.h"
11 #include "cap-list.h"
12 #include "capability-util.h"
13 #include "fd-util.h"
14 #include "fileio.h"
15 #include "log.h"
16 #include "logarithm.h"
17 #include "parse-util.h"
18 #include "pidref.h"
19 #include "process-util.h"
20 #include "stat-util.h"
21 #include "user-util.h"
22
23 int have_effective_cap(int value) {
24 _cleanup_cap_free_ cap_t cap = NULL;
25 cap_flag_value_t fv = CAP_CLEAR; /* To avoid false-positive use-of-uninitialized-value error reported
26 * by fuzzers. */
27
28 cap = cap_get_proc();
29 if (!cap)
30 return -errno;
31
32 if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
33 return -errno;
34
35 return fv == CAP_SET;
36 }
37
38 unsigned cap_last_cap(void) {
39 static atomic_int saved = INT_MAX;
40 int r, c;
41
42 c = saved;
43 if (c != INT_MAX)
44 return c;
45
46 /* Available since linux-3.2 */
47 _cleanup_free_ char *content = NULL;
48 r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
49 if (r < 0)
50 log_debug_errno(r, "Failed to read /proc/sys/kernel/cap_last_cap, ignoring: %m");
51 else {
52 r = safe_atoi(content, &c);
53 if (r < 0)
54 log_debug_errno(r, "Failed to parse /proc/sys/kernel/cap_last_cap, ignoring: %m");
55 else {
56 if (c > CAP_LIMIT) /* Safety for the future: if one day the kernel learns more than
57 * 64 caps, then we are in trouble (since we, as much userspace
58 * and kernel space store capability masks in uint64_t types). We
59 * also want to use UINT64_MAX as marker for "unset". Hence let's
60 * hence protect ourselves against that and always cap at 62 for
61 * now. */
62 c = CAP_LIMIT;
63
64 saved = c;
65 return c;
66 }
67 }
68
69 /* Fall back to syscall-probing for pre linux-3.2, or where /proc/ is not mounted */
70 unsigned long p = (unsigned long) MIN(CAP_LAST_CAP, CAP_LIMIT);
71
72 if (prctl(PR_CAPBSET_READ, p) < 0) {
73
74 /* Hmm, look downwards, until we find one that works */
75 for (p--; p > 0; p--)
76 if (prctl(PR_CAPBSET_READ, p) >= 0)
77 break;
78
79 } else {
80
81 /* Hmm, look upwards, until we find one that doesn't work */
82 for (; p < CAP_LIMIT; p++)
83 if (prctl(PR_CAPBSET_READ, p+1) < 0)
84 break;
85 }
86
87 c = (int) p;
88 saved = c;
89 return c;
90 }
91
92 int capability_update_inherited_set(cap_t caps, uint64_t set) {
93 /* Add capabilities in the set to the inherited caps, drops capabilities not in the set.
94 * Do not apply them yet. */
95
96 for (unsigned i = 0; i <= cap_last_cap(); i++) {
97 cap_flag_value_t flag = set & (UINT64_C(1) << i) ? CAP_SET : CAP_CLEAR;
98 cap_value_t v;
99
100 v = (cap_value_t) i;
101
102 if (cap_set_flag(caps, CAP_INHERITABLE, 1, &v, flag) < 0)
103 return -errno;
104 }
105
106 return 0;
107 }
108
109 int capability_ambient_set_apply(uint64_t set, bool also_inherit) {
110 _cleanup_cap_free_ cap_t caps = NULL;
111 int r;
112
113 /* Remove capabilities requested in ambient set, but not in the bounding set */
114 for (unsigned i = 0; i <= cap_last_cap(); i++) {
115 if (!BIT_SET(set, i))
116 continue;
117
118 if (prctl(PR_CAPBSET_READ, (unsigned long) i) != 1) {
119 log_debug("Ambient capability %s requested but missing from bounding set, suppressing automatically.",
120 capability_to_name(i));
121 CLEAR_BIT(set, i);
122 }
123 }
124
125 /* Add the capabilities to the ambient set (an possibly also the inheritable set) */
126
127 if (also_inherit) {
128 caps = cap_get_proc();
129 if (!caps)
130 return -errno;
131
132 r = capability_update_inherited_set(caps, set);
133 if (r < 0)
134 return -errno;
135
136 if (cap_set_proc(caps) < 0)
137 return -errno;
138 }
139
140 for (unsigned i = 0; i <= cap_last_cap(); i++) {
141 if (BIT_SET(set, i)) {
142 /* Add the capability to the ambient set. */
143 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0)
144 return -errno;
145 } else {
146 /* Drop the capability so we don't inherit capabilities we didn't ask for. */
147 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i, 0, 0);
148 if (r < 0)
149 return -errno;
150 if (r > 0)
151 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, i, 0, 0) < 0)
152 return -errno;
153 }
154 }
155
156 return 0;
157 }
158
159 int capability_gain_cap_setpcap(cap_t *ret_before_caps) {
160 _cleanup_cap_free_ cap_t caps = NULL;
161 cap_flag_value_t fv;
162 caps = cap_get_proc();
163 if (!caps)
164 return -errno;
165
166 if (cap_get_flag(caps, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0)
167 return -errno;
168
169 if (fv != CAP_SET) {
170 _cleanup_cap_free_ cap_t temp_cap = NULL;
171 static const cap_value_t v = CAP_SETPCAP;
172
173 temp_cap = cap_dup(caps);
174 if (!temp_cap)
175 return -errno;
176
177 if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0)
178 return -errno;
179
180 if (cap_set_proc(temp_cap) < 0)
181 log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m");
182
183 /* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all this just means
184 * we'll fail later, when we actually intend to drop some capabilities or try to set securebits. */
185 }
186 if (ret_before_caps)
187 /* Return the capabilities as they have been before setting CAP_SETPCAP */
188 *ret_before_caps = TAKE_PTR(caps);
189
190 return 0;
191 }
192
193 int capability_bounding_set_drop(uint64_t keep, bool right_now) {
194 _cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL;
195 int r;
196
197 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
198 * in the effective set (yes, the kernel drops that when
199 * executing init!), so get it back temporarily so that we can
200 * call PR_CAPBSET_DROP. */
201
202 r = capability_gain_cap_setpcap(&before_cap);
203 if (r < 0)
204 return r;
205
206 after_cap = cap_dup(before_cap);
207 if (!after_cap)
208 return -errno;
209
210 for (unsigned i = 0; i <= cap_last_cap(); i++) {
211 cap_value_t v;
212
213 if ((keep & (UINT64_C(1) << i)))
214 continue;
215
216 /* Drop it from the bounding set */
217 if (prctl(PR_CAPBSET_DROP, i) < 0) {
218 r = -errno;
219
220 /* If dropping the capability failed, let's see if we didn't have it in the first place. If so,
221 * continue anyway, as dropping a capability we didn't have in the first place doesn't really
222 * matter anyway. */
223 if (prctl(PR_CAPBSET_READ, i) != 0)
224 goto finish;
225 }
226 v = (cap_value_t) i;
227
228 /* Also drop it from the inheritable set, so
229 * that anything we exec() loses the
230 * capability for good. */
231 if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
232 r = -errno;
233 goto finish;
234 }
235
236 /* If we shall apply this right now drop it
237 * also from our own capability sets. */
238 if (right_now) {
239 if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
240 cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
241 r = -errno;
242 goto finish;
243 }
244 }
245 }
246
247 r = 0;
248
249 finish:
250 if (cap_set_proc(after_cap) < 0) {
251 /* If there are no actual changes anyway then let's ignore this error. */
252 if (cap_compare(before_cap, after_cap) != 0)
253 r = -errno;
254 }
255
256 return r;
257 }
258
259 static int drop_from_file(const char *fn, uint64_t keep) {
260 _cleanup_free_ char *p = NULL;
261 uint64_t current, after;
262 uint32_t hi, lo;
263 int r, k;
264
265 r = read_one_line_file(fn, &p);
266 if (r < 0)
267 return r;
268
269 k = sscanf(p, "%" PRIu32 " %" PRIu32, &lo, &hi);
270 if (k != 2)
271 return -EIO;
272
273 current = (uint64_t) lo | ((uint64_t) hi << 32);
274 after = current & keep;
275
276 if (current == after)
277 return 0;
278
279 lo = after & UINT32_MAX;
280 hi = (after >> 32) & UINT32_MAX;
281
282 return write_string_filef(fn, 0, "%" PRIu32 " %" PRIu32, lo, hi);
283 }
284
/* Restricts the capability masks stored in the usermodehelper "inheritable" and "bset" sysctl files to
 * the bits contained in 'keep'. Stops at the first file that fails.
 *
 * Returns the (non-negative) result of the last drop_from_file() call on success, or the negative error
 * of the first failing one. */
int capability_bounding_set_drop_usermode(uint64_t keep) {
        int r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep);
        if (r < 0)
                return r;

        return drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep);
}
298
299 int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
300 int r;
301
302 /* Unfortunately we cannot leave privilege dropping to PID 1 here, since we want to run as user but
303 * want to keep some capabilities. Since file capabilities have been introduced this cannot be done
304 * across exec() anymore, unless our binary has the capability configured in the file system, which
305 * we want to avoid. */
306
307 if (setresgid(gid, gid, gid) < 0)
308 return log_error_errno(errno, "Failed to change group ID: %m");
309
310 r = maybe_setgroups(0, NULL);
311 if (r < 0)
312 return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
313
314 /* Ensure we keep the permitted caps across the setresuid(). Note that we do this even if we actually
315 * don't want to keep any capabilities, since we want to be able to drop them from the bounding set
316 * too, and we can only do that if we have capabilities. */
317 if (prctl(PR_SET_KEEPCAPS, 1) < 0)
318 return log_error_errno(errno, "Failed to enable keep capabilities flag: %m");
319
320 if (setresuid(uid, uid, uid) < 0)
321 return log_error_errno(errno, "Failed to change user ID: %m");
322
323 if (prctl(PR_SET_KEEPCAPS, 0) < 0)
324 return log_error_errno(errno, "Failed to disable keep capabilities flag: %m");
325
326 /* Drop all caps from the bounding set (as well as the inheritable/permitted/effective sets), except
327 * the ones we want to keep */
328 r = capability_bounding_set_drop(keep_capabilities, true);
329 if (r < 0)
330 return log_error_errno(r, "Failed to drop capabilities: %m");
331
332 /* Now upgrade the permitted caps we still kept to effective caps */
333 if (keep_capabilities != 0) {
334 cap_value_t bits[log2u64(keep_capabilities) + 1];
335 _cleanup_cap_free_ cap_t d = NULL;
336 unsigned i, j = 0;
337
338 d = cap_init();
339 if (!d)
340 return log_oom();
341
342 for (i = 0; i < ELEMENTSOF(bits); i++)
343 if (keep_capabilities & (1ULL << i))
344 bits[j++] = i;
345
346 /* use enough bits */
347 assert(i == 64 || (keep_capabilities >> i) == 0);
348 /* don't use too many bits */
349 assert(keep_capabilities & (UINT64_C(1) << (i - 1)));
350
351 if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 ||
352 cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0)
353 return log_error_errno(errno, "Failed to enable capabilities bits: %m");
354
355 if (cap_set_proc(d) < 0)
356 return log_error_errno(errno, "Failed to increase capabilities: %m");
357 }
358
359 return 0;
360 }
361
362 static int change_capability(cap_value_t cv, cap_flag_value_t flag) {
363 _cleanup_cap_free_ cap_t tmp_cap = NULL;
364
365 tmp_cap = cap_get_proc();
366 if (!tmp_cap)
367 return -errno;
368
369 if ((cap_set_flag(tmp_cap, CAP_INHERITABLE, 1, &cv, flag) < 0) ||
370 (cap_set_flag(tmp_cap, CAP_PERMITTED, 1, &cv, flag) < 0) ||
371 (cap_set_flag(tmp_cap, CAP_EFFECTIVE, 1, &cv, flag) < 0))
372 return -errno;
373
374 if (cap_set_proc(tmp_cap) < 0)
375 return -errno;
376
377 return 0;
378 }
379
380 int drop_capability(cap_value_t cv) {
381 return change_capability(cv, CAP_CLEAR);
382 }
383
384 int keep_capability(cap_value_t cv) {
385 return change_capability(cv, CAP_SET);
386 }
387
388 bool capability_quintet_mangle(CapabilityQuintet *q) {
389 uint64_t combined, drop = 0;
390
391 assert(q);
392
393 combined = q->effective | q->bounding | q->inheritable | q->permitted | q->ambient;
394
395 for (unsigned i = 0; i <= cap_last_cap(); i++) {
396 if (!BIT_SET(combined, i))
397 continue;
398
399 if (prctl(PR_CAPBSET_READ, (unsigned long) i) > 0)
400 continue;
401
402 SET_BIT(drop, i);
403
404 log_debug("Dropping capability not in the current bounding set: %s", capability_to_name(i));
405 }
406
407 q->effective &= ~drop;
408 q->bounding &= ~drop;
409 q->inheritable &= ~drop;
410 q->permitted &= ~drop;
411 q->ambient &= ~drop;
412
413 return drop != 0; /* Let the caller know we changed something */
414 }
415
416 int capability_quintet_enforce(const CapabilityQuintet *q) {
417 _cleanup_cap_free_ cap_t c = NULL, modified = NULL;
418 int r;
419
420 if (q->ambient != CAP_MASK_UNSET) {
421 bool changed = false;
422
423 c = cap_get_proc();
424 if (!c)
425 return -errno;
426
427 /* In order to raise the ambient caps set we first need to raise the matching
428 * inheritable + permitted cap */
429 for (unsigned i = 0; i <= cap_last_cap(); i++) {
430 uint64_t m = UINT64_C(1) << i;
431 cap_value_t cv = (cap_value_t) i;
432 cap_flag_value_t old_value_inheritable, old_value_permitted;
433
434 if ((q->ambient & m) == 0)
435 continue;
436
437 if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0)
438 return -errno;
439 if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0)
440 return -errno;
441
442 if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET)
443 continue;
444
445 if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0)
446 return -errno;
447 if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
448 return -errno;
449
450 changed = true;
451 }
452
453 if (changed)
454 if (cap_set_proc(c) < 0)
455 return -errno;
456
457 r = capability_ambient_set_apply(q->ambient, false);
458 if (r < 0)
459 return r;
460 }
461
462 if (q->inheritable != CAP_MASK_UNSET || q->permitted != CAP_MASK_UNSET || q->effective != CAP_MASK_UNSET) {
463 bool changed = false;
464
465 if (!c) {
466 c = cap_get_proc();
467 if (!c)
468 return -errno;
469 }
470
471 for (unsigned i = 0; i <= cap_last_cap(); i++) {
472 uint64_t m = UINT64_C(1) << i;
473 cap_value_t cv = (cap_value_t) i;
474
475 if (q->inheritable != CAP_MASK_UNSET) {
476 cap_flag_value_t old_value, new_value;
477
478 if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0) {
479 if (errno == EINVAL) /* If the kernel knows more caps than this
480 * version of libcap, then this will return
481 * EINVAL. In that case, simply ignore it,
482 * pretend it doesn't exist. */
483 continue;
484
485 return -errno;
486 }
487
488 new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR;
489
490 if (old_value != new_value) {
491 changed = true;
492
493 if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0)
494 return -errno;
495 }
496 }
497
498 if (q->permitted != CAP_MASK_UNSET) {
499 cap_flag_value_t old_value, new_value;
500
501 if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0) {
502 if (errno == EINVAL)
503 continue;
504
505 return -errno;
506 }
507
508 new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR;
509
510 if (old_value != new_value) {
511 changed = true;
512
513 if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0)
514 return -errno;
515 }
516 }
517
518 if (q->effective != CAP_MASK_UNSET) {
519 cap_flag_value_t old_value, new_value;
520
521 if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0) {
522 if (errno == EINVAL)
523 continue;
524
525 return -errno;
526 }
527
528 new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR;
529
530 if (old_value != new_value) {
531 changed = true;
532
533 if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0)
534 return -errno;
535 }
536 }
537 }
538
539 if (changed) {
540 /* In order to change the bounding caps, we need to keep CAP_SETPCAP for a bit
541 * longer. Let's add it to our list hence for now. */
542 if (q->bounding != CAP_MASK_UNSET) {
543 cap_value_t cv = CAP_SETPCAP;
544
545 modified = cap_dup(c);
546 if (!modified)
547 return -ENOMEM;
548
549 if (cap_set_flag(modified, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
550 return -errno;
551 if (cap_set_flag(modified, CAP_EFFECTIVE, 1, &cv, CAP_SET) < 0)
552 return -errno;
553
554 if (cap_compare(modified, c) == 0) {
555 /* No change? then drop this nonsense again */
556 cap_free(modified);
557 modified = NULL;
558 }
559 }
560
561 /* Now, let's enforce the caps for the first time. Note that this is where we acquire
562 * caps in any of the sets we currently don't have. We have to do this before
563 * dropping the bounding caps below, since at that point we can never acquire new
564 * caps in inherited/permitted/effective anymore, but only lose them. */
565 if (cap_set_proc(modified ?: c) < 0)
566 return -errno;
567 }
568 }
569
570 if (q->bounding != CAP_MASK_UNSET) {
571 r = capability_bounding_set_drop(q->bounding, false);
572 if (r < 0)
573 return r;
574 }
575
576 /* If needed, let's now set the caps again, this time in the final version, which differs from what
577 * we have already set only in the CAP_SETPCAP bit, which we needed for dropping the bounding
578 * bits. This call only undoes bits and doesn't acquire any which means the bounding caps don't
579 * matter. */
580 if (modified)
581 if (cap_set_proc(c) < 0)
582 return -errno;
583
584 return 0;
585 }
586
/* Queries the ambient capability set of the calling process, one capability at a time, and stores it as
 * a bit mask in '*ret'.
 *
 * Returns 1 on success, negative errno-style value if one of the prctl() queries fails (in which case
 * '*ret' is not written). */
int capability_get_ambient(uint64_t *ret) {
        uint64_t mask = 0;

        assert(ret);

        for (unsigned i = 0; i <= cap_last_cap(); i++) {
                int is_set = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i, 0, 0);

                if (is_set < 0)
                        return -errno;

                if (is_set > 0) {
                        SET_BIT(mask, i);
                }
        }

        *ret = mask;
        return 1;
}
604
605 int pidref_get_capability(const PidRef *pidref, CapabilityQuintet *ret) {
606 int r;
607
608 if (!pidref_is_set(pidref))
609 return -ESRCH;
610 if (pidref_is_remote(pidref))
611 return -EREMOTE;
612
613 const char *path = procfs_file_alloca(pidref->pid, "status");
614 _cleanup_fclose_ FILE *f = fopen(path, "re");
615 if (!f) {
616 if (errno == ENOENT && proc_mounted() == 0)
617 return -ENOSYS;
618
619 return -errno;
620 }
621
622 CapabilityQuintet q = CAPABILITY_QUINTET_NULL;
623 for (;;) {
624 _cleanup_free_ char *line = NULL;
625
626 r = read_line(f, LONG_LINE_MAX, &line);
627 if (r < 0)
628 return r;
629 if (r == 0)
630 break;
631
632 static const struct {
633 const char *field;
634 size_t offset;
635 } fields[] = {
636 { "CapBnd:", offsetof(CapabilityQuintet, bounding) },
637 { "CapInh:", offsetof(CapabilityQuintet, inheritable) },
638 { "CapPrm:", offsetof(CapabilityQuintet, permitted) },
639 { "CapEff:", offsetof(CapabilityQuintet, effective) },
640 { "CapAmb:", offsetof(CapabilityQuintet, ambient) },
641 };
642
643 FOREACH_ELEMENT(i, fields) {
644
645 const char *p = first_word(line, i->field);
646 if (!p)
647 continue;
648
649 uint64_t *v = (uint64_t*) ((uint8_t*) &q + i->offset);
650
651 if (*v != CAP_MASK_UNSET)
652 return -EBADMSG;
653
654 r = safe_atoux64(p, v);
655 if (r < 0)
656 return r;
657
658 if (*v == CAP_MASK_UNSET)
659 return -EBADMSG;
660 }
661 }
662
663 if (!capability_quintet_is_fully_set(&q))
664 return -EBADMSG;
665
666 r = pidref_verify(pidref);
667 if (r < 0)
668 return r;
669
670 if (ret)
671 *ret = q;
672
673 return 0;
674 }