]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/exec-credential.c
udev: gracefully handle ENODEV or friends in opening device node
[thirdparty/systemd.git] / src / core / exec-credential.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <sys/mount.h>
4 #include <unistd.h>
5
6 #include "acl-util.h"
7 #include "cgroup.h"
8 #include "creds-util.h"
9 #include "errno-util.h"
10 #include "exec-credential.h"
11 #include "execute.h"
12 #include "fileio.h"
13 #include "fs-util.h"
14 #include "glob-util.h"
15 #include "io-util.h"
16 #include "iovec-util.h"
17 #include "label-util.h"
18 #include "log.h"
19 #include "mkdir-label.h"
20 #include "mount-util.h"
21 #include "mountpoint-util.h"
22 #include "ordered-set.h"
23 #include "path-lookup.h"
24 #include "path-util.h"
25 #include "process-util.h"
26 #include "random-util.h"
27 #include "recurse-dir.h"
28 #include "rm-rf.h"
29 #include "siphash24.h"
30 #include "stat-util.h"
31 #include "strv.h"
32 #include "tmpfile-util.h"
33 #include "user-util.h"
34
35 ExecSetCredential* exec_set_credential_free(ExecSetCredential *sc) {
36 if (!sc)
37 return NULL;
38
39 free(sc->id);
40 free(sc->data);
41 return mfree(sc);
42 }
43
44 ExecLoadCredential* exec_load_credential_free(ExecLoadCredential *lc) {
45 if (!lc)
46 return NULL;
47
48 free(lc->id);
49 free(lc->path);
50 return mfree(lc);
51 }
52
53 ExecImportCredential* exec_import_credential_free(ExecImportCredential *ic) {
54 if (!ic)
55 return NULL;
56
57 free(ic->glob);
58 free(ic->rename);
59 return mfree(ic);
60 }
61
62 static void exec_import_credential_hash_func(const ExecImportCredential *ic, struct siphash *state) {
63 assert(ic);
64 assert(state);
65
66 siphash24_compress_string(ic->glob, state);
67 if (ic->rename)
68 siphash24_compress_string(ic->rename, state);
69 }
70
71 static int exec_import_credential_compare_func(const ExecImportCredential *a, const ExecImportCredential *b) {
72 int r;
73
74 assert(a);
75 assert(b);
76
77 r = strcmp(a->glob, b->glob);
78 if (r != 0)
79 return r;
80
81 return strcmp_ptr(a->rename, b->rename);
82 }
83
84 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
85 exec_set_credential_hash_ops,
86 char, string_hash_func, string_compare_func,
87 ExecSetCredential, exec_set_credential_free);
88
89 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
90 exec_load_credential_hash_ops,
91 char, string_hash_func, string_compare_func,
92 ExecLoadCredential, exec_load_credential_free);
93
94 DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
95 exec_import_credential_hash_ops,
96 ExecImportCredential,
97 exec_import_credential_hash_func,
98 exec_import_credential_compare_func,
99 exec_import_credential_free);
100
101 int exec_context_put_load_credential(ExecContext *c, const char *id, const char *path, bool encrypted) {
102 ExecLoadCredential *old;
103 int r;
104
105 assert(c);
106 assert(id);
107 assert(path);
108
109 old = hashmap_get(c->load_credentials, id);
110 if (old) {
111 r = free_and_strdup(&old->path, path);
112 if (r < 0)
113 return r;
114
115 old->encrypted = encrypted;
116 } else {
117 _cleanup_(exec_load_credential_freep) ExecLoadCredential *lc = NULL;
118
119 lc = new(ExecLoadCredential, 1);
120 if (!lc)
121 return -ENOMEM;
122
123 *lc = (ExecLoadCredential) {
124 .id = strdup(id),
125 .path = strdup(path),
126 .encrypted = encrypted,
127 };
128 if (!lc->id || !lc->path)
129 return -ENOMEM;
130
131 r = hashmap_ensure_put(&c->load_credentials, &exec_load_credential_hash_ops, lc->id, lc);
132 assert(r != -EEXIST);
133 if (r < 0)
134 return r;
135
136 TAKE_PTR(lc);
137 }
138
139 return 0;
140 }
141
142 int exec_context_put_set_credential(
143 ExecContext *c,
144 const char *id,
145 void *data_consume,
146 size_t size,
147 bool encrypted) {
148
149 _cleanup_free_ void *data = data_consume;
150 ExecSetCredential *old;
151 int r;
152
153 /* Takes the ownership of data both on success and failure */
154
155 assert(c);
156 assert(id);
157 assert(data || size == 0);
158
159 old = hashmap_get(c->set_credentials, id);
160 if (old) {
161 free_and_replace(old->data, data);
162 old->size = size;
163 old->encrypted = encrypted;
164 } else {
165 _cleanup_(exec_set_credential_freep) ExecSetCredential *sc = NULL;
166
167 sc = new(ExecSetCredential, 1);
168 if (!sc)
169 return -ENOMEM;
170
171 *sc = (ExecSetCredential) {
172 .id = strdup(id),
173 .data = TAKE_PTR(data),
174 .size = size,
175 .encrypted = encrypted,
176 };
177 if (!sc->id)
178 return -ENOMEM;
179
180 r = hashmap_ensure_put(&c->set_credentials, &exec_set_credential_hash_ops, sc->id, sc);
181 assert(r != -EEXIST);
182 if (r < 0)
183 return r;
184
185 TAKE_PTR(sc);
186 }
187
188 return 0;
189 }
190
191 int exec_context_put_import_credential(ExecContext *c, const char *glob, const char *rename) {
192 _cleanup_(exec_import_credential_freep) ExecImportCredential *ic = NULL;
193 int r;
194
195 assert(c);
196 assert(glob);
197
198 rename = empty_to_null(rename);
199
200 ic = new(ExecImportCredential, 1);
201 if (!ic)
202 return -ENOMEM;
203
204 *ic = (ExecImportCredential) {
205 .glob = strdup(glob),
206 };
207 if (!ic->glob)
208 return -ENOMEM;
209 if (rename) {
210 ic->rename = strdup(rename);
211 if (!ic->rename)
212 return -ENOMEM;
213 }
214
215 if (ordered_set_contains(c->import_credentials, ic))
216 return 0;
217
218 r = ordered_set_ensure_put(&c->import_credentials, &exec_import_credential_hash_ops, ic);
219 assert(r != -EEXIST);
220 if (r < 0)
221 return r;
222
223 TAKE_PTR(ic);
224
225 return 0;
226 }
227
228 bool exec_params_need_credentials(const ExecParameters *p) {
229 assert(p);
230
231 return p->flags & (EXEC_SETUP_CREDENTIALS|EXEC_SETUP_CREDENTIALS_FRESH);
232 }
233
234 bool exec_context_has_credentials(const ExecContext *c) {
235 assert(c);
236
237 return !hashmap_isempty(c->set_credentials) ||
238 !hashmap_isempty(c->load_credentials) ||
239 !ordered_set_isempty(c->import_credentials);
240 }
241
/* Returns true if 'path' is located below the "credentials" subdirectory of 'runtime_prefix'. */
bool mount_point_is_credentials(const char *runtime_prefix, const char *path) {
        assert(runtime_prefix);
        assert(path);

        const char *below_prefix = path_startswith(path, runtime_prefix);

        return below_prefix && path_startswith(below_prefix, "credentials");
}
254
255 static int get_credential_directory(
256 const char *runtime_prefix,
257 const char *unit,
258 char **ret) {
259
260 char *p;
261
262 assert(ret);
263
264 if (!runtime_prefix || !unit) {
265 *ret = NULL;
266 return 0;
267 }
268
269 p = path_join(runtime_prefix, "credentials", unit);
270 if (!p)
271 return -ENOMEM;
272
273 *ret = p;
274 return 1;
275 }
276
277 int exec_context_get_credential_directory(
278 const ExecContext *context,
279 const ExecParameters *params,
280 const char *unit,
281 char **ret) {
282
283 assert(context);
284 assert(params);
285 assert(unit);
286 assert(ret);
287
288 if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context)) {
289 *ret = NULL;
290 return 0;
291 }
292
293 return get_credential_directory(params->prefix[EXEC_DIRECTORY_RUNTIME], unit, ret);
294 }
295
296 int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_prefix, const char *unit) {
297 _cleanup_free_ char *p = NULL;
298 int r;
299
300 assert(c);
301
302 r = get_credential_directory(runtime_prefix, unit, &p);
303 if (r <= 0)
304 return r;
305
306 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
307 * unmount it, and afterwards remove the mount point */
308 (void) umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW);
309 (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
310
311 return 0;
312 }
313
314 static int write_credential(
315 int dfd,
316 const char *id,
317 const void *data,
318 size_t size,
319 uid_t uid,
320 gid_t gid,
321 bool ownership_ok) {
322
323 _cleanup_free_ char *tmp = NULL;
324 _cleanup_close_ int fd = -EBADF;
325 int r;
326
327 assert(dfd >= 0);
328 assert(id);
329 assert(data || size == 0);
330
331 r = tempfn_random_child("", "cred", &tmp);
332 if (r < 0)
333 return r;
334
335 fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
336 if (fd < 0)
337 return -errno;
338
339 r = loop_write(fd, data, size);
340 if (r < 0)
341 goto fail;
342
343 r = RET_NERRNO(fchmod(fd, 0400)); /* Take away "w" bit */
344 if (r < 0)
345 goto fail;
346
347 if (uid_is_valid(uid) && uid != getuid()) {
348 r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
349 if (r < 0) {
350 /* Ideally we use ACLs, since we can neatly express what we want to express:
351 * the user gets read access and nothing else. But if the backing fs can't
352 * support that (e.g. ramfs), then we can use file ownership instead. But that's
353 * only safe if we can then re-mount the whole thing read-only, so that the user
354 * can no longer chmod() the file to gain write access. */
355 if (!ownership_ok || (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r)))
356 goto fail;
357
358 r = RET_NERRNO(fchown(fd, uid, gid));
359 if (r < 0)
360 goto fail;
361 }
362 }
363
364 r = RET_NERRNO(renameat(dfd, tmp, dfd, id));
365 if (r < 0)
366 goto fail;
367
368 return 0;
369
370 fail:
371 (void) unlinkat(dfd, tmp, /* flags = */ 0);
372 return r;
373 }
374
/* Selects which set of credential directories credential_search_path() assembles. */
typedef enum CredentialSearchPath {
        CREDENTIAL_SEARCH_PATH_TRUSTED,    /* only the directories for non-encrypted credentials */
        CREDENTIAL_SEARCH_PATH_ENCRYPTED,  /* only the directories for encrypted credentials */
        CREDENTIAL_SEARCH_PATH_ALL,        /* both of the above, encrypted directories first */
        _CREDENTIAL_SEARCH_PATH_MAX,
        _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
} CredentialSearchPath;
382
383 static int credential_search_path(const ExecParameters *params, CredentialSearchPath path, char ***ret) {
384 _cleanup_strv_free_ char **l = NULL;
385 int r;
386
387 assert(params);
388 assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
389 assert(ret);
390
391 /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
392 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
393 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
394
395 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
396 r = strv_extend(&l, params->received_encrypted_credentials_directory);
397 if (r < 0)
398 return r;
399
400 _cleanup_strv_free_ char **add = NULL;
401 r = credential_store_path_encrypted(params->runtime_scope, &add);
402 if (r < 0)
403 return r;
404
405 r = strv_extend_strv_consume(&l, TAKE_PTR(add), /* filter_duplicates= */ false);
406 if (r < 0)
407 return r;
408 }
409
410 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
411 r = strv_extend(&l, params->received_credentials_directory);
412 if (r < 0)
413 return r;
414
415 _cleanup_strv_free_ char **add = NULL;
416 r = credential_store_path(params->runtime_scope, &add);
417 if (r < 0)
418 return r;
419
420 r = strv_extend_strv_consume(&l, TAKE_PTR(add), /* filter_duplicates= */ false);
421 if (r < 0)
422 return r;
423 }
424
425 if (DEBUG_LOGGING) {
426 _cleanup_free_ char *t = strv_join(l, ":");
427 log_debug("Credential search path is: %s", strempty(t));
428 }
429
430 *ret = TAKE_PTR(l);
431 return 0;
432 }
433
434 static bool device_nodes_restricted(
435 const ExecContext *c,
436 const CGroupContext *cgroup_context) {
437
438 assert(c);
439 assert(cgroup_context);
440
441 /* Returns true if we have any reason to believe we might not be able to access the TPM device
442 * directly, even if we run as root/PID 1. This could be because /dev/ is replaced by a private
443 * version, or because a device node access list is configured. */
444
445 if (c->private_devices)
446 return true;
447
448 if (cgroup_context->device_policy != CGROUP_DEVICE_POLICY_AUTO ||
449 cgroup_context->device_allow)
450 return true;
451
452 return false;
453 }
454
455 struct load_cred_args {
456 const ExecContext *context;
457 const CGroupContext *cgroup_context;
458 const ExecParameters *params;
459 const char *unit;
460 bool encrypted;
461 int write_dfd;
462 uid_t uid;
463 gid_t gid;
464 bool ownership_ok;
465 uint64_t left;
466 };
467
468 static int maybe_decrypt_and_write_credential(
469 struct load_cred_args *args,
470 const char *id,
471 const char *data,
472 size_t size) {
473
474 _cleanup_(iovec_done_erase) struct iovec plaintext = {};
475 size_t add;
476 int r;
477
478 assert(args);
479 assert(args->write_dfd >= 0);
480 assert(id);
481 assert(data || size == 0);
482
483 if (args->encrypted) {
484 CredentialFlags flags = 0; /* only allow user creds in user scope */
485
486 switch (args->params->runtime_scope) {
487
488 case RUNTIME_SCOPE_SYSTEM:
489 /* In system mode talk directly to the TPM – unless we live in a device sandbox
490 * which might block TPM device access. */
491
492 flags |= CREDENTIAL_ANY_SCOPE;
493
494 if (!device_nodes_restricted(args->context, args->cgroup_context)) {
495 r = decrypt_credential_and_warn(
496 id,
497 now(CLOCK_REALTIME),
498 /* tpm2_device= */ NULL,
499 /* tpm2_signature_path= */ NULL,
500 getuid(),
501 &IOVEC_MAKE(data, size),
502 flags,
503 &plaintext);
504 break;
505 }
506
507 _fallthrough_;
508
509 case RUNTIME_SCOPE_USER:
510 /* In per user mode we'll not have access to the machine secret, nor to the TPM (most
511 * likely), hence go via the IPC service instead. Do this if we are run in root's
512 * per-user invocation too, to minimize differences and because isolating this logic
513 * into a separate process is generally a good thing anyway. */
514 r = ipc_decrypt_credential(
515 id,
516 now(CLOCK_REALTIME),
517 getuid(),
518 &IOVEC_MAKE(data, size),
519 flags,
520 &plaintext);
521 break;
522
523 default:
524 assert_not_reached();
525 }
526 if (r < 0)
527 return r;
528
529 data = plaintext.iov_base;
530 size = plaintext.iov_len;
531 }
532
533 add = strlen(id) + size;
534 if (add > args->left)
535 return -E2BIG;
536
537 r = write_credential(args->write_dfd, id, data, size, args->uid, args->gid, args->ownership_ok);
538 if (r < 0)
539 return log_debug_errno(r, "Failed to write credential '%s': %m", id);
540
541 args->left -= add;
542
543 return 0;
544 }
545
546 static int load_credential_glob(
547 struct load_cred_args *args,
548 const ExecImportCredential *ic,
549 char * const *search_path,
550 ReadFullFileFlags flags) {
551
552 int r;
553
554 assert(args);
555 assert(args->write_dfd >= 0);
556 assert(ic);
557 assert(search_path);
558
559 STRV_FOREACH(d, search_path) {
560 _cleanup_strv_free_ char **paths = NULL;
561 _cleanup_free_ char *j = NULL;
562
563 j = path_join(*d, ic->glob);
564 if (!j)
565 return -ENOMEM;
566
567 r = safe_glob(j, /* flags = */ 0, &paths);
568 if (r == -ENOENT)
569 continue;
570 if (r < 0)
571 return r;
572
573 STRV_FOREACH(p, paths) {
574 _cleanup_free_ char *fn = NULL;
575 _cleanup_(erase_and_freep) char *data = NULL;
576 size_t size;
577
578 r = path_extract_filename(*p, &fn);
579 if (r < 0)
580 return log_debug_errno(r, "Failed to extract filename from '%s': %m", *p);
581
582 if (ic->rename) {
583 _cleanup_free_ char *renamed = NULL;
584
585 renamed = strjoin(ic->rename, fn + strlen(ic->glob) - !!endswith(ic->glob, "*"));
586 if (!renamed)
587 return log_oom_debug();
588
589 free_and_replace(fn, renamed);
590 }
591
592 if (!credential_name_valid(fn)) {
593 log_debug("Skipping credential with invalid name: %s", fn);
594 continue;
595 }
596
597 if (faccessat(args->write_dfd, fn, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
598 log_debug("Skipping credential with duplicated ID %s at %s", fn, *p);
599 continue;
600 }
601 if (errno != ENOENT)
602 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", fn);
603
604 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
605 r = read_full_file_full(
606 AT_FDCWD,
607 *p,
608 UINT64_MAX,
609 args->encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
610 flags,
611 NULL,
612 &data, &size);
613 if (r < 0)
614 return log_debug_errno(r, "Failed to read credential '%s': %m", *p);
615
616 r = maybe_decrypt_and_write_credential(args, fn, data, size);
617 if (r < 0)
618 return r;
619 }
620 }
621
622 return 0;
623 }
624
625 static int load_credential(
626 struct load_cred_args *args,
627 const char *id,
628 int read_dfd,
629 const char *path) {
630
631 ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
632 _cleanup_strv_free_ char **search_path = NULL;
633 _cleanup_free_ char *bindname = NULL;
634 const char *source = NULL;
635 bool missing_ok;
636 _cleanup_(erase_and_freep) char *data = NULL;
637 size_t size, maxsz;
638 int r;
639
640 assert(args);
641 assert(args->context);
642 assert(args->params);
643 assert(args->unit);
644 assert(args->write_dfd >= 0);
645 assert(id);
646 assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
647 assert(path);
648
649 if (read_dfd >= 0) {
650 /* If a directory fd is specified, then read the file directly from that dir. In this case we
651 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
652 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
653 * open it. */
654
655 if (!filename_is_valid(path)) /* safety check */
656 return -EINVAL;
657
658 missing_ok = true;
659 source = path;
660
661 } else if (path_is_absolute(path)) {
662 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
663 * sockets */
664
665 if (!path_is_valid(path)) /* safety check */
666 return -EINVAL;
667
668 flags |= READ_FULL_FILE_CONNECT_SOCKET;
669
670 /* Pass some minimal info about the unit and the credential name we are looking to acquire
671 * via the source socket address in case we read off an AF_UNIX socket. */
672 if (asprintf(&bindname, "@%" PRIx64 "/unit/%s/%s", random_u64(), args->unit, id) < 0)
673 return -ENOMEM;
674
675 missing_ok = false;
676 source = path;
677
678 } else if (credential_name_valid(path)) {
679 /* If this is a relative path, take it as credential name relative to the credentials
680 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
681 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
682
683 r = credential_search_path(args->params, CREDENTIAL_SEARCH_PATH_ALL, &search_path);
684 if (r < 0)
685 return r;
686
687 missing_ok = true;
688 } else
689 return -EINVAL;
690
691 if (args->encrypted) {
692 flags |= READ_FULL_FILE_UNBASE64;
693 maxsz = CREDENTIAL_ENCRYPTED_SIZE_MAX;
694 } else
695 maxsz = CREDENTIAL_SIZE_MAX;
696
697 if (search_path)
698 STRV_FOREACH(d, search_path) {
699 _cleanup_free_ char *j = NULL;
700
701 j = path_join(*d, path);
702 if (!j)
703 return -ENOMEM;
704
705 r = read_full_file_full(
706 AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
707 UINT64_MAX,
708 maxsz,
709 flags,
710 NULL,
711 &data, &size);
712 if (r != -ENOENT)
713 break;
714 }
715 else if (source)
716 r = read_full_file_full(
717 read_dfd, source,
718 UINT64_MAX,
719 maxsz,
720 flags,
721 bindname,
722 &data, &size);
723 else
724 assert_not_reached();
725
726 if (r == -ENOENT && (missing_ok || hashmap_contains(args->context->set_credentials, id))) {
727 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
728 * will get clear errors if we don't pass such a missing credential on as they
729 * themselves will get ENOENT when trying to read them, which should not be much
730 * worse than when we handle the error here and make it fatal.
731 *
732 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
733 * we are fine, too. */
734 log_full_errno(hashmap_contains(args->context->set_credentials, id) ? LOG_DEBUG : LOG_INFO,
735 r, "Couldn't read inherited credential '%s', skipping: %m", path);
736 return 0;
737 }
738 if (r < 0)
739 return log_debug_errno(r, "Failed to read credential '%s': %m", path);
740
741 return maybe_decrypt_and_write_credential(args, id, data, size);
742 }
743
744 static int load_cred_recurse_dir_cb(
745 RecurseDirEvent event,
746 const char *path,
747 int dir_fd,
748 int inode_fd,
749 const struct dirent *de,
750 const struct statx *sx,
751 void *userdata) {
752
753 struct load_cred_args *args = ASSERT_PTR(userdata);
754 _cleanup_free_ char *sub_id = NULL;
755 int r;
756
757 assert(path);
758 assert(de);
759
760 if (event != RECURSE_DIR_ENTRY)
761 return RECURSE_DIR_CONTINUE;
762
763 if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
764 return RECURSE_DIR_CONTINUE;
765
766 sub_id = strreplace(path, "/", "_");
767 if (!sub_id)
768 return -ENOMEM;
769
770 if (!credential_name_valid(sub_id))
771 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID '%s', which is not valid, refusing.", sub_id);
772
773 if (faccessat(args->write_dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
774 log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
775 return RECURSE_DIR_CONTINUE;
776 }
777 if (errno != ENOENT)
778 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
779
780 r = load_credential(args,
781 sub_id,
782 dir_fd, de->d_name);
783 if (r < 0)
784 return r;
785
786 return RECURSE_DIR_CONTINUE;
787 }
788
789 static int acquire_credentials(
790 const ExecContext *context,
791 const CGroupContext *cgroup_context,
792 const ExecParameters *params,
793 const char *unit,
794 const char *p,
795 uid_t uid,
796 gid_t gid,
797 bool ownership_ok) {
798
799 _cleanup_close_ int dfd = -EBADF;
800 int r;
801
802 assert(context);
803 assert(cgroup_context);
804 assert(params);
805 assert(unit);
806 assert(p);
807
808 dfd = open(p, O_DIRECTORY|O_CLOEXEC);
809 if (dfd < 0)
810 return -errno;
811
812 r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
813 if (r < 0)
814 return r;
815
816 struct load_cred_args args = {
817 .context = context,
818 .cgroup_context = cgroup_context,
819 .params = params,
820 .unit = unit,
821 .write_dfd = dfd,
822 .uid = uid,
823 .gid = gid,
824 .ownership_ok = ownership_ok,
825 .left = CREDENTIALS_TOTAL_SIZE_MAX,
826 };
827
828 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
829 ExecLoadCredential *lc;
830 HASHMAP_FOREACH(lc, context->load_credentials) {
831 _cleanup_close_ int sub_fd = -EBADF;
832
833 args.encrypted = lc->encrypted;
834
835 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
836 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
837 * a regular file. Finally, if it's a relative path we will use it as a credential name to
838 * propagate a credential passed to us from further up. */
839
840 if (path_is_absolute(lc->path)) {
841 sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC);
842 if (sub_fd < 0 && !IN_SET(errno,
843 ENOTDIR, /* Not a directory */
844 ENOENT)) /* Doesn't exist? */
845 return log_debug_errno(errno, "Failed to open credential source '%s': %m", lc->path);
846 }
847
848 if (sub_fd < 0)
849 /* Regular file (incl. a credential passed in from higher up) */
850 r = load_credential(&args,
851 lc->id,
852 AT_FDCWD, lc->path);
853 else
854 /* Directory */
855 r = recurse_dir(sub_fd,
856 /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
857 /* statx_mask= */ 0,
858 /* n_depth_max= */ UINT_MAX,
859 RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
860 load_cred_recurse_dir_cb,
861 &args);
862 if (r < 0)
863 return r;
864 }
865
866 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
867 * override any credentials found earlier. */
868 ExecImportCredential *ic;
869 ORDERED_SET_FOREACH(ic, context->import_credentials) {
870 _cleanup_free_ char **search_path = NULL;
871
872 r = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED, &search_path);
873 if (r < 0)
874 return r;
875
876 args.encrypted = false;
877
878 r = load_credential_glob(&args,
879 ic,
880 search_path,
881 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER);
882 if (r < 0)
883 return r;
884
885 search_path = strv_free(search_path);
886
887 r = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED, &search_path);
888 if (r < 0)
889 return r;
890
891 args.encrypted = true;
892
893 r = load_credential_glob(&args,
894 ic,
895 search_path,
896 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64);
897 if (r < 0)
898 return r;
899 }
900
901 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
902 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
903 ExecSetCredential *sc;
904 HASHMAP_FOREACH(sc, context->set_credentials) {
905 args.encrypted = sc->encrypted;
906
907 if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
908 log_debug("Skipping credential with duplicated ID %s", sc->id);
909 continue;
910 }
911 if (errno != ENOENT)
912 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
913
914 r = maybe_decrypt_and_write_credential(&args, sc->id, sc->data, sc->size);
915 if (r < 0)
916 return r;
917 }
918
919 r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
920 if (r < 0)
921 return r;
922
923 /* After we created all keys with the right perms, also make sure the credential store as a whole is
924 * accessible */
925
926 if (uid_is_valid(uid) && uid != getuid()) {
927 r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
928 if (r < 0) {
929 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
930 return r;
931
932 if (!ownership_ok)
933 return r;
934
935 if (fchown(dfd, uid, gid) < 0)
936 return -errno;
937 }
938 }
939
940 return 0;
941 }
942
943 static int setup_credentials_internal(
944 const ExecContext *context,
945 const CGroupContext *cgroup_context,
946 const ExecParameters *params,
947 const char *unit,
948 const char *final, /* This is where the credential store shall eventually end up at */
949 const char *workspace, /* This is where we can prepare it before moving it to the final place */
950 bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
951 bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
952 uid_t uid,
953 gid_t gid) {
954
955 bool final_mounted;
956 int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
957 * if we mounted something; false if we definitely can't mount anything */
958
959 assert(context);
960 assert(params);
961 assert(unit);
962 assert(final);
963 assert(workspace);
964
965 r = path_is_mount_point(final);
966 if (r < 0)
967 return log_debug_errno(r, "Failed to determine if '%s' is a mountpoint: %m", final);
968 final_mounted = r > 0;
969
970 if (final_mounted) {
971 if (FLAGS_SET(params->flags, EXEC_SETUP_CREDENTIALS_FRESH)) {
972 r = umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
973 if (r < 0)
974 return r;
975
976 final_mounted = false;
977 } else {
978 /* We can reuse the previous credential dir */
979 r = dir_is_empty(final, /* ignore_hidden_or_backup = */ false);
980 if (r < 0)
981 return r;
982 if (r == 0) {
983 log_debug("Credential dir for unit '%s' already set up, skipping.", unit);
984 return 0;
985 }
986 }
987 }
988
989 if (reuse_workspace) {
990 r = path_is_mount_point(workspace);
991 if (r < 0)
992 return r;
993 if (r > 0)
994 workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
995 * it, let's keep this in mind */
996 else
997 workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
998 } else
999 workspace_mounted = -1; /* ditto */
1000
1001 /* If both the final place and the workspace are mounted, we have no mounts to set up, based on
1002 * the assumption that they're actually the same tmpfs (but the latter with MS_RDONLY different).
1003 * If the workspace is not mounted, we just bind the final place over and make it writable. */
1004 must_mount = must_mount || final_mounted;
1005
1006 if (workspace_mounted < 0) {
1007 if (!final_mounted)
1008 /* Nothing is mounted on the workspace yet, let's try to mount a new tmpfs if
1009 * not using the final place. */
1010 r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
1011 if (final_mounted || r < 0) {
1012 /* If using final place or failed to mount new tmpfs, make a bind mount from
1013 * the final to the workspace, so that we can make it writable there. */
1014 r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
1015 if (r < 0) {
1016 if (!ERRNO_IS_PRIVILEGE(r))
1017 /* Propagate anything that isn't a permission problem. */
1018 return r;
1019
1020 if (must_mount)
1021 /* If it's not OK to use the plain directory fallback, propagate all
1022 * errors too. */
1023 return r;
1024
1025 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
1026 * for compat with container envs, and just use the final dir as is.
1027 * Final place must not be mounted in this case (refused by must_mount
1028 * above) */
1029
1030 workspace_mounted = false;
1031 } else {
1032 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
1033 r = mount_nofollow_verbose(LOG_DEBUG,
1034 NULL,
1035 workspace,
1036 NULL,
1037 MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false),
1038 NULL);
1039 if (r < 0)
1040 return r;
1041
1042 workspace_mounted = true;
1043 }
1044 } else
1045 workspace_mounted = true;
1046 }
1047
1048 assert(workspace_mounted >= 0);
1049 assert(!must_mount || workspace_mounted);
1050
1051 const char *where = workspace_mounted ? workspace : final;
1052
1053 (void) label_fix_full(AT_FDCWD, where, final, 0);
1054
1055 r = acquire_credentials(context, cgroup_context, params, unit, where, uid, gid, workspace_mounted);
1056 if (r < 0) {
1057 /* If we're using final place as workspace, and failed to acquire credentials, we might
1058 * have left half-written creds there. Let's get rid of the whole mount, so future
1059 * calls won't reuse it. */
1060 if (final_mounted)
1061 (void) umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
1062
1063 return r;
1064 }
1065
1066 if (workspace_mounted) {
1067 if (!final_mounted) {
1068 /* Make workspace read-only now, so that any bind mount we make from it defaults to
1069 * read-only too */
1070 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
1071 if (r < 0)
1072 return r;
1073
1074 /* And mount it to the final place, read-only */
1075 r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
1076 } else
1077 /* Otherwise we just get rid of the bind mount of final place */
1078 r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
1079 if (r < 0)
1080 return r;
1081 } else {
1082 _cleanup_free_ char *parent = NULL;
1083
1084 /* If we do not have our own mount put used the plain directory fallback, then we need to
1085 * open access to the top-level credential directory and the per-service directory now */
1086
1087 r = path_extract_directory(final, &parent);
1088 if (r < 0)
1089 return r;
1090 if (chmod(parent, 0755) < 0)
1091 return -errno;
1092 }
1093
1094 return 0;
1095 }
1096
1097 int exec_setup_credentials(
1098 const ExecContext *context,
1099 const CGroupContext *cgroup_context,
1100 const ExecParameters *params,
1101 const char *unit,
1102 uid_t uid,
1103 gid_t gid) {
1104
1105 _cleanup_free_ char *p = NULL, *q = NULL;
1106 int r;
1107
1108 assert(context);
1109 assert(params);
1110 assert(unit);
1111
1112 if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context))
1113 return 0;
1114
1115 if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
1116 return -EINVAL;
1117
1118 /* This is where we'll place stuff when we are done; the main credentials directory is world-readable,
1119 * and the subdir we mount over with a read-only file system readable by the service's user. */
1120 q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
1121 if (!q)
1122 return -ENOMEM;
1123
1124 r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
1125 if (r < 0 && r != -EEXIST)
1126 return r;
1127
1128 p = path_join(q, unit);
1129 if (!p)
1130 return -ENOMEM;
1131
1132 r = mkdir_label(p, 0700); /* per-unit dir: private to user */
1133 if (r < 0 && r != -EEXIST)
1134 return r;
1135
1136 r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
1137 if (r < 0) {
1138 _cleanup_(rmdir_and_freep) char *u = NULL; /* remove the temporary workspace if we can */
1139 _cleanup_free_ char *t = NULL;
1140
1141 /* If this is not a privilege or support issue then propagate the error */
1142 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
1143 return r;
1144
1145 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
1146 * it into place, so that users can't access half-initialized credential stores. */
1147 t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
1148 if (!t)
1149 return -ENOMEM;
1150
1151 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
1152 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
1153 * after it is fully set up */
1154 u = path_join(t, unit);
1155 if (!u)
1156 return -ENOMEM;
1157
1158 FOREACH_STRING(i, t, u) {
1159 r = mkdir_label(i, 0700);
1160 if (r < 0 && r != -EEXIST)
1161 return log_debug_errno(r, "Failed to make directory '%s': %m", i);
1162 }
1163
1164 r = setup_credentials_internal(
1165 context,
1166 cgroup_context,
1167 params,
1168 unit,
1169 p, /* final mount point */
1170 u, /* temporary workspace to overmount */
1171 true, /* reuse the workspace if it is already a mount */
1172 false, /* it's OK to fall back to a plain directory if we can't mount anything */
1173 uid,
1174 gid);
1175 if (r < 0)
1176 return r;
1177
1178 } else if (r == 0) {
1179
1180 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
1181 * we can use the same directory for all cases, after turning off propagation. Question
1182 * though is: where do we turn off propagation exactly, and where do we place the workspace
1183 * directory? We need some place that is guaranteed to be a mount point in the host, and
1184 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
1185 * since we ultimately want to move the resulting file system there, i.e. we need propagation
1186 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
1187 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
1188 * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
1189 * /dev/ is a mount point and we now for sure that /dev/shm/ exists. Hence we can turn off
1190 * propagation on the former, and then overmount the latter.
1191 *
1192 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
1193 * for this purpose, but there are few other candidates that work equally well for us, and
1194 * given that we do this in a privately namespaced short-lived single-threaded process that
1195 * no one else sees this should be OK to do. */
1196
1197 /* Turn off propagation from our namespace to host */
1198 r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
1199 if (r < 0)
1200 goto child_fail;
1201
1202 r = setup_credentials_internal(
1203 context,
1204 cgroup_context,
1205 params,
1206 unit,
1207 p, /* final mount point */
1208 "/dev/shm", /* temporary workspace to overmount */
1209 false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
1210 true, /* insist that something is mounted, do not allow fallback to plain directory */
1211 uid,
1212 gid);
1213 if (r < 0)
1214 goto child_fail;
1215
1216 _exit(EXIT_SUCCESS);
1217
1218 child_fail:
1219 _exit(EXIT_FAILURE);
1220 }
1221
1222 /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
1223 * try to remove it. This matters in particular if we created the dir as mount point but then didn't
1224 * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCESS being
1225 * seen by users when trying access this inode. */
1226 (void) rmdir(p);
1227 return 0;
1228 }