]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/exec-credential.c
Merge pull request #33424 from poettering/machined-gc-rework
[thirdparty/systemd.git] / src / core / exec-credential.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <sys/mount.h>
4
5 #include "acl-util.h"
6 #include "creds-util.h"
7 #include "exec-credential.h"
8 #include "execute.h"
9 #include "fileio.h"
10 #include "glob-util.h"
11 #include "io-util.h"
12 #include "iovec-util.h"
13 #include "label-util.h"
14 #include "mkdir-label.h"
15 #include "mount-util.h"
16 #include "mountpoint-util.h"
17 #include "process-util.h"
18 #include "random-util.h"
19 #include "recurse-dir.h"
20 #include "rm-rf.h"
21 #include "tmpfile-util.h"
22
23 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
24 if (!sc)
25 return NULL;
26
27 free(sc->id);
28 free(sc->data);
29 return mfree(sc);
30 }
31
32 ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
33 if (!lc)
34 return NULL;
35
36 free(lc->id);
37 free(lc->path);
38 return mfree(lc);
39 }
40
/* Hash operations for containers keyed by a string (hashed/compared via string_hash_func() and
 * string_compare_func()) that store ExecSetCredential values. exec_set_credential_free() is registered as
 * the value destructor. */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
                exec_set_credential_hash_ops,
                char, string_hash_func, string_compare_func,
                ExecSetCredential, exec_set_credential_free);
45
/* Hash operations for containers keyed by a string (hashed/compared via string_hash_func() and
 * string_compare_func()) that store ExecLoadCredential values. exec_load_credential_free() is registered
 * as the value destructor. */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
                exec_load_credential_hash_ops,
                char, string_hash_func, string_compare_func,
                ExecLoadCredential, exec_load_credential_free);
50
51 bool exec_params_need_credentials(const ExecParameters *p) {
52 assert(p);
53
54 return p->flags & (EXEC_SETUP_CREDENTIALS|EXEC_SETUP_CREDENTIALS_FRESH);
55 }
56
57 bool exec_context_has_credentials(const ExecContext *c) {
58 assert(c);
59
60 return !hashmap_isempty(c->set_credentials) ||
61 !hashmap_isempty(c->load_credentials) ||
62 !set_isempty(c->import_credentials);
63 }
64
65 bool exec_context_has_encrypted_credentials(const ExecContext *c) {
66 assert(c);
67
68 const ExecLoadCredential *load_cred;
69 HASHMAP_FOREACH(load_cred, c->load_credentials)
70 if (load_cred->encrypted)
71 return true;
72
73 const ExecSetCredential *set_cred;
74 HASHMAP_FOREACH(set_cred, c->set_credentials)
75 if (set_cred->encrypted)
76 return true;
77
78 return false;
79 }
80
/* Builds the per-unit credential directory path "<runtime_prefix>/credentials/<unit>".
 *
 * Returns 1 and stores a newly allocated path in *ret on success, 0 with *ret set to NULL if either the
 * runtime prefix or the unit name is unset, and -ENOMEM on allocation failure. */
static int get_credential_directory(
                const char *runtime_prefix,
                const char *unit,
                char **ret) {

        assert(ret);

        if (runtime_prefix && unit) {
                char *dir;

                dir = path_join(runtime_prefix, "credentials", unit);
                if (!dir)
                        return -ENOMEM;

                *ret = dir;
                return 1;
        }

        /* Not enough information to derive a path */
        *ret = NULL;
        return 0;
}
102
103 int exec_context_get_credential_directory(
104 const ExecContext *context,
105 const ExecParameters *params,
106 const char *unit,
107 char **ret) {
108
109 assert(context);
110 assert(params);
111 assert(unit);
112 assert(ret);
113
114 if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context)) {
115 *ret = NULL;
116 return 0;
117 }
118
119 return get_credential_directory(params->prefix[EXEC_DIRECTORY_RUNTIME], unit, ret);
120 }
121
122 int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_prefix, const char *unit) {
123 _cleanup_free_ char *p = NULL;
124 int r;
125
126 assert(c);
127
128 r = get_credential_directory(runtime_prefix, unit, &p);
129 if (r <= 0)
130 return r;
131
132 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
133 * unmount it, and afterwards remove the mount point */
134 (void) umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW);
135 (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
136
137 return 0;
138 }
139
140 static int write_credential(
141 int dfd,
142 const char *id,
143 const void *data,
144 size_t size,
145 uid_t uid,
146 gid_t gid,
147 bool ownership_ok) {
148
149 _cleanup_(unlink_and_freep) char *tmp = NULL;
150 _cleanup_close_ int fd = -EBADF;
151 int r;
152
153 assert(dfd >= 0);
154 assert(id);
155 assert(data || size == 0);
156
157 r = tempfn_random_child("", "cred", &tmp);
158 if (r < 0)
159 return r;
160
161 fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
162 if (fd < 0) {
163 tmp = mfree(tmp);
164 return -errno;
165 }
166
167 r = loop_write(fd, data, size);
168 if (r < 0)
169 return r;
170
171 if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
172 return -errno;
173
174 if (uid_is_valid(uid) && uid != getuid()) {
175 r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
176 if (r < 0) {
177 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
178 return r;
179
180 if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
181 * to express: that the user gets read access and nothing
182 * else. But if the backing fs can't support that (e.g. ramfs)
183 * then we can use file ownership instead. But that's only safe if
184 * we can then re-mount the whole thing read-only, so that the
185 * user can no longer chmod() the file to gain write access. */
186 return r;
187
188 if (fchown(fd, uid, gid) < 0)
189 return -errno;
190 }
191 }
192
193 if (renameat(dfd, tmp, dfd, id) < 0)
194 return -errno;
195
196 tmp = mfree(tmp);
197 return 0;
198 }
199
/* Selects which credential store directories credential_search_path() assembles into a search path. */
typedef enum CredentialSearchPath {
        CREDENTIAL_SEARCH_PATH_TRUSTED,   /* received credentials directory + "credstore" directories */
        CREDENTIAL_SEARCH_PATH_ENCRYPTED, /* received encrypted credentials directory + "credstore.encrypted" directories */
        CREDENTIAL_SEARCH_PATH_ALL,       /* both of the above, encrypted ones first */
        _CREDENTIAL_SEARCH_PATH_MAX,      /* number of valid values, used for range checks */
        _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
} CredentialSearchPath;
207
208 static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
209 _cleanup_strv_free_ char **l = NULL;
210
211 assert(params);
212 assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
213
214 /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
215 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
216 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
217
218 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
219 if (strv_extend(&l, params->received_encrypted_credentials_directory) < 0)
220 return NULL;
221
222 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
223 return NULL;
224 }
225
226 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
227 if (strv_extend(&l, params->received_credentials_directory) < 0)
228 return NULL;
229
230 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
231 return NULL;
232 }
233
234 if (DEBUG_LOGGING) {
235 _cleanup_free_ char *t = strv_join(l, ":");
236
237 log_debug("Credential search path is: %s", strempty(t));
238 }
239
240 return TAKE_PTR(l);
241 }
242
243 static int maybe_decrypt_and_write_credential(
244 int dir_fd,
245 const char *id,
246 bool encrypted,
247 uid_t uid,
248 gid_t gid,
249 bool ownership_ok,
250 const char *data,
251 size_t size,
252 uint64_t *left) {
253
254 _cleanup_(iovec_done_erase) struct iovec plaintext = {};
255 size_t add;
256 int r;
257
258 assert(dir_fd >= 0);
259 assert(id);
260 assert(left);
261
262 if (encrypted) {
263 r = decrypt_credential_and_warn(
264 id,
265 now(CLOCK_REALTIME),
266 /* tpm2_device= */ NULL,
267 /* tpm2_signature_path= */ NULL,
268 getuid(),
269 &IOVEC_MAKE(data, size),
270 CREDENTIAL_ANY_SCOPE,
271 &plaintext);
272 if (r < 0)
273 return r;
274
275 data = plaintext.iov_base;
276 size = plaintext.iov_len;
277 }
278
279 add = strlen(id) + size;
280 if (add > *left)
281 return -E2BIG;
282
283 r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
284 if (r < 0)
285 return log_debug_errno(r, "Failed to write credential '%s': %m", id);
286
287 *left -= add;
288 return 0;
289 }
290
291 static int load_credential_glob(
292 const char *path,
293 bool encrypted,
294 char * const *search_path,
295 ReadFullFileFlags flags,
296 int write_dfd,
297 uid_t uid,
298 gid_t gid,
299 bool ownership_ok,
300 uint64_t *left) {
301
302 int r;
303
304 assert(path);
305 assert(search_path);
306 assert(write_dfd >= 0);
307 assert(left);
308
309 STRV_FOREACH(d, search_path) {
310 _cleanup_globfree_ glob_t pglob = {};
311 _cleanup_free_ char *j = NULL;
312
313 j = path_join(*d, path);
314 if (!j)
315 return -ENOMEM;
316
317 r = safe_glob(j, 0, &pglob);
318 if (r == -ENOENT)
319 continue;
320 if (r < 0)
321 return r;
322
323 FOREACH_ARRAY(p, pglob.gl_pathv, pglob.gl_pathc) {
324 _cleanup_free_ char *fn = NULL;
325 _cleanup_(erase_and_freep) char *data = NULL;
326 size_t size;
327
328 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
329 r = read_full_file_full(
330 AT_FDCWD,
331 *p,
332 UINT64_MAX,
333 encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
334 flags,
335 NULL,
336 &data, &size);
337 if (r < 0)
338 return log_debug_errno(r, "Failed to read credential '%s': %m", *p);
339
340 r = path_extract_filename(*p, &fn);
341 if (r < 0)
342 return log_debug_errno(r, "Failed to extract filename from '%s': %m", *p);
343
344 r = maybe_decrypt_and_write_credential(
345 write_dfd,
346 fn,
347 encrypted,
348 uid,
349 gid,
350 ownership_ok,
351 data, size,
352 left);
353 if (r == -EEXIST)
354 continue;
355 if (r < 0)
356 return r;
357 }
358 }
359
360 return 0;
361 }
362
363 static int load_credential(
364 const ExecContext *context,
365 const ExecParameters *params,
366 const char *id,
367 const char *path,
368 bool encrypted,
369 const char *unit,
370 int read_dfd,
371 int write_dfd,
372 uid_t uid,
373 gid_t gid,
374 bool ownership_ok,
375 uint64_t *left) {
376
377 ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
378 _cleanup_strv_free_ char **search_path = NULL;
379 _cleanup_(erase_and_freep) char *data = NULL;
380 _cleanup_free_ char *bindname = NULL;
381 const char *source = NULL;
382 bool missing_ok = true;
383 size_t size, maxsz;
384 int r;
385
386 assert(context);
387 assert(params);
388 assert(id);
389 assert(path);
390 assert(unit);
391 assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
392 assert(write_dfd >= 0);
393 assert(left);
394
395 if (read_dfd >= 0) {
396 /* If a directory fd is specified, then read the file directly from that dir. In this case we
397 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
398 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
399 * open it. */
400
401 if (!filename_is_valid(path)) /* safety check */
402 return -EINVAL;
403
404 missing_ok = true;
405 source = path;
406
407 } else if (path_is_absolute(path)) {
408 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
409 * sockets */
410
411 if (!path_is_valid(path)) /* safety check */
412 return -EINVAL;
413
414 flags |= READ_FULL_FILE_CONNECT_SOCKET;
415
416 /* Pass some minimal info about the unit and the credential name we are looking to acquire
417 * via the source socket address in case we read off an AF_UNIX socket. */
418 if (asprintf(&bindname, "@%" PRIx64 "/unit/%s/%s", random_u64(), unit, id) < 0)
419 return -ENOMEM;
420
421 missing_ok = false;
422 source = path;
423
424 } else if (credential_name_valid(path)) {
425 /* If this is a relative path, take it as credential name relative to the credentials
426 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
427 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
428
429 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
430 if (!search_path)
431 return -ENOMEM;
432
433 missing_ok = true;
434 } else
435 source = NULL;
436
437 if (encrypted)
438 flags |= READ_FULL_FILE_UNBASE64;
439
440 maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
441
442 if (search_path)
443 STRV_FOREACH(d, search_path) {
444 _cleanup_free_ char *j = NULL;
445
446 j = path_join(*d, path);
447 if (!j)
448 return -ENOMEM;
449
450 r = read_full_file_full(
451 AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
452 UINT64_MAX,
453 maxsz,
454 flags,
455 NULL,
456 &data, &size);
457 if (r != -ENOENT)
458 break;
459 }
460 else if (source)
461 r = read_full_file_full(
462 read_dfd, source,
463 UINT64_MAX,
464 maxsz,
465 flags,
466 bindname,
467 &data, &size);
468 else
469 r = -ENOENT;
470
471 if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
472 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
473 * will get clear errors if we don't pass such a missing credential on as they
474 * themselves will get ENOENT when trying to read them, which should not be much
475 * worse than when we handle the error here and make it fatal.
476 *
477 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
478 * we are fine, too. */
479 log_full_errno(hashmap_contains(context->set_credentials, id) ? LOG_DEBUG : LOG_INFO,
480 r, "Couldn't read inherited credential '%s', skipping: %m", path);
481 return 0;
482 }
483 if (r < 0)
484 return log_debug_errno(r, "Failed to read credential '%s': %m", path);
485
486 return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
487 }
488
/* Arguments handed via the userdata pointer to load_cred_recurse_dir_cb(), which forwards them to
 * load_credential() for each directory entry it processes. */
struct load_cred_args {
        const ExecContext *context;
        const ExecParameters *params;
        bool encrypted;       /* whether the files found are to be treated as encrypted */
        const char *unit;
        int dfd;              /* directory fd of the credential directory that is written to */
        uid_t uid;
        gid_t gid;
        bool ownership_ok;
        uint64_t *left;       /* remaining size budget, shared across all credentials */
};
500
501 static int load_cred_recurse_dir_cb(
502 RecurseDirEvent event,
503 const char *path,
504 int dir_fd,
505 int inode_fd,
506 const struct dirent *de,
507 const struct statx *sx,
508 void *userdata) {
509
510 struct load_cred_args *args = ASSERT_PTR(userdata);
511 _cleanup_free_ char *sub_id = NULL;
512 int r;
513
514 assert(path);
515 assert(de);
516
517 if (event != RECURSE_DIR_ENTRY)
518 return RECURSE_DIR_CONTINUE;
519
520 if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
521 return RECURSE_DIR_CONTINUE;
522
523 sub_id = strreplace(path, "/", "_");
524 if (!sub_id)
525 return -ENOMEM;
526
527 if (!credential_name_valid(sub_id))
528 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
529
530 if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
531 log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
532 return RECURSE_DIR_CONTINUE;
533 }
534 if (errno != ENOENT)
535 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
536
537 r = load_credential(
538 args->context,
539 args->params,
540 sub_id,
541 de->d_name,
542 args->encrypted,
543 args->unit,
544 dir_fd,
545 args->dfd,
546 args->uid,
547 args->gid,
548 args->ownership_ok,
549 args->left);
550 if (r < 0)
551 return r;
552
553 return RECURSE_DIR_CONTINUE;
554 }
555
556 static int acquire_credentials(
557 const ExecContext *context,
558 const ExecParameters *params,
559 const char *unit,
560 const char *p,
561 uid_t uid,
562 gid_t gid,
563 bool ownership_ok) {
564
565 uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
566 _cleanup_close_ int dfd = -EBADF;
567 const char *ic;
568 ExecLoadCredential *lc;
569 ExecSetCredential *sc;
570 int r;
571
572 assert(context);
573 assert(params);
574 assert(unit);
575 assert(p);
576
577 dfd = open(p, O_DIRECTORY|O_CLOEXEC);
578 if (dfd < 0)
579 return -errno;
580
581 r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
582 if (r < 0)
583 return r;
584
585 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
586 HASHMAP_FOREACH(lc, context->load_credentials) {
587 _cleanup_close_ int sub_fd = -EBADF;
588
589 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
590 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
591 * a regular file. Finally, if it's a relative path we will use it as a credential name to
592 * propagate a credential passed to us from further up. */
593
594 if (path_is_absolute(lc->path)) {
595 sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
596 if (sub_fd < 0 && !IN_SET(errno,
597 ENOTDIR, /* Not a directory */
598 ENOENT)) /* Doesn't exist? */
599 return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
600 }
601
602 if (sub_fd < 0)
603 /* Regular file (incl. a credential passed in from higher up) */
604 r = load_credential(
605 context,
606 params,
607 lc->id,
608 lc->path,
609 lc->encrypted,
610 unit,
611 AT_FDCWD,
612 dfd,
613 uid,
614 gid,
615 ownership_ok,
616 &left);
617 else
618 /* Directory */
619 r = recurse_dir(sub_fd,
620 /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
621 /* statx_mask= */ 0,
622 /* n_depth_max= */ UINT_MAX,
623 RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
624 load_cred_recurse_dir_cb,
625 &(struct load_cred_args) {
626 .context = context,
627 .params = params,
628 .encrypted = lc->encrypted,
629 .unit = unit,
630 .dfd = dfd,
631 .uid = uid,
632 .gid = gid,
633 .ownership_ok = ownership_ok,
634 .left = &left,
635 });
636 if (r < 0)
637 return r;
638 }
639
640 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
641 * override any credentials found earlier. */
642 SET_FOREACH(ic, context->import_credentials) {
643 _cleanup_free_ char **search_path = NULL;
644
645 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
646 if (!search_path)
647 return -ENOMEM;
648
649 r = load_credential_glob(
650 ic,
651 /* encrypted = */ false,
652 search_path,
653 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
654 dfd,
655 uid,
656 gid,
657 ownership_ok,
658 &left);
659 if (r < 0)
660 return r;
661
662 search_path = strv_free(search_path);
663 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
664 if (!search_path)
665 return -ENOMEM;
666
667 r = load_credential_glob(
668 ic,
669 /* encrypted = */ true,
670 search_path,
671 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
672 dfd,
673 uid,
674 gid,
675 ownership_ok,
676 &left);
677 if (r < 0)
678 return r;
679 }
680
681 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
682 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
683 HASHMAP_FOREACH(sc, context->set_credentials) {
684 _cleanup_(iovec_done_erase) struct iovec plaintext = {};
685 const char *data;
686 size_t size, add;
687
688 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
689 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
690 * slow and involved, hence it's nice to be able to skip that if the credential already
691 * exists anyway. */
692 if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
693 continue;
694 if (errno != ENOENT)
695 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
696
697 if (sc->encrypted) {
698 r = decrypt_credential_and_warn(
699 sc->id,
700 now(CLOCK_REALTIME),
701 /* tpm2_device= */ NULL,
702 /* tpm2_signature_path= */ NULL,
703 getuid(),
704 &IOVEC_MAKE(sc->data, sc->size),
705 CREDENTIAL_ANY_SCOPE,
706 &plaintext);
707 if (r < 0)
708 return r;
709
710 data = plaintext.iov_base;
711 size = plaintext.iov_len;
712 } else {
713 data = sc->data;
714 size = sc->size;
715 }
716
717 add = strlen(sc->id) + size;
718 if (add > left)
719 return -E2BIG;
720
721 r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
722 if (r < 0)
723 return r;
724
725 left -= add;
726 }
727
728 r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
729 if (r < 0)
730 return r;
731
732 /* After we created all keys with the right perms, also make sure the credential store as a whole is
733 * accessible */
734
735 if (uid_is_valid(uid) && uid != getuid()) {
736 r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
737 if (r < 0) {
738 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
739 return r;
740
741 if (!ownership_ok)
742 return r;
743
744 if (fchown(dfd, uid, gid) < 0)
745 return -errno;
746 }
747 }
748
749 return 0;
750 }
751
/* Sets up the credential store of a unit at 'final', using 'workspace' as the place to prepare it.
 *
 * Overall sequence:
 *   1. If 'final' already is a mount point, either unmount it (EXEC_SETUP_CREDENTIALS_FRESH) or, if it
 *      is not empty, reuse it as is and return early.
 *   2. Get a writable mount onto 'workspace': a new credentials file system, or a bind mount of 'final'.
 *      If bind mounting fails for lack of privileges and 'must_mount' is false, fall back to operating
 *      on the plain directory 'final'.
 *   3. Acquire all credentials into the chosen location via acquire_credentials().
 *   4. Either move the (now read-only) workspace mount to 'final', drop the temporary bind mount, or, in
 *      the plain directory case, open up access to the parent directory of 'final'.
 *
 * Returns 0 on success (including the "already set up" case), a negative errno-style error otherwise. */
static int setup_credentials_internal(
                const ExecContext *context,
                const ExecParameters *params,
                const char *unit,
                const char *final,        /* This is where the credential store shall eventually end up at */
                const char *workspace,    /* This is where we can prepare it before moving it to the final place */
                bool reuse_workspace,     /* Whether to reuse any existing workspace mount if it already is a mount */
                bool must_mount,          /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
                uid_t uid,
                gid_t gid) {

        bool final_mounted;
        int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
                                   * if we mounted something; false if we definitely can't mount anything */

        assert(context);
        assert(params);
        assert(unit);
        assert(final);
        assert(workspace);

        r = path_is_mount_point(final);
        if (r < 0)
                return r;
        final_mounted = r > 0;

        if (final_mounted) {
                if (FLAGS_SET(params->flags, EXEC_SETUP_CREDENTIALS_FRESH)) {
                        /* A fresh credential store was requested, hence get rid of the old mount */
                        r = umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
                        if (r < 0)
                                return r;

                        final_mounted = false;
                } else {
                        /* We can reuse the previous credential dir */
                        r = dir_is_empty(final, /* ignore_hidden_or_backup = */ false);
                        if (r < 0)
                                return r;
                        if (r == 0) {
                                log_debug("Credential dir for unit '%s' already set up, skipping.", unit);
                                return 0;
                        }
                }
        }

        if (reuse_workspace) {
                r = path_is_mount_point(workspace);
                if (r < 0)
                        return r;
                if (r > 0)
                        workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
                                                   * it, let's keep this in mind */
                else
                        workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
        } else
                workspace_mounted = -1; /* ditto */

        /* If both the final place and the workspace are mounted, we have no mounts to set up, based on
         * the assumption that they're actually the same tmpfs (but the latter with MS_RDONLY different).
         * If the workspace is not mounted, we just bind the final place over and make it writable. */
        must_mount = must_mount || final_mounted;

        if (workspace_mounted < 0) {
                if (!final_mounted)
                        /* Nothing is mounted on the workspace yet, let's try to mount a new tmpfs if
                         * not using the final place. */
                        r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
                if (final_mounted || r < 0) {
                        /* If using final place or failed to mount new tmpfs, make a bind mount from
                         * the final to the workspace, so that we can make it writable there. */
                        r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
                        if (r < 0) {
                                if (!ERRNO_IS_PRIVILEGE(r))
                                        /* Propagate anything that isn't a permission problem. */
                                        return r;

                                if (must_mount)
                                        /* If it's not OK to use the plain directory fallback, propagate all
                                         * errors too. */
                                        return r;

                                /* If we lack privileges to bind mount stuff, then let's gracefully proceed
                                 * for compat with container envs, and just use the final dir as is.
                                 * Final place must not be mounted in this case (refused by must_mount
                                 * above) */

                                workspace_mounted = false;
                        } else {
                                /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
                                r = mount_nofollow_verbose(LOG_DEBUG,
                                                           NULL,
                                                           workspace,
                                                           NULL,
                                                           MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false),
                                                           NULL);
                                if (r < 0)
                                        return r;

                                workspace_mounted = true;
                        }
                } else
                        workspace_mounted = true;
        }

        assert(workspace_mounted >= 0);
        assert(!must_mount || workspace_mounted);

        /* Operate on the workspace if we have a mount there, otherwise directly on the final directory */
        const char *where = workspace_mounted ? workspace : final;

        (void) label_fix_full(AT_FDCWD, where, final, 0);

        r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
        if (r < 0) {
                /* If we're using final place as workspace, and failed to acquire credentials, we might
                 * have left half-written creds there. Let's get rid of the whole mount, so future
                 * calls won't reuse it. */
                if (final_mounted)
                        (void) umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);

                return r;
        }

        if (workspace_mounted) {
                if (!final_mounted) {
                        /* Make workspace read-only now, so that any bind mount we make from it defaults to
                         * read-only too */
                        r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
                        if (r < 0)
                                return r;

                        /* And mount it to the final place, read-only */
                        r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
                } else
                        /* Otherwise we just get rid of the bind mount of final place */
                        r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
                if (r < 0)
                        return r;
        } else {
                _cleanup_free_ char *parent = NULL;

                /* If we do not have our own mount but used the plain directory fallback, then we need to
                 * open access to the top-level credential directory and the per-service directory now */

                r = path_extract_directory(final, &parent);
                if (r < 0)
                        return r;
                if (chmod(parent, 0755) < 0)
                        return -errno;
        }

        return 0;
}
904
/* Sets up the credential directory "<runtime prefix>/credentials/<unit>" for a unit.
 *
 * Does nothing and returns 0 if the parameters do not request credential setup or the context
 * configures no credentials. Returns -EINVAL if no runtime directory prefix is set.
 *
 * The actual work is preferably done in a forked off child that runs in a new mount namespace. If such a
 * child cannot be forked for lack of privileges or support, the setup is done in the calling process
 * instead, using a per-unit workspace directory below "<runtime prefix>/systemd/temporary-credentials".
 *
 * Returns 0 on success, a negative errno-style error otherwise. */
int exec_setup_credentials(
                const ExecContext *context,
                const ExecParameters *params,
                const char *unit,
                uid_t uid,
                gid_t gid) {

        _cleanup_free_ char *p = NULL, *q = NULL;
        int r;

        assert(context);
        assert(params);
        assert(unit);

        if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context))
                return 0;

        if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
                return -EINVAL;

        /* This is where we'll place stuff when we are done; the main credentials directory is world-readable,
         * and the subdir we mount over with a read-only file system readable by the service's user. */
        q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
        if (!q)
                return -ENOMEM;

        r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
        if (r < 0 && r != -EEXIST)
                return r;

        p = path_join(q, unit);
        if (!p)
                return -ENOMEM;

        r = mkdir_label(p, 0700); /* per-unit dir: private to user */
        if (r < 0 && r != -EEXIST)
                return r;

        r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
        if (r < 0) {
                _cleanup_(rmdir_and_freep) char *u = NULL; /* remove the temporary workspace if we can */
                _cleanup_free_ char *t = NULL;

                /* If this is not a privilege or support issue then propagate the error */
                if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
                        return r;

                /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
                 * it into place, so that users can't access half-initialized credential stores. */
                t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
                if (!t)
                        return -ENOMEM;

                /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
                 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
                 * after it is fully set up */
                u = path_join(t, unit);
                if (!u)
                        return -ENOMEM;

                FOREACH_STRING(i, t, u) {
                        r = mkdir_label(i, 0700);
                        if (r < 0 && r != -EEXIST)
                                return r;
                }

                r = setup_credentials_internal(
                                context,
                                params,
                                unit,
                                p,       /* final mount point */
                                u,       /* temporary workspace to overmount */
                                true,    /* reuse the workspace if it is already a mount */
                                false,   /* it's OK to fall back to a plain directory if we can't mount anything */
                                uid,
                                gid);
                if (r < 0)
                        return r;

        } else if (r == 0) {

                /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
                 * we can use the same directory for all cases, after turning off propagation. Question
                 * though is: where do we turn off propagation exactly, and where do we place the workspace
                 * directory? We need some place that is guaranteed to be a mount point in the host, and
                 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
                 * since we ultimately want to move the resulting file system there, i.e. we need propagation
                 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
                 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
                 * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
                 * /dev/ is a mount point and we know for sure that /dev/shm/ exists. Hence we can turn off
                 * propagation on the former, and then overmount the latter.
                 *
                 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
                 * for this purpose, but there are few other candidates that work equally well for us, and
                 * given that we do this in a privately namespaced short-lived single-threaded process that
                 * no one else sees this should be OK to do. */

                /* Turn off propagation from our namespace to host */
                r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
                if (r < 0)
                        goto child_fail;

                r = setup_credentials_internal(
                                context,
                                params,
                                unit,
                                p,           /* final mount point */
                                "/dev/shm",  /* temporary workspace to overmount */
                                false,       /* do not reuse /dev/shm if it is already a mount, under no circumstances */
                                true,        /* insist that something is mounted, do not allow fallback to plain directory */
                                uid,
                                gid);
                if (r < 0)
                        goto child_fail;

                /* The child only reports success or failure via its exit status */
                _exit(EXIT_SUCCESS);

        child_fail:
                _exit(EXIT_FAILURE);
        }

        /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
         * try to remove it. This matters in particular if we created the dir as mount point but then didn't
         * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCES being
         * seen by users when trying to access this inode. */
        (void) rmdir(p);
        return 0;
}