]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/exec-credential.c
core: align table
[thirdparty/systemd.git] / src / core / exec-credential.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <sys/mount.h>
4
5 #include "acl-util.h"
6 #include "creds-util.h"
7 #include "exec-credential.h"
8 #include "execute.h"
9 #include "fileio.h"
10 #include "glob-util.h"
11 #include "io-util.h"
12 #include "iovec-util.h"
13 #include "label-util.h"
14 #include "mkdir-label.h"
15 #include "mount-util.h"
16 #include "mount.h"
17 #include "mountpoint-util.h"
18 #include "process-util.h"
19 #include "random-util.h"
20 #include "recurse-dir.h"
21 #include "rm-rf.h"
22 #include "tmpfile-util.h"
23
24 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
25 if (!sc)
26 return NULL;
27
28 free(sc->id);
29 free(sc->data);
30 return mfree(sc);
31 }
32
33 ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
34 if (!lc)
35 return NULL;
36
37 free(lc->id);
38 free(lc->path);
39 return mfree(lc);
40 }
41
/* Hash operations for hashmaps of ExecSetCredential objects: keys are strings (hashed/compared via
 * string_hash_func/string_compare_func), values are released with exec_set_credential_free().
 * NOTE(review): no key destructor is configured here, presumably because the key is the value's own
 * 'id' string — confirm against the code inserting into these hashmaps. */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
        exec_set_credential_hash_ops,
        char, string_hash_func, string_compare_func,
        ExecSetCredential, exec_set_credential_free);
46
/* Hash operations for hashmaps of ExecLoadCredential objects: keys are strings (hashed/compared via
 * string_hash_func/string_compare_func), values are released with exec_load_credential_free().
 * NOTE(review): no key destructor is configured here, presumably because the key is the value's own
 * 'id' string — confirm against the code inserting into these hashmaps. */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
        exec_load_credential_hash_ops,
        char, string_hash_func, string_compare_func,
        ExecLoadCredential, exec_load_credential_free);
51
52 bool exec_params_need_credentials(const ExecParameters *p) {
53 assert(p);
54
55 return p->flags & (EXEC_SETUP_CREDENTIALS|EXEC_SETUP_CREDENTIALS_FRESH);
56 }
57
58 bool exec_context_has_credentials(const ExecContext *c) {
59 assert(c);
60
61 return !hashmap_isempty(c->set_credentials) ||
62 !hashmap_isempty(c->load_credentials) ||
63 !set_isempty(c->import_credentials);
64 }
65
66 bool exec_context_has_encrypted_credentials(const ExecContext *c) {
67 assert(c);
68
69 const ExecLoadCredential *load_cred;
70 HASHMAP_FOREACH(load_cred, c->load_credentials)
71 if (load_cred->encrypted)
72 return true;
73
74 const ExecSetCredential *set_cred;
75 HASHMAP_FOREACH(set_cred, c->set_credentials)
76 if (set_cred->encrypted)
77 return true;
78
79 return false;
80 }
81
/* Builds the path "<runtime_prefix>/credentials/<unit>".
 *
 * Returns 1 and stores the newly allocated path in *ret on success. Returns 0 and stores NULL in *ret
 * if either the runtime prefix or the unit name is not known. Returns -ENOMEM if the path cannot be
 * allocated, in which case *ret is left untouched. */
static int get_credential_directory(
                const char *runtime_prefix,
                const char *unit,
                char **ret) {

        char *dir = NULL;
        int found = 0;

        assert(ret);

        if (runtime_prefix && unit) {
                dir = path_join(runtime_prefix, "credentials", unit);
                if (!dir)
                        return -ENOMEM;

                found = 1;
        }

        *ret = dir;
        return found;
}
103
104 int exec_context_get_credential_directory(
105 const ExecContext *context,
106 const ExecParameters *params,
107 const char *unit,
108 char **ret) {
109
110 assert(context);
111 assert(params);
112 assert(unit);
113 assert(ret);
114
115 if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context)) {
116 *ret = NULL;
117 return 0;
118 }
119
120 return get_credential_directory(params->prefix[EXEC_DIRECTORY_RUNTIME], unit, ret);
121 }
122
123 int unit_add_default_credential_dependencies(Unit *u, const ExecContext *c) {
124 _cleanup_free_ char *p = NULL, *m = NULL;
125 int r;
126
127 assert(u);
128 assert(c);
129
130 if (!exec_context_has_credentials(c))
131 return 0;
132
133 /* Let's make sure the credentials directory of this service is unmounted *after* the service itself
134 * shuts down. This only matters if mount namespacing is not used for the service, and hence the
135 * credentials mount appears on the host. */
136
137 r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
138 if (r <= 0)
139 return r;
140
141 r = unit_name_from_path(p, ".mount", &m);
142 if (r < 0)
143 return r;
144
145 return unit_add_dependency_by_name(u, UNIT_AFTER, m, /* add_reference= */ true, UNIT_DEPENDENCY_FILE);
146 }
147
148 int exec_context_destroy_credentials(Unit *u) {
149 _cleanup_free_ char *p = NULL;
150 int r;
151
152 assert(u);
153
154 r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
155 if (r <= 0)
156 return r;
157
158 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
159 * unmount it, and afterwards remove the mount point */
160 if (umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW) >= 0)
161 (void) mount_invalidate_state_by_path(u->manager, p);
162
163 (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
164
165 return 0;
166 }
167
168 static int write_credential(
169 int dfd,
170 const char *id,
171 const void *data,
172 size_t size,
173 uid_t uid,
174 gid_t gid,
175 bool ownership_ok) {
176
177 _cleanup_(unlink_and_freep) char *tmp = NULL;
178 _cleanup_close_ int fd = -EBADF;
179 int r;
180
181 assert(dfd >= 0);
182 assert(id);
183 assert(data || size == 0);
184
185 r = tempfn_random_child("", "cred", &tmp);
186 if (r < 0)
187 return r;
188
189 fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
190 if (fd < 0) {
191 tmp = mfree(tmp);
192 return -errno;
193 }
194
195 r = loop_write(fd, data, size);
196 if (r < 0)
197 return r;
198
199 if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
200 return -errno;
201
202 if (uid_is_valid(uid) && uid != getuid()) {
203 r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
204 if (r < 0) {
205 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
206 return r;
207
208 if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
209 * to express: that the user gets read access and nothing
210 * else. But if the backing fs can't support that (e.g. ramfs)
211 * then we can use file ownership instead. But that's only safe if
212 * we can then re-mount the whole thing read-only, so that the
213 * user can no longer chmod() the file to gain write access. */
214 return r;
215
216 if (fchown(fd, uid, gid) < 0)
217 return -errno;
218 }
219 }
220
221 if (renameat(dfd, tmp, dfd, id) < 0)
222 return -errno;
223
224 tmp = mfree(tmp);
225 return 0;
226 }
227
/* Selects which set of directories credential_search_path() assembles. */
typedef enum CredentialSearchPath {
        CREDENTIAL_SEARCH_PATH_TRUSTED,    /* the received credentials directory plus the "credstore" directories */
        CREDENTIAL_SEARCH_PATH_ENCRYPTED,  /* the received encrypted credentials directory plus the "credstore.encrypted" directories */
        CREDENTIAL_SEARCH_PATH_ALL,        /* both of the above, the encrypted directories first */
        _CREDENTIAL_SEARCH_PATH_MAX,       /* number of valid values, used for range checks */
        _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
} CredentialSearchPath;
235
236 static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
237 _cleanup_strv_free_ char **l = NULL;
238
239 assert(params);
240 assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
241
242 /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
243 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
244 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
245
246 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
247 if (strv_extend(&l, params->received_encrypted_credentials_directory) < 0)
248 return NULL;
249
250 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
251 return NULL;
252 }
253
254 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
255 if (strv_extend(&l, params->received_credentials_directory) < 0)
256 return NULL;
257
258 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
259 return NULL;
260 }
261
262 if (DEBUG_LOGGING) {
263 _cleanup_free_ char *t = strv_join(l, ":");
264
265 log_debug("Credential search path is: %s", strempty(t));
266 }
267
268 return TAKE_PTR(l);
269 }
270
271 static int maybe_decrypt_and_write_credential(
272 int dir_fd,
273 const char *id,
274 bool encrypted,
275 uid_t uid,
276 gid_t gid,
277 bool ownership_ok,
278 const char *data,
279 size_t size,
280 uint64_t *left) {
281
282 _cleanup_(iovec_done_erase) struct iovec plaintext = {};
283 size_t add;
284 int r;
285
286 assert(dir_fd >= 0);
287 assert(id);
288 assert(left);
289
290 if (encrypted) {
291 r = decrypt_credential_and_warn(
292 id,
293 now(CLOCK_REALTIME),
294 /* tpm2_device= */ NULL,
295 /* tpm2_signature_path= */ NULL,
296 getuid(),
297 &IOVEC_MAKE(data, size),
298 CREDENTIAL_ANY_SCOPE,
299 &plaintext);
300 if (r < 0)
301 return r;
302
303 data = plaintext.iov_base;
304 size = plaintext.iov_len;
305 }
306
307 add = strlen(id) + size;
308 if (add > *left)
309 return -E2BIG;
310
311 r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
312 if (r < 0)
313 return log_debug_errno(r, "Failed to write credential '%s': %m", id);
314
315 *left -= add;
316 return 0;
317 }
318
319 static int load_credential_glob(
320 const char *path,
321 bool encrypted,
322 char * const *search_path,
323 ReadFullFileFlags flags,
324 int write_dfd,
325 uid_t uid,
326 gid_t gid,
327 bool ownership_ok,
328 uint64_t *left) {
329
330 int r;
331
332 assert(path);
333 assert(search_path);
334 assert(write_dfd >= 0);
335 assert(left);
336
337 STRV_FOREACH(d, search_path) {
338 _cleanup_globfree_ glob_t pglob = {};
339 _cleanup_free_ char *j = NULL;
340
341 j = path_join(*d, path);
342 if (!j)
343 return -ENOMEM;
344
345 r = safe_glob(j, 0, &pglob);
346 if (r == -ENOENT)
347 continue;
348 if (r < 0)
349 return r;
350
351 FOREACH_ARRAY(p, pglob.gl_pathv, pglob.gl_pathc) {
352 _cleanup_free_ char *fn = NULL;
353 _cleanup_(erase_and_freep) char *data = NULL;
354 size_t size;
355
356 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
357 r = read_full_file_full(
358 AT_FDCWD,
359 *p,
360 UINT64_MAX,
361 encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
362 flags,
363 NULL,
364 &data, &size);
365 if (r < 0)
366 return log_debug_errno(r, "Failed to read credential '%s': %m", *p);
367
368 r = path_extract_filename(*p, &fn);
369 if (r < 0)
370 return log_debug_errno(r, "Failed to extract filename from '%s': %m", *p);
371
372 r = maybe_decrypt_and_write_credential(
373 write_dfd,
374 fn,
375 encrypted,
376 uid,
377 gid,
378 ownership_ok,
379 data, size,
380 left);
381 if (r == -EEXIST)
382 continue;
383 if (r < 0)
384 return r;
385 }
386 }
387
388 return 0;
389 }
390
391 static int load_credential(
392 const ExecContext *context,
393 const ExecParameters *params,
394 const char *id,
395 const char *path,
396 bool encrypted,
397 const char *unit,
398 int read_dfd,
399 int write_dfd,
400 uid_t uid,
401 gid_t gid,
402 bool ownership_ok,
403 uint64_t *left) {
404
405 ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
406 _cleanup_strv_free_ char **search_path = NULL;
407 _cleanup_(erase_and_freep) char *data = NULL;
408 _cleanup_free_ char *bindname = NULL;
409 const char *source = NULL;
410 bool missing_ok = true;
411 size_t size, maxsz;
412 int r;
413
414 assert(context);
415 assert(params);
416 assert(id);
417 assert(path);
418 assert(unit);
419 assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
420 assert(write_dfd >= 0);
421 assert(left);
422
423 if (read_dfd >= 0) {
424 /* If a directory fd is specified, then read the file directly from that dir. In this case we
425 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
426 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
427 * open it. */
428
429 if (!filename_is_valid(path)) /* safety check */
430 return -EINVAL;
431
432 missing_ok = true;
433 source = path;
434
435 } else if (path_is_absolute(path)) {
436 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
437 * sockets */
438
439 if (!path_is_valid(path)) /* safety check */
440 return -EINVAL;
441
442 flags |= READ_FULL_FILE_CONNECT_SOCKET;
443
444 /* Pass some minimal info about the unit and the credential name we are looking to acquire
445 * via the source socket address in case we read off an AF_UNIX socket. */
446 if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
447 return -ENOMEM;
448
449 missing_ok = false;
450 source = path;
451
452 } else if (credential_name_valid(path)) {
453 /* If this is a relative path, take it as credential name relative to the credentials
454 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
455 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
456
457 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
458 if (!search_path)
459 return -ENOMEM;
460
461 missing_ok = true;
462 } else
463 source = NULL;
464
465 if (encrypted)
466 flags |= READ_FULL_FILE_UNBASE64;
467
468 maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
469
470 if (search_path) {
471 STRV_FOREACH(d, search_path) {
472 _cleanup_free_ char *j = NULL;
473
474 j = path_join(*d, path);
475 if (!j)
476 return -ENOMEM;
477
478 r = read_full_file_full(
479 AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
480 UINT64_MAX,
481 maxsz,
482 flags,
483 NULL,
484 &data, &size);
485 if (r != -ENOENT)
486 break;
487 }
488 } else if (source)
489 r = read_full_file_full(
490 read_dfd, source,
491 UINT64_MAX,
492 maxsz,
493 flags,
494 bindname,
495 &data, &size);
496 else
497 r = -ENOENT;
498
499 if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
500 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
501 * will get clear errors if we don't pass such a missing credential on as they
502 * themselves will get ENOENT when trying to read them, which should not be much
503 * worse than when we handle the error here and make it fatal.
504 *
505 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
506 * we are fine, too. */
507 log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
508 return 0;
509 }
510 if (r < 0)
511 return log_debug_errno(r, "Failed to read credential '%s': %m", path);
512
513 return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
514 }
515
/* Arguments handed through recurse_dir() to load_cred_recurse_dir_cb(); they are forwarded to
 * load_credential() for every suitable directory entry found. */
struct load_cred_args {
        const ExecContext *context;
        const ExecParameters *params;
        bool encrypted;                /* whether the files found are to be decrypted */
        const char *unit;
        int dfd;                       /* credential directory to write to */
        uid_t uid;
        gid_t gid;
        bool ownership_ok;             /* whether falling back to chown() instead of ACLs is acceptable */
        uint64_t *left;                /* remaining size budget, shared across all credentials */
};
527
528 static int load_cred_recurse_dir_cb(
529 RecurseDirEvent event,
530 const char *path,
531 int dir_fd,
532 int inode_fd,
533 const struct dirent *de,
534 const struct statx *sx,
535 void *userdata) {
536
537 struct load_cred_args *args = ASSERT_PTR(userdata);
538 _cleanup_free_ char *sub_id = NULL;
539 int r;
540
541 assert(path);
542 assert(de);
543
544 if (event != RECURSE_DIR_ENTRY)
545 return RECURSE_DIR_CONTINUE;
546
547 if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
548 return RECURSE_DIR_CONTINUE;
549
550 sub_id = strreplace(path, "/", "_");
551 if (!sub_id)
552 return -ENOMEM;
553
554 if (!credential_name_valid(sub_id))
555 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
556
557 if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
558 log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
559 return RECURSE_DIR_CONTINUE;
560 }
561 if (errno != ENOENT)
562 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
563
564 r = load_credential(
565 args->context,
566 args->params,
567 sub_id,
568 de->d_name,
569 args->encrypted,
570 args->unit,
571 dir_fd,
572 args->dfd,
573 args->uid,
574 args->gid,
575 args->ownership_ok,
576 args->left);
577 if (r < 0)
578 return r;
579
580 return RECURSE_DIR_CONTINUE;
581 }
582
583 static int acquire_credentials(
584 const ExecContext *context,
585 const ExecParameters *params,
586 const char *unit,
587 const char *p,
588 uid_t uid,
589 gid_t gid,
590 bool ownership_ok) {
591
592 uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
593 _cleanup_close_ int dfd = -EBADF;
594 const char *ic;
595 ExecLoadCredential *lc;
596 ExecSetCredential *sc;
597 int r;
598
599 assert(context);
600 assert(params);
601 assert(unit);
602 assert(p);
603
604 dfd = open(p, O_DIRECTORY|O_CLOEXEC);
605 if (dfd < 0)
606 return -errno;
607
608 r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
609 if (r < 0)
610 return r;
611
612 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
613 HASHMAP_FOREACH(lc, context->load_credentials) {
614 _cleanup_close_ int sub_fd = -EBADF;
615
616 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
617 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
618 * a regular file. Finally, if it's a relative path we will use it as a credential name to
619 * propagate a credential passed to us from further up. */
620
621 if (path_is_absolute(lc->path)) {
622 sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
623 if (sub_fd < 0 && !IN_SET(errno,
624 ENOTDIR, /* Not a directory */
625 ENOENT)) /* Doesn't exist? */
626 return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
627 }
628
629 if (sub_fd < 0)
630 /* Regular file (incl. a credential passed in from higher up) */
631 r = load_credential(
632 context,
633 params,
634 lc->id,
635 lc->path,
636 lc->encrypted,
637 unit,
638 AT_FDCWD,
639 dfd,
640 uid,
641 gid,
642 ownership_ok,
643 &left);
644 else
645 /* Directory */
646 r = recurse_dir(sub_fd,
647 /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
648 /* statx_mask= */ 0,
649 /* n_depth_max= */ UINT_MAX,
650 RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
651 load_cred_recurse_dir_cb,
652 &(struct load_cred_args) {
653 .context = context,
654 .params = params,
655 .encrypted = lc->encrypted,
656 .unit = unit,
657 .dfd = dfd,
658 .uid = uid,
659 .gid = gid,
660 .ownership_ok = ownership_ok,
661 .left = &left,
662 });
663 if (r < 0)
664 return r;
665 }
666
667 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
668 * override any credentials found earlier. */
669 SET_FOREACH(ic, context->import_credentials) {
670 _cleanup_free_ char **search_path = NULL;
671
672 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
673 if (!search_path)
674 return -ENOMEM;
675
676 r = load_credential_glob(
677 ic,
678 /* encrypted = */ false,
679 search_path,
680 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
681 dfd,
682 uid,
683 gid,
684 ownership_ok,
685 &left);
686 if (r < 0)
687 return r;
688
689 search_path = strv_free(search_path);
690 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
691 if (!search_path)
692 return -ENOMEM;
693
694 r = load_credential_glob(
695 ic,
696 /* encrypted = */ true,
697 search_path,
698 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
699 dfd,
700 uid,
701 gid,
702 ownership_ok,
703 &left);
704 if (r < 0)
705 return r;
706 }
707
708 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
709 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
710 HASHMAP_FOREACH(sc, context->set_credentials) {
711 _cleanup_(iovec_done_erase) struct iovec plaintext = {};
712 const char *data;
713 size_t size, add;
714
715 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
716 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
717 * slow and involved, hence it's nice to be able to skip that if the credential already
718 * exists anyway. */
719 if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
720 continue;
721 if (errno != ENOENT)
722 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
723
724 if (sc->encrypted) {
725 r = decrypt_credential_and_warn(
726 sc->id,
727 now(CLOCK_REALTIME),
728 /* tpm2_device= */ NULL,
729 /* tpm2_signature_path= */ NULL,
730 getuid(),
731 &IOVEC_MAKE(sc->data, sc->size),
732 CREDENTIAL_ANY_SCOPE,
733 &plaintext);
734 if (r < 0)
735 return r;
736
737 data = plaintext.iov_base;
738 size = plaintext.iov_len;
739 } else {
740 data = sc->data;
741 size = sc->size;
742 }
743
744 add = strlen(sc->id) + size;
745 if (add > left)
746 return -E2BIG;
747
748 r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
749 if (r < 0)
750 return r;
751
752 left -= add;
753 }
754
755 r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
756 if (r < 0)
757 return r;
758
759 /* After we created all keys with the right perms, also make sure the credential store as a whole is
760 * accessible */
761
762 if (uid_is_valid(uid) && uid != getuid()) {
763 r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
764 if (r < 0) {
765 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
766 return r;
767
768 if (!ownership_ok)
769 return r;
770
771 if (fchown(dfd, uid, gid) < 0)
772 return -errno;
773 }
774 }
775
776 return 0;
777 }
778
/* Sets up the credential store for a unit at 'final', using 'workspace' as the place to prepare it.
 *
 * If 'final' already is a mount point it is either detached (when EXEC_SETUP_CREDENTIALS_FRESH is set)
 * or reused; in the latter case, if it is not empty, it is considered fully set up and nothing else is
 * done. Otherwise a writable mount is established on 'workspace' (a new credentials file system, or a
 * bind mount of 'final'), the credentials are acquired into it, and the result is made available
 * read-only at 'final'. If no mount can be established due to lacking privileges, and 'must_mount' is
 * false, the plain directory 'final' is populated directly instead.
 *
 * Returns 0 on success, a negative errno-style value on failure. */
static int setup_credentials_internal(
                const ExecContext *context,
                const ExecParameters *params,
                const char *unit,
                const char *final,        /* This is where the credential store shall eventually end up at */
                const char *workspace,    /* This is where we can prepare it before moving it to the final place */
                bool reuse_workspace,     /* Whether to reuse any existing workspace mount if it already is a mount */
                bool must_mount,          /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
                uid_t uid,
                gid_t gid) {

        bool final_mounted;
        int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
                                   * if we mounted something; false if we definitely can't mount anything */

        assert(context);
        assert(params);
        assert(unit);
        assert(final);
        assert(workspace);

        r = path_is_mount_point(final);
        if (r < 0)
                return r;
        final_mounted = r > 0;

        if (final_mounted) {
                if (FLAGS_SET(params->flags, EXEC_SETUP_CREDENTIALS_FRESH)) {
                        /* A fresh set of credentials was requested, hence drop the old mount */
                        r = umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
                        if (r < 0)
                                return r;

                        final_mounted = false;
                } else {
                        /* We can reuse the previous credential dir */
                        r = dir_is_empty(final, /* ignore_hidden_or_backup = */ false);
                        if (r < 0)
                                return r;
                        if (r == 0) {
                                log_debug("Credential dir for unit '%s' already set up, skipping.", unit);
                                return 0;
                        }
                }
        }

        if (reuse_workspace) {
                r = path_is_mount_point(workspace);
                if (r < 0)
                        return r;
                if (r > 0)
                        workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
                                                   * it, let's keep this in mind */
                else
                        workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
        } else
                workspace_mounted = -1; /* ditto */

        /* If both the final place and the workspace are mounted, we have no mounts to set up, based on
         * the assumption that they're actually the same tmpfs (but the latter with MS_RDONLY different).
         * If the workspace is not mounted, we just bind the final place over and make it writable. */
        must_mount = must_mount || final_mounted;

        if (workspace_mounted < 0) {
                if (!final_mounted)
                        /* Nothing is mounted on the workspace yet, let's try to mount a new tmpfs if
                         * not using the final place. */
                        r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
                if (final_mounted || r < 0) {
                        /* If using final place or failed to mount new tmpfs, make a bind mount from
                         * the final to the workspace, so that we can make it writable there. */
                        r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
                        if (r < 0) {
                                if (!ERRNO_IS_PRIVILEGE(r))
                                        /* Propagate anything that isn't a permission problem. */
                                        return r;

                                if (must_mount)
                                        /* If it's not OK to use the plain directory fallback, propagate all
                                         * errors too. */
                                        return r;

                                /* If we lack privileges to bind mount stuff, then let's gracefully proceed
                                 * for compat with container envs, and just use the final dir as is.
                                 * Final place must not be mounted in this case (refused by must_mount
                                 * above) */

                                workspace_mounted = false;
                        } else {
                                /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
                                r = mount_nofollow_verbose(LOG_DEBUG,
                                                           NULL,
                                                           workspace,
                                                           NULL,
                                                           MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false),
                                                           NULL);
                                if (r < 0)
                                        return r;

                                workspace_mounted = true;
                        }
                } else
                        workspace_mounted = true;
        }

        assert(workspace_mounted >= 0);
        assert(!must_mount || workspace_mounted);

        /* Without a mount on the workspace we operate on the final directory directly */
        const char *where = workspace_mounted ? workspace : final;

        (void) label_fix_full(AT_FDCWD, where, final, 0);

        /* The ownership fallback (chown() instead of ACLs) is only permitted if we have a mount of our
         * own, hence pass workspace_mounted as 'ownership_ok'. */
        r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
        if (r < 0) {
                /* If we're using final place as workspace, and failed to acquire credentials, we might
                 * have left half-written creds there. Let's get rid of the whole mount, so future
                 * calls won't reuse it. */
                if (final_mounted)
                        (void) umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);

                return r;
        }

        if (workspace_mounted) {
                if (!final_mounted) {
                        /* Make workspace read-only now, so that any bind mount we make from it defaults to
                         * read-only too */
                        r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
                        if (r < 0)
                                return r;

                        /* And mount it to the final place, read-only */
                        r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
                } else
                        /* Otherwise we just get rid of the bind mount of final place */
                        r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
                if (r < 0)
                        return r;
        } else {
                _cleanup_free_ char *parent = NULL;

                /* If we do not have our own mount but used the plain directory fallback, then we need to
                 * open access to the top-level credential directory and the per-service directory now */

                r = path_extract_directory(final, &parent);
                if (r < 0)
                        return r;
                if (chmod(parent, 0755) < 0)
                        return -errno;
        }

        return 0;
}
931
/* Sets up the credentials directory "<runtime prefix>/credentials/<unit>" for a unit about to be
 * executed, if the parameters request it and the context configures any credentials.
 *
 * The actual work is preferably done in a forked-off child process with its own mount namespace,
 * using /dev/shm as workspace. If such a child cannot be created because of missing support or
 * privileges, the work is done in-process instead, using a per-unit directory below
 * "<runtime prefix>/systemd/temporary-credentials" as workspace.
 *
 * Returns 0 on success (or if there is nothing to do), -EINVAL if no runtime directory prefix is
 * known, or another negative errno-style value on failure. */
int exec_setup_credentials(
                const ExecContext *context,
                const ExecParameters *params,
                const char *unit,
                uid_t uid,
                gid_t gid) {

        _cleanup_free_ char *p = NULL, *q = NULL;
        int r;

        assert(context);
        assert(params);
        assert(unit);

        if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context))
                return 0;

        if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
                return -EINVAL;

        /* This is where we'll place stuff when we are done; this main credentials directory is
         * world-readable, and the subdir we mount over with a read-only file system readable by the
         * service's user */
        q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
        if (!q)
                return -ENOMEM;

        r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
        if (r < 0 && r != -EEXIST)
                return r;

        p = path_join(q, unit);
        if (!p)
                return -ENOMEM;

        r = mkdir_label(p, 0700); /* per-unit dir: private to user */
        if (r < 0 && r != -EEXIST)
                return r;

        r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
        if (r < 0) {
                _cleanup_(rmdir_and_freep) char *u = NULL; /* remove the temporary workspace if we can */
                _cleanup_free_ char *t = NULL;

                /* If this is not a privilege or support issue then propagate the error */
                if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
                        return r;

                /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
                 * it into place, so that users can't access half-initialized credential stores. */
                t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
                if (!t)
                        return -ENOMEM;

                /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
                 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
                 * after it is fully set up */
                u = path_join(t, unit);
                if (!u)
                        return -ENOMEM;

                FOREACH_STRING(i, t, u) {
                        r = mkdir_label(i, 0700);
                        if (r < 0 && r != -EEXIST)
                                return r;
                }

                r = setup_credentials_internal(
                                context,
                                params,
                                unit,
                                p,       /* final mount point */
                                u,       /* temporary workspace to overmount */
                                true,    /* reuse the workspace if it is already a mount */
                                false,   /* it's OK to fall back to a plain directory if we can't mount anything */
                                uid,
                                gid);
                if (r < 0)
                        return r;

        } else if (r == 0) {

                /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
                 * we can use the same directory for all cases, after turning off propagation. Question
                 * though is: where do we turn off propagation exactly, and where do we place the workspace
                 * directory? We need some place that is guaranteed to be a mount point in the host, and
                 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
                 * since we ultimately want to move the resulting file system there, i.e. we need propagation
                 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
                 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
                 * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
                 * /dev/ is a mount point and we know for sure that /dev/shm/ exists. Hence we can turn off
                 * propagation on the former, and then overmount the latter.
                 *
                 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
                 * for this purpose, but there are few other candidates that work equally well for us, and
                 * given that we do this in a privately namespaced short-lived single-threaded process that
                 * no one else sees this should be OK to do. */

                /* Turn off propagation from our namespace to host */
                r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
                if (r < 0)
                        goto child_fail;

                r = setup_credentials_internal(
                                context,
                                params,
                                unit,
                                p,           /* final mount point */
                                "/dev/shm",  /* temporary workspace to overmount */
                                false,       /* do not reuse /dev/shm if it is already a mount, under no circumstances */
                                true,        /* insist that something is mounted, do not allow fallback to plain directory */
                                uid,
                                gid);
                if (r < 0)
                        goto child_fail;

                /* We are the child: report the outcome to the waiting parent via the exit status */
                _exit(EXIT_SUCCESS);

        child_fail:
                _exit(EXIT_FAILURE);
        }

        /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
         * try to remove it. This matters in particular if we created the dir as mount point but then didn't
         * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCES being
         * seen by users when trying to access this inode. */
        (void) rmdir(p);
        return 0;
}