]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/exec-credential.c
tpm2-util: more iovec'ification
[thirdparty/systemd.git] / src / core / exec-credential.c
CommitLineData
602c74cf
YW
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include <sys/mount.h>
4
5#include "acl-util.h"
602c74cf 6#include "creds-util.h"
43962c30 7#include "exec-credential.h"
602c74cf
YW
8#include "execute.h"
9#include "fileio.h"
10#include "glob-util.h"
11#include "io-util.h"
8d042bc4 12#include "iovec-util.h"
602c74cf
YW
13#include "label-util.h"
14#include "mkdir-label.h"
15#include "mount-util.h"
1e122561 16#include "mount.h"
602c74cf
YW
17#include "mountpoint-util.h"
18#include "process-util.h"
19#include "random-util.h"
20#include "recurse-dir.h"
21#include "rm-rf.h"
22#include "tmpfile-util.h"
23
24ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
25 if (!sc)
26 return NULL;
27
28 free(sc->id);
29 free(sc->data);
30 return mfree(sc);
31}
32
33ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
34 if (!lc)
35 return NULL;
36
37 free(lc->id);
38 free(lc->path);
39 return mfree(lc);
40}
41
/* Hash operations for maps keyed by a string whose values are ExecSetCredential objects; values are
 * released via exec_set_credential_free(). */
42DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
43 exec_set_credential_hash_ops,
44 char, string_hash_func, string_compare_func,
45 ExecSetCredential, exec_set_credential_free);
46
/* Hash operations for maps keyed by a string whose values are ExecLoadCredential objects; values are
 * released via exec_load_credential_free(). */
47DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
48 exec_load_credential_hash_ops,
49 char, string_hash_func, string_compare_func,
50 ExecLoadCredential, exec_load_credential_free);
51
52bool exec_context_has_credentials(const ExecContext *c) {
53 assert(c);
54
55 return !hashmap_isempty(c->set_credentials) ||
56 !hashmap_isempty(c->load_credentials) ||
57 !set_isempty(c->import_credentials);
58}
59
60bool exec_context_has_encrypted_credentials(ExecContext *c) {
61 ExecLoadCredential *load_cred;
62 ExecSetCredential *set_cred;
63
64 assert(c);
65
66 HASHMAP_FOREACH(load_cred, c->load_credentials)
67 if (load_cred->encrypted)
68 return true;
69
70 HASHMAP_FOREACH(set_cred, c->set_credentials)
71 if (set_cred->encrypted)
72 return true;
73
74 return false;
75}
76
7d202fb3
YW
/* Builds the path "<runtime_prefix>/credentials/<unit>" and stores it in *ret (newly allocated, to be
 * freed by the caller).
 *
 * Returns 1 if a path was generated, 0 if either input is NULL (in which case *ret is set to NULL), and
 * -ENOMEM on allocation failure (in which case *ret is left untouched). */
static int get_credential_directory(
                const char *runtime_prefix,
                const char *unit,
                char **ret) {

        char *dir = NULL;
        int found = 0;

        assert(ret);

        if (runtime_prefix && unit) {
                dir = path_join(runtime_prefix, "credentials", unit);
                if (!dir)
                        return -ENOMEM;

                found = 1;
        }

        *ret = dir;
        return found;
}
98
133e4de2
YW
99int exec_context_get_credential_directory(
100 const ExecContext *context,
101 const ExecParameters *params,
102 const char *unit,
103 char **ret) {
104
105 assert(context);
106 assert(params);
107 assert(unit);
108 assert(ret);
109
110 if (!exec_context_has_credentials(context)) {
111 *ret = NULL;
112 return 0;
113 }
114
115 return get_credential_directory(params->prefix[EXEC_DIRECTORY_RUNTIME], unit, ret);
116}
117
7d202fb3
YW
118int unit_add_default_credential_dependencies(Unit *u, const ExecContext *c) {
119 _cleanup_free_ char *p = NULL, *m = NULL;
120 int r;
121
122 assert(u);
123 assert(c);
124
125 if (!exec_context_has_credentials(c))
126 return 0;
127
128 /* Let's make sure the credentials directory of this service is unmounted *after* the service itself
129 * shuts down. This only matters if mount namespacing is not used for the service, and hence the
130 * credentials mount appears on the host. */
131
132 r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
133 if (r <= 0)
134 return r;
135
136 r = unit_name_from_path(p, ".mount", &m);
137 if (r < 0)
138 return r;
139
140 return unit_add_dependency_by_name(u, UNIT_AFTER, m, /* add_reference= */ true, UNIT_DEPENDENCY_FILE);
141}
142
1e122561 143int exec_context_destroy_credentials(Unit *u) {
7d202fb3
YW
144 _cleanup_free_ char *p = NULL;
145 int r;
146
1e122561 147 assert(u);
7d202fb3 148
1e122561 149 r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
7d202fb3
YW
150 if (r <= 0)
151 return r;
152
602c74cf
YW
153 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
154 * unmount it, and afterwards remove the mount point */
1e122561
YW
155 if (umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW) >= 0)
156 (void) mount_invalidate_state_by_path(u->manager, p);
157
602c74cf
YW
158 (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
159
160 return 0;
161}
162
163static int write_credential(
164 int dfd,
165 const char *id,
166 const void *data,
167 size_t size,
168 uid_t uid,
169 gid_t gid,
170 bool ownership_ok) {
171
172 _cleanup_(unlink_and_freep) char *tmp = NULL;
173 _cleanup_close_ int fd = -EBADF;
174 int r;
175
176 r = tempfn_random_child("", "cred", &tmp);
177 if (r < 0)
178 return r;
179
180 fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
181 if (fd < 0) {
182 tmp = mfree(tmp);
183 return -errno;
184 }
185
e22c60a9 186 r = loop_write(fd, data, size);
602c74cf
YW
187 if (r < 0)
188 return r;
189
190 if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
191 return -errno;
192
193 if (uid_is_valid(uid) && uid != getuid()) {
194 r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
195 if (r < 0) {
196 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
197 return r;
198
199 if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
200 * to express: that the user gets read access and nothing
201 * else. But if the backing fs can't support that (e.g. ramfs)
202 * then we can use file ownership instead. But that's only safe if
203 * we can then re-mount the whole thing read-only, so that the
204 * user can no longer chmod() the file to gain write access. */
205 return r;
206
207 if (fchown(fd, uid, gid) < 0)
208 return -errno;
209 }
210 }
211
212 if (renameat(dfd, tmp, dfd, id) < 0)
213 return -errno;
214
215 tmp = mfree(tmp);
216 return 0;
217}
218
/* Selects which set of directories credential_search_path() assembles. */
219typedef enum CredentialSearchPath {
220 CREDENTIAL_SEARCH_PATH_TRUSTED, /* received credentials directory + "credstore" directories */
221 CREDENTIAL_SEARCH_PATH_ENCRYPTED, /* received encrypted credentials directory + "credstore.encrypted" directories */
222 CREDENTIAL_SEARCH_PATH_ALL, /* both of the above, encrypted locations first */
223 _CREDENTIAL_SEARCH_PATH_MAX,
224 _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
225} CredentialSearchPath;
226
227static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
228
229 _cleanup_strv_free_ char **l = NULL;
230
231 assert(params);
232 assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
233
234 /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
235 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
236 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
237
238 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
239 if (strv_extend(&l, params->received_encrypted_credentials_directory) < 0)
240 return NULL;
241
242 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
243 return NULL;
244 }
245
246 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
247 if (params->received_credentials_directory)
248 if (strv_extend(&l, params->received_credentials_directory) < 0)
249 return NULL;
250
251 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
252 return NULL;
253 }
254
255 if (DEBUG_LOGGING) {
256 _cleanup_free_ char *t = strv_join(l, ":");
257
258 log_debug("Credential search path is: %s", strempty(t));
259 }
260
261 return TAKE_PTR(l);
262}
263
264static int maybe_decrypt_and_write_credential(
265 int dir_fd,
266 const char *id,
267 bool encrypted,
268 uid_t uid,
269 gid_t gid,
270 bool ownership_ok,
271 const char *data,
272 size_t size,
273 uint64_t *left) {
274
8d042bc4 275 _cleanup_(iovec_done_erase) struct iovec plaintext = {};
602c74cf
YW
276 size_t add;
277 int r;
278
279 if (encrypted) {
8d042bc4
LP
280 r = decrypt_credential_and_warn(
281 id,
282 now(CLOCK_REALTIME),
283 /* tpm2_device= */ NULL,
284 /* tpm2_signature_path= */ NULL,
285 &IOVEC_MAKE(data, size),
286 &plaintext);
602c74cf
YW
287 if (r < 0)
288 return r;
289
8d042bc4
LP
290 data = plaintext.iov_base;
291 size = plaintext.iov_len;
602c74cf
YW
292 }
293
294 add = strlen(id) + size;
295 if (add > *left)
296 return -E2BIG;
297
298 r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
299 if (r < 0)
300 return log_debug_errno(r, "Failed to write credential '%s': %m", id);
301
302 *left -= add;
303 return 0;
304}
305
306static int load_credential_glob(
307 const char *path,
308 bool encrypted,
309 char **search_path,
310 ReadFullFileFlags flags,
311 int write_dfd,
312 uid_t uid,
313 gid_t gid,
314 bool ownership_ok,
315 uint64_t *left) {
316
317 int r;
318
319 STRV_FOREACH(d, search_path) {
320 _cleanup_globfree_ glob_t pglob = {};
321 _cleanup_free_ char *j = NULL;
322
323 j = path_join(*d, path);
324 if (!j)
325 return -ENOMEM;
326
327 r = safe_glob(j, 0, &pglob);
328 if (r == -ENOENT)
329 continue;
330 if (r < 0)
331 return r;
332
333 for (size_t n = 0; n < pglob.gl_pathc; n++) {
334 _cleanup_free_ char *fn = NULL;
335 _cleanup_(erase_and_freep) char *data = NULL;
336 size_t size;
337
338 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
339 r = read_full_file_full(
340 AT_FDCWD,
341 pglob.gl_pathv[n],
342 UINT64_MAX,
343 encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
344 flags,
345 NULL,
346 &data, &size);
347 if (r < 0)
348 return log_debug_errno(r, "Failed to read credential '%s': %m",
349 pglob.gl_pathv[n]);
350
351 r = path_extract_filename(pglob.gl_pathv[n], &fn);
352 if (r < 0)
353 return log_debug_errno(r, "Failed to extract filename from '%s': %m",
354 pglob.gl_pathv[n]);
355
356 r = maybe_decrypt_and_write_credential(
357 write_dfd,
358 fn,
359 encrypted,
360 uid,
361 gid,
362 ownership_ok,
363 data, size,
364 left);
365 if (r == -EEXIST)
366 continue;
367 if (r < 0)
368 return r;
369 }
370 }
371
372 return 0;
373}
374
375static int load_credential(
376 const ExecContext *context,
377 const ExecParameters *params,
378 const char *id,
379 const char *path,
380 bool encrypted,
381 const char *unit,
382 int read_dfd,
383 int write_dfd,
384 uid_t uid,
385 gid_t gid,
386 bool ownership_ok,
387 uint64_t *left) {
388
389 ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
390 _cleanup_strv_free_ char **search_path = NULL;
391 _cleanup_(erase_and_freep) char *data = NULL;
392 _cleanup_free_ char *bindname = NULL;
393 const char *source = NULL;
394 bool missing_ok = true;
395 size_t size, maxsz;
396 int r;
397
398 assert(context);
399 assert(params);
400 assert(id);
401 assert(path);
402 assert(unit);
403 assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
404 assert(write_dfd >= 0);
405 assert(left);
406
407 if (read_dfd >= 0) {
408 /* If a directory fd is specified, then read the file directly from that dir. In this case we
409 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
410 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
411 * open it. */
412
413 if (!filename_is_valid(path)) /* safety check */
414 return -EINVAL;
415
416 missing_ok = true;
417 source = path;
418
419 } else if (path_is_absolute(path)) {
420 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
421 * sockets */
422
423 if (!path_is_valid(path)) /* safety check */
424 return -EINVAL;
425
426 flags |= READ_FULL_FILE_CONNECT_SOCKET;
427
428 /* Pass some minimal info about the unit and the credential name we are looking to acquire
429 * via the source socket address in case we read off an AF_UNIX socket. */
430 if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
431 return -ENOMEM;
432
433 missing_ok = false;
434 source = path;
435
436 } else if (credential_name_valid(path)) {
437 /* If this is a relative path, take it as credential name relative to the credentials
438 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
439 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
440
441 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
442 if (!search_path)
443 return -ENOMEM;
444
445 missing_ok = true;
446 } else
447 source = NULL;
448
449 if (encrypted)
450 flags |= READ_FULL_FILE_UNBASE64;
451
452 maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
453
454 if (search_path) {
455 STRV_FOREACH(d, search_path) {
456 _cleanup_free_ char *j = NULL;
457
458 j = path_join(*d, path);
459 if (!j)
460 return -ENOMEM;
461
462 r = read_full_file_full(
463 AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
464 UINT64_MAX,
465 maxsz,
466 flags,
467 NULL,
468 &data, &size);
469 if (r != -ENOENT)
470 break;
471 }
472 } else if (source)
473 r = read_full_file_full(
474 read_dfd, source,
475 UINT64_MAX,
476 maxsz,
477 flags,
478 bindname,
479 &data, &size);
480 else
481 r = -ENOENT;
482
483 if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
484 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
485 * will get clear errors if we don't pass such a missing credential on as they
486 * themselves will get ENOENT when trying to read them, which should not be much
487 * worse than when we handle the error here and make it fatal.
488 *
489 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
490 * we are fine, too. */
491 log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
492 return 0;
493 }
494 if (r < 0)
495 return log_debug_errno(r, "Failed to read credential '%s': %m", path);
496
497 return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
498}
499
/* Arguments handed as userdata to load_cred_recurse_dir_cb(); apart from 'dfd' they are forwarded
 * unchanged to load_credential() for every directory entry that is loaded. */
500struct load_cred_args {
501 const ExecContext *context;
502 const ExecParameters *params;
503 bool encrypted;
504 const char *unit;
505 int dfd; /* directory fd the credentials are written into */
506 uid_t uid;
507 gid_t gid;
508 bool ownership_ok;
509 uint64_t *left; /* remaining size budget, shared across all credentials */
510};
511
512static int load_cred_recurse_dir_cb(
513 RecurseDirEvent event,
514 const char *path,
515 int dir_fd,
516 int inode_fd,
517 const struct dirent *de,
518 const struct statx *sx,
519 void *userdata) {
520
521 struct load_cred_args *args = ASSERT_PTR(userdata);
522 _cleanup_free_ char *sub_id = NULL;
523 int r;
524
525 if (event != RECURSE_DIR_ENTRY)
526 return RECURSE_DIR_CONTINUE;
527
528 if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
529 return RECURSE_DIR_CONTINUE;
530
531 sub_id = strreplace(path, "/", "_");
532 if (!sub_id)
533 return -ENOMEM;
534
535 if (!credential_name_valid(sub_id))
536 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
537
538 if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
539 log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
540 return RECURSE_DIR_CONTINUE;
541 }
542 if (errno != ENOENT)
543 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
544
545 r = load_credential(
546 args->context,
547 args->params,
548 sub_id,
549 de->d_name,
550 args->encrypted,
551 args->unit,
552 dir_fd,
553 args->dfd,
554 args->uid,
555 args->gid,
556 args->ownership_ok,
557 args->left);
558 if (r < 0)
559 return r;
560
561 return RECURSE_DIR_CONTINUE;
562}
563
564static int acquire_credentials(
565 const ExecContext *context,
566 const ExecParameters *params,
567 const char *unit,
568 const char *p,
569 uid_t uid,
570 gid_t gid,
571 bool ownership_ok) {
572
573 uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
574 _cleanup_close_ int dfd = -EBADF;
575 const char *ic;
576 ExecLoadCredential *lc;
577 ExecSetCredential *sc;
578 int r;
579
580 assert(context);
581 assert(p);
582
583 dfd = open(p, O_DIRECTORY|O_CLOEXEC);
584 if (dfd < 0)
585 return -errno;
586
587 r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
588 if (r < 0)
589 return r;
590
591 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
592 HASHMAP_FOREACH(lc, context->load_credentials) {
593 _cleanup_close_ int sub_fd = -EBADF;
594
595 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
596 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
597 * a regular file. Finally, if it's a relative path we will use it as a credential name to
598 * propagate a credential passed to us from further up. */
599
600 if (path_is_absolute(lc->path)) {
601 sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
602 if (sub_fd < 0 && !IN_SET(errno,
603 ENOTDIR, /* Not a directory */
604 ENOENT)) /* Doesn't exist? */
605 return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
606 }
607
608 if (sub_fd < 0)
609 /* Regular file (incl. a credential passed in from higher up) */
610 r = load_credential(
611 context,
612 params,
613 lc->id,
614 lc->path,
615 lc->encrypted,
616 unit,
617 AT_FDCWD,
618 dfd,
619 uid,
620 gid,
621 ownership_ok,
622 &left);
623 else
624 /* Directory */
625 r = recurse_dir(
626 sub_fd,
627 /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
628 /* statx_mask= */ 0,
629 /* n_depth_max= */ UINT_MAX,
630 RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
631 load_cred_recurse_dir_cb,
632 &(struct load_cred_args) {
633 .context = context,
634 .params = params,
635 .encrypted = lc->encrypted,
636 .unit = unit,
637 .dfd = dfd,
638 .uid = uid,
639 .gid = gid,
640 .ownership_ok = ownership_ok,
641 .left = &left,
642 });
643 if (r < 0)
644 return r;
645 }
646
647 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
648 * override any credentials found earlier. */
649 SET_FOREACH(ic, context->import_credentials) {
650 _cleanup_free_ char **search_path = NULL;
651
652 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
653 if (!search_path)
654 return -ENOMEM;
655
656 r = load_credential_glob(
657 ic,
658 /* encrypted = */ false,
659 search_path,
660 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
661 dfd,
662 uid,
663 gid,
664 ownership_ok,
665 &left);
666 if (r < 0)
667 return r;
668
669 search_path = strv_free(search_path);
670 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
671 if (!search_path)
672 return -ENOMEM;
673
674 r = load_credential_glob(
675 ic,
676 /* encrypted = */ true,
677 search_path,
678 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
679 dfd,
680 uid,
681 gid,
682 ownership_ok,
683 &left);
684 if (r < 0)
685 return r;
686 }
687
688 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
689 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
690 HASHMAP_FOREACH(sc, context->set_credentials) {
8d042bc4 691 _cleanup_(iovec_done_erase) struct iovec plaintext = {};
602c74cf
YW
692 const char *data;
693 size_t size, add;
694
695 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
696 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
697 * slow and involved, hence it's nice to be able to skip that if the credential already
698 * exists anyway. */
699 if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
700 continue;
701 if (errno != ENOENT)
702 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
703
704 if (sc->encrypted) {
8d042bc4
LP
705 r = decrypt_credential_and_warn(
706 sc->id,
707 now(CLOCK_REALTIME),
708 /* tpm2_device= */ NULL,
709 /* tpm2_signature_path= */ NULL,
710 &IOVEC_MAKE(sc->data, sc->size),
711 &plaintext);
602c74cf
YW
712 if (r < 0)
713 return r;
714
8d042bc4
LP
715 data = plaintext.iov_base;
716 size = plaintext.iov_len;
602c74cf
YW
717 } else {
718 data = sc->data;
719 size = sc->size;
720 }
721
722 add = strlen(sc->id) + size;
723 if (add > left)
724 return -E2BIG;
725
726 r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
727 if (r < 0)
728 return r;
729
730 left -= add;
731 }
732
733 r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
734 if (r < 0)
735 return r;
736
737 /* After we created all keys with the right perms, also make sure the credential store as a whole is
738 * accessible */
739
740 if (uid_is_valid(uid) && uid != getuid()) {
741 r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
742 if (r < 0) {
743 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
744 return r;
745
746 if (!ownership_ok)
747 return r;
748
749 if (fchown(dfd, uid, gid) < 0)
750 return -errno;
751 }
752 }
753
754 return 0;
755}
756
757static int setup_credentials_internal(
758 const ExecContext *context,
759 const ExecParameters *params,
760 const char *unit,
761 const char *final, /* This is where the credential store shall eventually end up at */
762 const char *workspace, /* This is where we can prepare it before moving it to the final place */
763 bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
764 bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
765 uid_t uid,
73ff4d48 766 gid_t gid) {
602c74cf
YW
767
768 int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
769 * if we mounted something; false if we definitely can't mount anything */
770 bool final_mounted;
771 const char *where;
772
773 assert(context);
774 assert(final);
775 assert(workspace);
776
777 if (reuse_workspace) {
778 r = path_is_mount_point(workspace, NULL, 0);
779 if (r < 0)
780 return r;
781 if (r > 0)
782 workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
783 * it, let's keep this in mind */
784 else
785 workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
786 } else
787 workspace_mounted = -1; /* ditto */
788
789 r = path_is_mount_point(final, NULL, 0);
790 if (r < 0)
791 return r;
792 if (r > 0) {
793 /* If the final place already has something mounted, we use that. If the workspace also has
794 * something mounted we assume it's actually the same mount (but with MS_RDONLY
795 * different). */
796 final_mounted = true;
797
798 if (workspace_mounted < 0) {
799 /* If the final place is mounted, but the workspace isn't, then let's bind mount
800 * the final version to the workspace, and make it writable, so that we can make
801 * changes */
802
803 r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
804 if (r < 0)
805 return r;
806
807 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
808 if (r < 0)
809 return r;
810
811 workspace_mounted = true;
812 }
813 } else
814 final_mounted = false;
815
816 if (workspace_mounted < 0) {
817 /* Nothing is mounted on the workspace yet, let's try to mount something now */
818
819 r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
820 if (r < 0) {
821 /* If that didn't work, try to make a bind mount from the final to the workspace, so
822 * that we can make it writable there. */
823 r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
824 if (r < 0) {
825 if (!ERRNO_IS_PRIVILEGE(r))
826 /* Propagate anything that isn't a permission problem. */
827 return r;
828
829 if (must_mount)
830 /* If it's not OK to use the plain directory fallback, propagate all
831 * errors too. */
832 return r;
833
834 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
835 * for compat with container envs, and just use the final dir as is. */
836
837 workspace_mounted = false;
838 } else {
839 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
840 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
841 if (r < 0)
842 return r;
843
844 workspace_mounted = true;
845 }
846 } else
847 workspace_mounted = true;
848 }
849
850 assert(!must_mount || workspace_mounted > 0);
851 where = workspace_mounted ? workspace : final;
852
853 (void) label_fix_full(AT_FDCWD, where, final, 0);
854
855 r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
856 if (r < 0)
857 return r;
858
859 if (workspace_mounted) {
860 bool install;
861
862 /* Determine if we should actually install the prepared mount in the final location by bind
863 * mounting it there. We do so only if the mount is not established there already, and if the
864 * mount is actually non-empty (i.e. carries at least one credential). Not that in the best
865 * case we are doing all this in a mount namespace, thus no one else will see that we
866 * allocated a file system we are getting rid of again here. */
867 if (final_mounted)
868 install = false; /* already installed */
869 else {
870 r = dir_is_empty(where, /* ignore_hidden_or_backup= */ false);
871 if (r < 0)
872 return r;
873
874 install = r == 0; /* install only if non-empty */
875 }
876
877 if (install) {
878 /* Make workspace read-only now, so that any bind mount we make from it defaults to
879 * read-only too */
880 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
881 if (r < 0)
882 return r;
883
884 /* And mount it to the final place, read-only */
885 r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
886 } else
887 /* Otherwise get rid of it */
888 r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
889 if (r < 0)
890 return r;
891 } else {
892 _cleanup_free_ char *parent = NULL;
893
894 /* If we do not have our own mount put used the plain directory fallback, then we need to
895 * open access to the top-level credential directory and the per-service directory now */
896
897 r = path_extract_directory(final, &parent);
898 if (r < 0)
899 return r;
900 if (chmod(parent, 0755) < 0)
901 return -errno;
902 }
903
904 return 0;
905}
906
43962c30 907int exec_setup_credentials(
602c74cf
YW
908 const ExecContext *context,
909 const ExecParameters *params,
910 const char *unit,
911 uid_t uid,
73ff4d48 912 gid_t gid) {
602c74cf
YW
913
914 _cleanup_free_ char *p = NULL, *q = NULL;
915 int r;
916
917 assert(context);
918 assert(params);
919
73ff4d48 920 if (!exec_context_has_credentials(context))
602c74cf
YW
921 return 0;
922
923 if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
924 return -EINVAL;
925
926 /* This where we'll place stuff when we are done; this main credentials directory is world-readable,
927 * and the subdir we mount over with a read-only file system readable by the service's user */
928 q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
929 if (!q)
930 return -ENOMEM;
931
932 r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
933 if (r < 0 && r != -EEXIST)
934 return r;
935
936 p = path_join(q, unit);
937 if (!p)
938 return -ENOMEM;
939
940 r = mkdir_label(p, 0700); /* per-unit dir: private to user */
941 if (r < 0 && r != -EEXIST)
942 return r;
943
e9ccae31 944 r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
602c74cf 945 if (r < 0) {
2cb6b3b7
MY
946 _cleanup_(rmdir_and_freep) char *u = NULL; /* remove the temporary workspace if we can */
947 _cleanup_free_ char *t = NULL;
602c74cf
YW
948
949 /* If this is not a privilege or support issue then propagate the error */
950 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
951 return r;
952
953 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
954 * it into place, so that users can't access half-initialized credential stores. */
955 t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
956 if (!t)
957 return -ENOMEM;
958
959 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
960 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
961 * after it is fully set up */
962 u = path_join(t, unit);
963 if (!u)
964 return -ENOMEM;
965
966 FOREACH_STRING(i, t, u) {
967 r = mkdir_label(i, 0700);
968 if (r < 0 && r != -EEXIST)
969 return r;
970 }
971
972 r = setup_credentials_internal(
973 context,
974 params,
975 unit,
976 p, /* final mount point */
977 u, /* temporary workspace to overmount */
978 true, /* reuse the workspace if it is already a mount */
979 false, /* it's OK to fall back to a plain directory if we can't mount anything */
980 uid,
73ff4d48 981 gid);
602c74cf
YW
982 if (r < 0)
983 return r;
984
73ff4d48 985 } else if (r == 0) {
602c74cf
YW
986
987 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
988 * we can use the same directory for all cases, after turning off propagation. Question
989 * though is: where do we turn off propagation exactly, and where do we place the workspace
990 * directory? We need some place that is guaranteed to be a mount point in the host, and
991 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
992 * since we ultimately want to move the resulting file system there, i.e. we need propagation
993 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
994 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
995 * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
996 * /dev/ is a mount point and we now for sure that /dev/shm/ exists. Hence we can turn off
997 * propagation on the former, and then overmount the latter.
998 *
999 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
1000 * for this purpose, but there are few other candidates that work equally well for us, and
1001 * given that we do this in a privately namespaced short-lived single-threaded process that
1002 * no one else sees this should be OK to do. */
1003
1004 /* Turn off propagation from our namespace to host */
1005 r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
1006 if (r < 0)
1007 goto child_fail;
1008
1009 r = setup_credentials_internal(
1010 context,
1011 params,
1012 unit,
1013 p, /* final mount point */
1014 "/dev/shm", /* temporary workspace to overmount */
1015 false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
1016 true, /* insist that something is mounted, do not allow fallback to plain directory */
1017 uid,
73ff4d48 1018 gid);
602c74cf
YW
1019 if (r < 0)
1020 goto child_fail;
1021
1022 _exit(EXIT_SUCCESS);
1023
1024 child_fail:
1025 _exit(EXIT_FAILURE);
1026 }
1027
1028 /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
1029 * try to remove it. This matters in particular if we created the dir as mount point but then didn't
1030 * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCESS being
1031 * seen by users when trying access this inode. */
1032 (void) rmdir(p);
1033 return 0;
1034}