]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/exec-credential.c
process-util: add new FORK_DEATHSIG_SIGKILL flag, rename FORK_DEATHSIG → FORK_DEATHSI...
[thirdparty/systemd.git] / src / core / exec-credential.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <sys/mount.h>
4
5 #include "acl-util.h"
6 #include "creds-util.h"
7 #include "exec-credential.h"
8 #include "execute.h"
9 #include "fileio.h"
10 #include "glob-util.h"
11 #include "io-util.h"
12 #include "label-util.h"
13 #include "mkdir-label.h"
14 #include "mount-util.h"
15 #include "mount.h"
16 #include "mountpoint-util.h"
17 #include "process-util.h"
18 #include "random-util.h"
19 #include "recurse-dir.h"
20 #include "rm-rf.h"
21 #include "tmpfile-util.h"
22
23 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
24 if (!sc)
25 return NULL;
26
27 free(sc->id);
28 free(sc->data);
29 return mfree(sc);
30 }
31
32 ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
33 if (!lc)
34 return NULL;
35
36 free(lc->id);
37 free(lc->path);
38 return mfree(lc);
39 }
40
41 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
42 exec_set_credential_hash_ops,
43 char, string_hash_func, string_compare_func,
44 ExecSetCredential, exec_set_credential_free);
45
46 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
47 exec_load_credential_hash_ops,
48 char, string_hash_func, string_compare_func,
49 ExecLoadCredential, exec_load_credential_free);
50
51 bool exec_context_has_credentials(const ExecContext *c) {
52 assert(c);
53
54 return !hashmap_isempty(c->set_credentials) ||
55 !hashmap_isempty(c->load_credentials) ||
56 !set_isempty(c->import_credentials);
57 }
58
59 bool exec_context_has_encrypted_credentials(ExecContext *c) {
60 ExecLoadCredential *load_cred;
61 ExecSetCredential *set_cred;
62
63 assert(c);
64
65 HASHMAP_FOREACH(load_cred, c->load_credentials)
66 if (load_cred->encrypted)
67 return true;
68
69 HASHMAP_FOREACH(set_cred, c->set_credentials)
70 if (set_cred->encrypted)
71 return true;
72
73 return false;
74 }
75
/* Builds the path "<runtime_prefix>/credentials/<unit>".
 *
 * Returns 1 and stores a newly allocated string in *ret on success; returns 0 and stores NULL in *ret
 * if either the runtime prefix or the unit name is not known; returns -ENOMEM if the path cannot be
 * allocated (in which case *ret is left untouched). */
static int get_credential_directory(
                const char *runtime_prefix,
                const char *unit,
                char **ret) {

        char *dir = NULL;

        assert(ret);

        if (runtime_prefix && unit) {
                dir = path_join(runtime_prefix, "credentials", unit);
                if (!dir)
                        return -ENOMEM;
        }

        *ret = dir;
        return dir ? 1 : 0;
}
97
98 int exec_context_get_credential_directory(
99 const ExecContext *context,
100 const ExecParameters *params,
101 const char *unit,
102 char **ret) {
103
104 assert(context);
105 assert(params);
106 assert(unit);
107 assert(ret);
108
109 if (!exec_context_has_credentials(context)) {
110 *ret = NULL;
111 return 0;
112 }
113
114 return get_credential_directory(params->prefix[EXEC_DIRECTORY_RUNTIME], unit, ret);
115 }
116
117 int unit_add_default_credential_dependencies(Unit *u, const ExecContext *c) {
118 _cleanup_free_ char *p = NULL, *m = NULL;
119 int r;
120
121 assert(u);
122 assert(c);
123
124 if (!exec_context_has_credentials(c))
125 return 0;
126
127 /* Let's make sure the credentials directory of this service is unmounted *after* the service itself
128 * shuts down. This only matters if mount namespacing is not used for the service, and hence the
129 * credentials mount appears on the host. */
130
131 r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
132 if (r <= 0)
133 return r;
134
135 r = unit_name_from_path(p, ".mount", &m);
136 if (r < 0)
137 return r;
138
139 return unit_add_dependency_by_name(u, UNIT_AFTER, m, /* add_reference= */ true, UNIT_DEPENDENCY_FILE);
140 }
141
142 int exec_context_destroy_credentials(Unit *u) {
143 _cleanup_free_ char *p = NULL;
144 int r;
145
146 assert(u);
147
148 r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
149 if (r <= 0)
150 return r;
151
152 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
153 * unmount it, and afterwards remove the mount point */
154 if (umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW) >= 0)
155 (void) mount_invalidate_state_by_path(u->manager, p);
156
157 (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
158
159 return 0;
160 }
161
162 static int write_credential(
163 int dfd,
164 const char *id,
165 const void *data,
166 size_t size,
167 uid_t uid,
168 gid_t gid,
169 bool ownership_ok) {
170
171 _cleanup_(unlink_and_freep) char *tmp = NULL;
172 _cleanup_close_ int fd = -EBADF;
173 int r;
174
175 r = tempfn_random_child("", "cred", &tmp);
176 if (r < 0)
177 return r;
178
179 fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
180 if (fd < 0) {
181 tmp = mfree(tmp);
182 return -errno;
183 }
184
185 r = loop_write(fd, data, size);
186 if (r < 0)
187 return r;
188
189 if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
190 return -errno;
191
192 if (uid_is_valid(uid) && uid != getuid()) {
193 r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
194 if (r < 0) {
195 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
196 return r;
197
198 if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
199 * to express: that the user gets read access and nothing
200 * else. But if the backing fs can't support that (e.g. ramfs)
201 * then we can use file ownership instead. But that's only safe if
202 * we can then re-mount the whole thing read-only, so that the
203 * user can no longer chmod() the file to gain write access. */
204 return r;
205
206 if (fchown(fd, uid, gid) < 0)
207 return -errno;
208 }
209 }
210
211 if (renameat(dfd, tmp, dfd, id) < 0)
212 return -errno;
213
214 tmp = mfree(tmp);
215 return 0;
216 }
217
/* Selects which set of credential store directories credential_search_path() shall assemble. */
typedef enum CredentialSearchPath {
        CREDENTIAL_SEARCH_PATH_TRUSTED,         /* only the plaintext stores ("credstore") */
        CREDENTIAL_SEARCH_PATH_ENCRYPTED,       /* only the encrypted stores ("credstore.encrypted") */
        CREDENTIAL_SEARCH_PATH_ALL,             /* the encrypted stores first, then the plaintext ones */
        _CREDENTIAL_SEARCH_PATH_MAX,
        _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
} CredentialSearchPath;
225
226 static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
227
228 _cleanup_strv_free_ char **l = NULL;
229
230 assert(params);
231 assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
232
233 /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
234 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
235 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
236
237 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
238 if (strv_extend(&l, params->received_encrypted_credentials_directory) < 0)
239 return NULL;
240
241 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
242 return NULL;
243 }
244
245 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
246 if (params->received_credentials_directory)
247 if (strv_extend(&l, params->received_credentials_directory) < 0)
248 return NULL;
249
250 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
251 return NULL;
252 }
253
254 if (DEBUG_LOGGING) {
255 _cleanup_free_ char *t = strv_join(l, ":");
256
257 log_debug("Credential search path is: %s", strempty(t));
258 }
259
260 return TAKE_PTR(l);
261 }
262
263 static int maybe_decrypt_and_write_credential(
264 int dir_fd,
265 const char *id,
266 bool encrypted,
267 uid_t uid,
268 gid_t gid,
269 bool ownership_ok,
270 const char *data,
271 size_t size,
272 uint64_t *left) {
273
274 _cleanup_free_ void *plaintext = NULL;
275 size_t add;
276 int r;
277
278 if (encrypted) {
279 size_t plaintext_size = 0;
280
281 r = decrypt_credential_and_warn(id, now(CLOCK_REALTIME), NULL, NULL, data, size,
282 &plaintext, &plaintext_size);
283 if (r < 0)
284 return r;
285
286 data = plaintext;
287 size = plaintext_size;
288 }
289
290 add = strlen(id) + size;
291 if (add > *left)
292 return -E2BIG;
293
294 r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
295 if (r < 0)
296 return log_debug_errno(r, "Failed to write credential '%s': %m", id);
297
298 *left -= add;
299 return 0;
300 }
301
302 static int load_credential_glob(
303 const char *path,
304 bool encrypted,
305 char **search_path,
306 ReadFullFileFlags flags,
307 int write_dfd,
308 uid_t uid,
309 gid_t gid,
310 bool ownership_ok,
311 uint64_t *left) {
312
313 int r;
314
315 STRV_FOREACH(d, search_path) {
316 _cleanup_globfree_ glob_t pglob = {};
317 _cleanup_free_ char *j = NULL;
318
319 j = path_join(*d, path);
320 if (!j)
321 return -ENOMEM;
322
323 r = safe_glob(j, 0, &pglob);
324 if (r == -ENOENT)
325 continue;
326 if (r < 0)
327 return r;
328
329 for (size_t n = 0; n < pglob.gl_pathc; n++) {
330 _cleanup_free_ char *fn = NULL;
331 _cleanup_(erase_and_freep) char *data = NULL;
332 size_t size;
333
334 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
335 r = read_full_file_full(
336 AT_FDCWD,
337 pglob.gl_pathv[n],
338 UINT64_MAX,
339 encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
340 flags,
341 NULL,
342 &data, &size);
343 if (r < 0)
344 return log_debug_errno(r, "Failed to read credential '%s': %m",
345 pglob.gl_pathv[n]);
346
347 r = path_extract_filename(pglob.gl_pathv[n], &fn);
348 if (r < 0)
349 return log_debug_errno(r, "Failed to extract filename from '%s': %m",
350 pglob.gl_pathv[n]);
351
352 r = maybe_decrypt_and_write_credential(
353 write_dfd,
354 fn,
355 encrypted,
356 uid,
357 gid,
358 ownership_ok,
359 data, size,
360 left);
361 if (r == -EEXIST)
362 continue;
363 if (r < 0)
364 return r;
365 }
366 }
367
368 return 0;
369 }
370
371 static int load_credential(
372 const ExecContext *context,
373 const ExecParameters *params,
374 const char *id,
375 const char *path,
376 bool encrypted,
377 const char *unit,
378 int read_dfd,
379 int write_dfd,
380 uid_t uid,
381 gid_t gid,
382 bool ownership_ok,
383 uint64_t *left) {
384
385 ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
386 _cleanup_strv_free_ char **search_path = NULL;
387 _cleanup_(erase_and_freep) char *data = NULL;
388 _cleanup_free_ char *bindname = NULL;
389 const char *source = NULL;
390 bool missing_ok = true;
391 size_t size, maxsz;
392 int r;
393
394 assert(context);
395 assert(params);
396 assert(id);
397 assert(path);
398 assert(unit);
399 assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
400 assert(write_dfd >= 0);
401 assert(left);
402
403 if (read_dfd >= 0) {
404 /* If a directory fd is specified, then read the file directly from that dir. In this case we
405 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
406 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
407 * open it. */
408
409 if (!filename_is_valid(path)) /* safety check */
410 return -EINVAL;
411
412 missing_ok = true;
413 source = path;
414
415 } else if (path_is_absolute(path)) {
416 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
417 * sockets */
418
419 if (!path_is_valid(path)) /* safety check */
420 return -EINVAL;
421
422 flags |= READ_FULL_FILE_CONNECT_SOCKET;
423
424 /* Pass some minimal info about the unit and the credential name we are looking to acquire
425 * via the source socket address in case we read off an AF_UNIX socket. */
426 if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
427 return -ENOMEM;
428
429 missing_ok = false;
430 source = path;
431
432 } else if (credential_name_valid(path)) {
433 /* If this is a relative path, take it as credential name relative to the credentials
434 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
435 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
436
437 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
438 if (!search_path)
439 return -ENOMEM;
440
441 missing_ok = true;
442 } else
443 source = NULL;
444
445 if (encrypted)
446 flags |= READ_FULL_FILE_UNBASE64;
447
448 maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
449
450 if (search_path) {
451 STRV_FOREACH(d, search_path) {
452 _cleanup_free_ char *j = NULL;
453
454 j = path_join(*d, path);
455 if (!j)
456 return -ENOMEM;
457
458 r = read_full_file_full(
459 AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
460 UINT64_MAX,
461 maxsz,
462 flags,
463 NULL,
464 &data, &size);
465 if (r != -ENOENT)
466 break;
467 }
468 } else if (source)
469 r = read_full_file_full(
470 read_dfd, source,
471 UINT64_MAX,
472 maxsz,
473 flags,
474 bindname,
475 &data, &size);
476 else
477 r = -ENOENT;
478
479 if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
480 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
481 * will get clear errors if we don't pass such a missing credential on as they
482 * themselves will get ENOENT when trying to read them, which should not be much
483 * worse than when we handle the error here and make it fatal.
484 *
485 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
486 * we are fine, too. */
487 log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
488 return 0;
489 }
490 if (r < 0)
491 return log_debug_errno(r, "Failed to read credential '%s': %m", path);
492
493 return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
494 }
495
496 struct load_cred_args {
497 const ExecContext *context;
498 const ExecParameters *params;
499 bool encrypted;
500 const char *unit;
501 int dfd;
502 uid_t uid;
503 gid_t gid;
504 bool ownership_ok;
505 uint64_t *left;
506 };
507
508 static int load_cred_recurse_dir_cb(
509 RecurseDirEvent event,
510 const char *path,
511 int dir_fd,
512 int inode_fd,
513 const struct dirent *de,
514 const struct statx *sx,
515 void *userdata) {
516
517 struct load_cred_args *args = ASSERT_PTR(userdata);
518 _cleanup_free_ char *sub_id = NULL;
519 int r;
520
521 if (event != RECURSE_DIR_ENTRY)
522 return RECURSE_DIR_CONTINUE;
523
524 if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
525 return RECURSE_DIR_CONTINUE;
526
527 sub_id = strreplace(path, "/", "_");
528 if (!sub_id)
529 return -ENOMEM;
530
531 if (!credential_name_valid(sub_id))
532 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
533
534 if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
535 log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
536 return RECURSE_DIR_CONTINUE;
537 }
538 if (errno != ENOENT)
539 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
540
541 r = load_credential(
542 args->context,
543 args->params,
544 sub_id,
545 de->d_name,
546 args->encrypted,
547 args->unit,
548 dir_fd,
549 args->dfd,
550 args->uid,
551 args->gid,
552 args->ownership_ok,
553 args->left);
554 if (r < 0)
555 return r;
556
557 return RECURSE_DIR_CONTINUE;
558 }
559
560 static int acquire_credentials(
561 const ExecContext *context,
562 const ExecParameters *params,
563 const char *unit,
564 const char *p,
565 uid_t uid,
566 gid_t gid,
567 bool ownership_ok) {
568
569 uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
570 _cleanup_close_ int dfd = -EBADF;
571 const char *ic;
572 ExecLoadCredential *lc;
573 ExecSetCredential *sc;
574 int r;
575
576 assert(context);
577 assert(p);
578
579 dfd = open(p, O_DIRECTORY|O_CLOEXEC);
580 if (dfd < 0)
581 return -errno;
582
583 r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
584 if (r < 0)
585 return r;
586
587 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
588 HASHMAP_FOREACH(lc, context->load_credentials) {
589 _cleanup_close_ int sub_fd = -EBADF;
590
591 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
592 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
593 * a regular file. Finally, if it's a relative path we will use it as a credential name to
594 * propagate a credential passed to us from further up. */
595
596 if (path_is_absolute(lc->path)) {
597 sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
598 if (sub_fd < 0 && !IN_SET(errno,
599 ENOTDIR, /* Not a directory */
600 ENOENT)) /* Doesn't exist? */
601 return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
602 }
603
604 if (sub_fd < 0)
605 /* Regular file (incl. a credential passed in from higher up) */
606 r = load_credential(
607 context,
608 params,
609 lc->id,
610 lc->path,
611 lc->encrypted,
612 unit,
613 AT_FDCWD,
614 dfd,
615 uid,
616 gid,
617 ownership_ok,
618 &left);
619 else
620 /* Directory */
621 r = recurse_dir(
622 sub_fd,
623 /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
624 /* statx_mask= */ 0,
625 /* n_depth_max= */ UINT_MAX,
626 RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
627 load_cred_recurse_dir_cb,
628 &(struct load_cred_args) {
629 .context = context,
630 .params = params,
631 .encrypted = lc->encrypted,
632 .unit = unit,
633 .dfd = dfd,
634 .uid = uid,
635 .gid = gid,
636 .ownership_ok = ownership_ok,
637 .left = &left,
638 });
639 if (r < 0)
640 return r;
641 }
642
643 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
644 * override any credentials found earlier. */
645 SET_FOREACH(ic, context->import_credentials) {
646 _cleanup_free_ char **search_path = NULL;
647
648 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
649 if (!search_path)
650 return -ENOMEM;
651
652 r = load_credential_glob(
653 ic,
654 /* encrypted = */ false,
655 search_path,
656 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
657 dfd,
658 uid,
659 gid,
660 ownership_ok,
661 &left);
662 if (r < 0)
663 return r;
664
665 search_path = strv_free(search_path);
666 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
667 if (!search_path)
668 return -ENOMEM;
669
670 r = load_credential_glob(
671 ic,
672 /* encrypted = */ true,
673 search_path,
674 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
675 dfd,
676 uid,
677 gid,
678 ownership_ok,
679 &left);
680 if (r < 0)
681 return r;
682 }
683
684 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
685 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
686 HASHMAP_FOREACH(sc, context->set_credentials) {
687 _cleanup_(erase_and_freep) void *plaintext = NULL;
688 const char *data;
689 size_t size, add;
690
691 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
692 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
693 * slow and involved, hence it's nice to be able to skip that if the credential already
694 * exists anyway. */
695 if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
696 continue;
697 if (errno != ENOENT)
698 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
699
700 if (sc->encrypted) {
701 r = decrypt_credential_and_warn(sc->id, now(CLOCK_REALTIME), NULL, NULL, sc->data, sc->size, &plaintext, &size);
702 if (r < 0)
703 return r;
704
705 data = plaintext;
706 } else {
707 data = sc->data;
708 size = sc->size;
709 }
710
711 add = strlen(sc->id) + size;
712 if (add > left)
713 return -E2BIG;
714
715 r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
716 if (r < 0)
717 return r;
718
719 left -= add;
720 }
721
722 r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
723 if (r < 0)
724 return r;
725
726 /* After we created all keys with the right perms, also make sure the credential store as a whole is
727 * accessible */
728
729 if (uid_is_valid(uid) && uid != getuid()) {
730 r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
731 if (r < 0) {
732 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
733 return r;
734
735 if (!ownership_ok)
736 return r;
737
738 if (fchown(dfd, uid, gid) < 0)
739 return -errno;
740 }
741 }
742
743 return 0;
744 }
745
746 static int setup_credentials_internal(
747 const ExecContext *context,
748 const ExecParameters *params,
749 const char *unit,
750 const char *final, /* This is where the credential store shall eventually end up at */
751 const char *workspace, /* This is where we can prepare it before moving it to the final place */
752 bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
753 bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
754 uid_t uid,
755 gid_t gid) {
756
757 int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
758 * if we mounted something; false if we definitely can't mount anything */
759 bool final_mounted;
760 const char *where;
761
762 assert(context);
763 assert(final);
764 assert(workspace);
765
766 if (reuse_workspace) {
767 r = path_is_mount_point(workspace, NULL, 0);
768 if (r < 0)
769 return r;
770 if (r > 0)
771 workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
772 * it, let's keep this in mind */
773 else
774 workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
775 } else
776 workspace_mounted = -1; /* ditto */
777
778 r = path_is_mount_point(final, NULL, 0);
779 if (r < 0)
780 return r;
781 if (r > 0) {
782 /* If the final place already has something mounted, we use that. If the workspace also has
783 * something mounted we assume it's actually the same mount (but with MS_RDONLY
784 * different). */
785 final_mounted = true;
786
787 if (workspace_mounted < 0) {
788 /* If the final place is mounted, but the workspace isn't, then let's bind mount
789 * the final version to the workspace, and make it writable, so that we can make
790 * changes */
791
792 r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
793 if (r < 0)
794 return r;
795
796 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
797 if (r < 0)
798 return r;
799
800 workspace_mounted = true;
801 }
802 } else
803 final_mounted = false;
804
805 if (workspace_mounted < 0) {
806 /* Nothing is mounted on the workspace yet, let's try to mount something now */
807
808 r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
809 if (r < 0) {
810 /* If that didn't work, try to make a bind mount from the final to the workspace, so
811 * that we can make it writable there. */
812 r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
813 if (r < 0) {
814 if (!ERRNO_IS_PRIVILEGE(r))
815 /* Propagate anything that isn't a permission problem. */
816 return r;
817
818 if (must_mount)
819 /* If it's not OK to use the plain directory fallback, propagate all
820 * errors too. */
821 return r;
822
823 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
824 * for compat with container envs, and just use the final dir as is. */
825
826 workspace_mounted = false;
827 } else {
828 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
829 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
830 if (r < 0)
831 return r;
832
833 workspace_mounted = true;
834 }
835 } else
836 workspace_mounted = true;
837 }
838
839 assert(!must_mount || workspace_mounted > 0);
840 where = workspace_mounted ? workspace : final;
841
842 (void) label_fix_full(AT_FDCWD, where, final, 0);
843
844 r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
845 if (r < 0)
846 return r;
847
848 if (workspace_mounted) {
849 bool install;
850
851 /* Determine if we should actually install the prepared mount in the final location by bind
852 * mounting it there. We do so only if the mount is not established there already, and if the
853 * mount is actually non-empty (i.e. carries at least one credential). Not that in the best
854 * case we are doing all this in a mount namespace, thus no one else will see that we
855 * allocated a file system we are getting rid of again here. */
856 if (final_mounted)
857 install = false; /* already installed */
858 else {
859 r = dir_is_empty(where, /* ignore_hidden_or_backup= */ false);
860 if (r < 0)
861 return r;
862
863 install = r == 0; /* install only if non-empty */
864 }
865
866 if (install) {
867 /* Make workspace read-only now, so that any bind mount we make from it defaults to
868 * read-only too */
869 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
870 if (r < 0)
871 return r;
872
873 /* And mount it to the final place, read-only */
874 r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
875 } else
876 /* Otherwise get rid of it */
877 r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
878 if (r < 0)
879 return r;
880 } else {
881 _cleanup_free_ char *parent = NULL;
882
883 /* If we do not have our own mount put used the plain directory fallback, then we need to
884 * open access to the top-level credential directory and the per-service directory now */
885
886 r = path_extract_directory(final, &parent);
887 if (r < 0)
888 return r;
889 if (chmod(parent, 0755) < 0)
890 return -errno;
891 }
892
893 return 0;
894 }
895
896 int exec_setup_credentials(
897 const ExecContext *context,
898 const ExecParameters *params,
899 const char *unit,
900 uid_t uid,
901 gid_t gid) {
902
903 _cleanup_free_ char *p = NULL, *q = NULL;
904 int r;
905
906 assert(context);
907 assert(params);
908
909 if (!exec_context_has_credentials(context))
910 return 0;
911
912 if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
913 return -EINVAL;
914
915 /* This where we'll place stuff when we are done; this main credentials directory is world-readable,
916 * and the subdir we mount over with a read-only file system readable by the service's user */
917 q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
918 if (!q)
919 return -ENOMEM;
920
921 r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
922 if (r < 0 && r != -EEXIST)
923 return r;
924
925 p = path_join(q, unit);
926 if (!p)
927 return -ENOMEM;
928
929 r = mkdir_label(p, 0700); /* per-unit dir: private to user */
930 if (r < 0 && r != -EEXIST)
931 return r;
932
933 r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
934 if (r < 0) {
935 _cleanup_(rmdir_and_freep) char *u = NULL; /* remove the temporary workspace if we can */
936 _cleanup_free_ char *t = NULL;
937
938 /* If this is not a privilege or support issue then propagate the error */
939 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
940 return r;
941
942 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
943 * it into place, so that users can't access half-initialized credential stores. */
944 t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
945 if (!t)
946 return -ENOMEM;
947
948 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
949 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
950 * after it is fully set up */
951 u = path_join(t, unit);
952 if (!u)
953 return -ENOMEM;
954
955 FOREACH_STRING(i, t, u) {
956 r = mkdir_label(i, 0700);
957 if (r < 0 && r != -EEXIST)
958 return r;
959 }
960
961 r = setup_credentials_internal(
962 context,
963 params,
964 unit,
965 p, /* final mount point */
966 u, /* temporary workspace to overmount */
967 true, /* reuse the workspace if it is already a mount */
968 false, /* it's OK to fall back to a plain directory if we can't mount anything */
969 uid,
970 gid);
971 if (r < 0)
972 return r;
973
974 } else if (r == 0) {
975
976 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
977 * we can use the same directory for all cases, after turning off propagation. Question
978 * though is: where do we turn off propagation exactly, and where do we place the workspace
979 * directory? We need some place that is guaranteed to be a mount point in the host, and
980 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
981 * since we ultimately want to move the resulting file system there, i.e. we need propagation
982 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
983 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
984 * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
985 * /dev/ is a mount point and we now for sure that /dev/shm/ exists. Hence we can turn off
986 * propagation on the former, and then overmount the latter.
987 *
988 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
989 * for this purpose, but there are few other candidates that work equally well for us, and
990 * given that we do this in a privately namespaced short-lived single-threaded process that
991 * no one else sees this should be OK to do. */
992
993 /* Turn off propagation from our namespace to host */
994 r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
995 if (r < 0)
996 goto child_fail;
997
998 r = setup_credentials_internal(
999 context,
1000 params,
1001 unit,
1002 p, /* final mount point */
1003 "/dev/shm", /* temporary workspace to overmount */
1004 false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
1005 true, /* insist that something is mounted, do not allow fallback to plain directory */
1006 uid,
1007 gid);
1008 if (r < 0)
1009 goto child_fail;
1010
1011 _exit(EXIT_SUCCESS);
1012
1013 child_fail:
1014 _exit(EXIT_FAILURE);
1015 }
1016
1017 /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
1018 * try to remove it. This matters in particular if we created the dir as mount point but then didn't
1019 * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCESS being
1020 * seen by users when trying access this inode. */
1021 (void) rmdir(p);
1022 return 0;
1023 }