]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/credential.c
Revert "core: do not leak mount for credentials directory if mount namespace is enabled"
[thirdparty/systemd.git] / src / core / credential.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <sys/mount.h>
4
5 #include "acl-util.h"
6 #include "credential.h"
7 #include "creds-util.h"
8 #include "execute.h"
9 #include "fileio.h"
10 #include "glob-util.h"
11 #include "io-util.h"
12 #include "label-util.h"
13 #include "mkdir-label.h"
14 #include "mount-util.h"
15 #include "mountpoint-util.h"
16 #include "process-util.h"
17 #include "random-util.h"
18 #include "recurse-dir.h"
19 #include "rm-rf.h"
20 #include "tmpfile-util.h"
21
22 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
23 if (!sc)
24 return NULL;
25
26 free(sc->id);
27 free(sc->data);
28 return mfree(sc);
29 }
30
31 ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
32 if (!lc)
33 return NULL;
34
35 free(lc->id);
36 free(lc->path);
37 return mfree(lc);
38 }
39
/* Hash operations for collections of ExecSetCredential objects: keys are strings (hashed and compared with
 * string_hash_func/string_compare_func), and exec_set_credential_free() is registered as the value
 * destructor (presumably invoked by the hashmap code when an entry is dropped — see the macro definition). */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
                exec_set_credential_hash_ops,
                char, string_hash_func, string_compare_func,
                ExecSetCredential, exec_set_credential_free);
44
/* Hash operations for collections of ExecLoadCredential objects: keys are strings (hashed and compared with
 * string_hash_func/string_compare_func), and exec_load_credential_free() is registered as the value
 * destructor (presumably invoked by the hashmap code when an entry is dropped — see the macro definition). */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
                exec_load_credential_hash_ops,
                char, string_hash_func, string_compare_func,
                ExecLoadCredential, exec_load_credential_free);
49
50 bool exec_context_has_credentials(const ExecContext *c) {
51 assert(c);
52
53 return !hashmap_isempty(c->set_credentials) ||
54 !hashmap_isempty(c->load_credentials) ||
55 !set_isempty(c->import_credentials);
56 }
57
58 bool exec_context_has_encrypted_credentials(ExecContext *c) {
59 ExecLoadCredential *load_cred;
60 ExecSetCredential *set_cred;
61
62 assert(c);
63
64 HASHMAP_FOREACH(load_cred, c->load_credentials)
65 if (load_cred->encrypted)
66 return true;
67
68 HASHMAP_FOREACH(set_cred, c->set_credentials)
69 if (set_cred->encrypted)
70 return true;
71
72 return false;
73 }
74
/* Builds the path of the per-unit credentials directory, "<runtime_prefix>/credentials/<unit>".
 *
 * Returns 1 and stores a newly allocated string in *ret on success, 0 (with *ret set to NULL) if either the
 * runtime prefix or the unit name is not known, and -ENOMEM on allocation failure. */
static int get_credential_directory(
                const char *runtime_prefix,
                const char *unit,
                char **ret) {

        assert(ret);

        if (runtime_prefix && unit) {
                char *dir;

                dir = path_join(runtime_prefix, "credentials", unit);
                if (!dir)
                        return -ENOMEM;

                *ret = dir;
                return 1;
        }

        /* Not enough information to derive a path */
        *ret = NULL;
        return 0;
}
96
97 int unit_add_default_credential_dependencies(Unit *u, const ExecContext *c) {
98 _cleanup_free_ char *p = NULL, *m = NULL;
99 int r;
100
101 assert(u);
102 assert(c);
103
104 if (!exec_context_has_credentials(c))
105 return 0;
106
107 /* Let's make sure the credentials directory of this service is unmounted *after* the service itself
108 * shuts down. This only matters if mount namespacing is not used for the service, and hence the
109 * credentials mount appears on the host. */
110
111 r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
112 if (r <= 0)
113 return r;
114
115 r = unit_name_from_path(p, ".mount", &m);
116 if (r < 0)
117 return r;
118
119 return unit_add_dependency_by_name(u, UNIT_AFTER, m, /* add_reference= */ true, UNIT_DEPENDENCY_FILE);
120 }
121
122 int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_prefix, const char *unit) {
123 _cleanup_free_ char *p = NULL;
124 int r;
125
126 assert(c);
127
128 r = get_credential_directory(runtime_prefix, unit, &p);
129 if (r <= 0)
130 return r;
131
132 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
133 * unmount it, and afterwards remove the mount point */
134 (void) umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW);
135 (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
136
137 return 0;
138 }
139
140 static int write_credential(
141 int dfd,
142 const char *id,
143 const void *data,
144 size_t size,
145 uid_t uid,
146 gid_t gid,
147 bool ownership_ok) {
148
149 _cleanup_(unlink_and_freep) char *tmp = NULL;
150 _cleanup_close_ int fd = -EBADF;
151 int r;
152
153 r = tempfn_random_child("", "cred", &tmp);
154 if (r < 0)
155 return r;
156
157 fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
158 if (fd < 0) {
159 tmp = mfree(tmp);
160 return -errno;
161 }
162
163 r = loop_write(fd, data, size, /* do_poll = */ false);
164 if (r < 0)
165 return r;
166
167 if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
168 return -errno;
169
170 if (uid_is_valid(uid) && uid != getuid()) {
171 r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
172 if (r < 0) {
173 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
174 return r;
175
176 if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
177 * to express: that the user gets read access and nothing
178 * else. But if the backing fs can't support that (e.g. ramfs)
179 * then we can use file ownership instead. But that's only safe if
180 * we can then re-mount the whole thing read-only, so that the
181 * user can no longer chmod() the file to gain write access. */
182 return r;
183
184 if (fchown(fd, uid, gid) < 0)
185 return -errno;
186 }
187 }
188
189 if (renameat(dfd, tmp, dfd, id) < 0)
190 return -errno;
191
192 tmp = mfree(tmp);
193 return 0;
194 }
195
/* Selects which set of directories credential_search_path() assembles. */
typedef enum CredentialSearchPath {
        CREDENTIAL_SEARCH_PATH_TRUSTED,    /* received credentials directory + "credstore" directories */
        CREDENTIAL_SEARCH_PATH_ENCRYPTED,  /* received encrypted credentials directory + "credstore.encrypted" directories */
        CREDENTIAL_SEARCH_PATH_ALL,        /* both of the above, encrypted directories first */
        _CREDENTIAL_SEARCH_PATH_MAX,       /* number of valid values, used for range checks */
        _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
} CredentialSearchPath;
203
204 static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
205
206 _cleanup_strv_free_ char **l = NULL;
207
208 assert(params);
209 assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
210
211 /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
212 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
213 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
214
215 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
216 if (strv_extend(&l, params->received_encrypted_credentials_directory) < 0)
217 return NULL;
218
219 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
220 return NULL;
221 }
222
223 if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
224 if (params->received_credentials_directory)
225 if (strv_extend(&l, params->received_credentials_directory) < 0)
226 return NULL;
227
228 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
229 return NULL;
230 }
231
232 if (DEBUG_LOGGING) {
233 _cleanup_free_ char *t = strv_join(l, ":");
234
235 log_debug("Credential search path is: %s", strempty(t));
236 }
237
238 return TAKE_PTR(l);
239 }
240
241 static int maybe_decrypt_and_write_credential(
242 int dir_fd,
243 const char *id,
244 bool encrypted,
245 uid_t uid,
246 gid_t gid,
247 bool ownership_ok,
248 const char *data,
249 size_t size,
250 uint64_t *left) {
251
252 _cleanup_free_ void *plaintext = NULL;
253 size_t add;
254 int r;
255
256 if (encrypted) {
257 size_t plaintext_size = 0;
258
259 r = decrypt_credential_and_warn(id, now(CLOCK_REALTIME), NULL, NULL, data, size,
260 &plaintext, &plaintext_size);
261 if (r < 0)
262 return r;
263
264 data = plaintext;
265 size = plaintext_size;
266 }
267
268 add = strlen(id) + size;
269 if (add > *left)
270 return -E2BIG;
271
272 r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
273 if (r < 0)
274 return log_debug_errno(r, "Failed to write credential '%s': %m", id);
275
276 *left -= add;
277 return 0;
278 }
279
280 static int load_credential_glob(
281 const char *path,
282 bool encrypted,
283 char **search_path,
284 ReadFullFileFlags flags,
285 int write_dfd,
286 uid_t uid,
287 gid_t gid,
288 bool ownership_ok,
289 uint64_t *left) {
290
291 int r;
292
293 STRV_FOREACH(d, search_path) {
294 _cleanup_globfree_ glob_t pglob = {};
295 _cleanup_free_ char *j = NULL;
296
297 j = path_join(*d, path);
298 if (!j)
299 return -ENOMEM;
300
301 r = safe_glob(j, 0, &pglob);
302 if (r == -ENOENT)
303 continue;
304 if (r < 0)
305 return r;
306
307 for (size_t n = 0; n < pglob.gl_pathc; n++) {
308 _cleanup_free_ char *fn = NULL;
309 _cleanup_(erase_and_freep) char *data = NULL;
310 size_t size;
311
312 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
313 r = read_full_file_full(
314 AT_FDCWD,
315 pglob.gl_pathv[n],
316 UINT64_MAX,
317 encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
318 flags,
319 NULL,
320 &data, &size);
321 if (r < 0)
322 return log_debug_errno(r, "Failed to read credential '%s': %m",
323 pglob.gl_pathv[n]);
324
325 r = path_extract_filename(pglob.gl_pathv[n], &fn);
326 if (r < 0)
327 return log_debug_errno(r, "Failed to extract filename from '%s': %m",
328 pglob.gl_pathv[n]);
329
330 r = maybe_decrypt_and_write_credential(
331 write_dfd,
332 fn,
333 encrypted,
334 uid,
335 gid,
336 ownership_ok,
337 data, size,
338 left);
339 if (r == -EEXIST)
340 continue;
341 if (r < 0)
342 return r;
343 }
344 }
345
346 return 0;
347 }
348
349 static int load_credential(
350 const ExecContext *context,
351 const ExecParameters *params,
352 const char *id,
353 const char *path,
354 bool encrypted,
355 const char *unit,
356 int read_dfd,
357 int write_dfd,
358 uid_t uid,
359 gid_t gid,
360 bool ownership_ok,
361 uint64_t *left) {
362
363 ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
364 _cleanup_strv_free_ char **search_path = NULL;
365 _cleanup_(erase_and_freep) char *data = NULL;
366 _cleanup_free_ char *bindname = NULL;
367 const char *source = NULL;
368 bool missing_ok = true;
369 size_t size, maxsz;
370 int r;
371
372 assert(context);
373 assert(params);
374 assert(id);
375 assert(path);
376 assert(unit);
377 assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
378 assert(write_dfd >= 0);
379 assert(left);
380
381 if (read_dfd >= 0) {
382 /* If a directory fd is specified, then read the file directly from that dir. In this case we
383 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
384 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
385 * open it. */
386
387 if (!filename_is_valid(path)) /* safety check */
388 return -EINVAL;
389
390 missing_ok = true;
391 source = path;
392
393 } else if (path_is_absolute(path)) {
394 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
395 * sockets */
396
397 if (!path_is_valid(path)) /* safety check */
398 return -EINVAL;
399
400 flags |= READ_FULL_FILE_CONNECT_SOCKET;
401
402 /* Pass some minimal info about the unit and the credential name we are looking to acquire
403 * via the source socket address in case we read off an AF_UNIX socket. */
404 if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
405 return -ENOMEM;
406
407 missing_ok = false;
408 source = path;
409
410 } else if (credential_name_valid(path)) {
411 /* If this is a relative path, take it as credential name relative to the credentials
412 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
413 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
414
415 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
416 if (!search_path)
417 return -ENOMEM;
418
419 missing_ok = true;
420 } else
421 source = NULL;
422
423 if (encrypted)
424 flags |= READ_FULL_FILE_UNBASE64;
425
426 maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
427
428 if (search_path) {
429 STRV_FOREACH(d, search_path) {
430 _cleanup_free_ char *j = NULL;
431
432 j = path_join(*d, path);
433 if (!j)
434 return -ENOMEM;
435
436 r = read_full_file_full(
437 AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
438 UINT64_MAX,
439 maxsz,
440 flags,
441 NULL,
442 &data, &size);
443 if (r != -ENOENT)
444 break;
445 }
446 } else if (source)
447 r = read_full_file_full(
448 read_dfd, source,
449 UINT64_MAX,
450 maxsz,
451 flags,
452 bindname,
453 &data, &size);
454 else
455 r = -ENOENT;
456
457 if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
458 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
459 * will get clear errors if we don't pass such a missing credential on as they
460 * themselves will get ENOENT when trying to read them, which should not be much
461 * worse than when we handle the error here and make it fatal.
462 *
463 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
464 * we are fine, too. */
465 log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
466 return 0;
467 }
468 if (r < 0)
469 return log_debug_errno(r, "Failed to read credential '%s': %m", path);
470
471 return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
472 }
473
/* Arguments passed as userdata through recurse_dir() to load_cred_recurse_dir_cb(); the callback forwards
 * them to load_credential() for each enumerated file. */
struct load_cred_args {
        const ExecContext *context;
        const ExecParameters *params;
        bool encrypted;      /* whether the files in the directory are encrypted credentials */
        const char *unit;    /* unit name, forwarded to load_credential() */
        int dfd;             /* directory fd the credentials are written into */
        uid_t uid;           /* user that shall get read access to the written credentials */
        gid_t gid;           /* group used if access is granted via file ownership */
        bool ownership_ok;   /* whether falling back to file ownership instead of ACLs is allowed */
        uint64_t *left;      /* remaining size budget, shared across all credentials */
};
485
486 static int load_cred_recurse_dir_cb(
487 RecurseDirEvent event,
488 const char *path,
489 int dir_fd,
490 int inode_fd,
491 const struct dirent *de,
492 const struct statx *sx,
493 void *userdata) {
494
495 struct load_cred_args *args = ASSERT_PTR(userdata);
496 _cleanup_free_ char *sub_id = NULL;
497 int r;
498
499 if (event != RECURSE_DIR_ENTRY)
500 return RECURSE_DIR_CONTINUE;
501
502 if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
503 return RECURSE_DIR_CONTINUE;
504
505 sub_id = strreplace(path, "/", "_");
506 if (!sub_id)
507 return -ENOMEM;
508
509 if (!credential_name_valid(sub_id))
510 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
511
512 if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
513 log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
514 return RECURSE_DIR_CONTINUE;
515 }
516 if (errno != ENOENT)
517 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
518
519 r = load_credential(
520 args->context,
521 args->params,
522 sub_id,
523 de->d_name,
524 args->encrypted,
525 args->unit,
526 dir_fd,
527 args->dfd,
528 args->uid,
529 args->gid,
530 args->ownership_ok,
531 args->left);
532 if (r < 0)
533 return r;
534
535 return RECURSE_DIR_CONTINUE;
536 }
537
538 static int acquire_credentials(
539 const ExecContext *context,
540 const ExecParameters *params,
541 const char *unit,
542 const char *p,
543 uid_t uid,
544 gid_t gid,
545 bool ownership_ok) {
546
547 uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
548 _cleanup_close_ int dfd = -EBADF;
549 const char *ic;
550 ExecLoadCredential *lc;
551 ExecSetCredential *sc;
552 int r;
553
554 assert(context);
555 assert(p);
556
557 dfd = open(p, O_DIRECTORY|O_CLOEXEC);
558 if (dfd < 0)
559 return -errno;
560
561 r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
562 if (r < 0)
563 return r;
564
565 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
566 HASHMAP_FOREACH(lc, context->load_credentials) {
567 _cleanup_close_ int sub_fd = -EBADF;
568
569 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
570 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
571 * a regular file. Finally, if it's a relative path we will use it as a credential name to
572 * propagate a credential passed to us from further up. */
573
574 if (path_is_absolute(lc->path)) {
575 sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
576 if (sub_fd < 0 && !IN_SET(errno,
577 ENOTDIR, /* Not a directory */
578 ENOENT)) /* Doesn't exist? */
579 return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
580 }
581
582 if (sub_fd < 0)
583 /* Regular file (incl. a credential passed in from higher up) */
584 r = load_credential(
585 context,
586 params,
587 lc->id,
588 lc->path,
589 lc->encrypted,
590 unit,
591 AT_FDCWD,
592 dfd,
593 uid,
594 gid,
595 ownership_ok,
596 &left);
597 else
598 /* Directory */
599 r = recurse_dir(
600 sub_fd,
601 /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
602 /* statx_mask= */ 0,
603 /* n_depth_max= */ UINT_MAX,
604 RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
605 load_cred_recurse_dir_cb,
606 &(struct load_cred_args) {
607 .context = context,
608 .params = params,
609 .encrypted = lc->encrypted,
610 .unit = unit,
611 .dfd = dfd,
612 .uid = uid,
613 .gid = gid,
614 .ownership_ok = ownership_ok,
615 .left = &left,
616 });
617 if (r < 0)
618 return r;
619 }
620
621 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
622 * override any credentials found earlier. */
623 SET_FOREACH(ic, context->import_credentials) {
624 _cleanup_free_ char **search_path = NULL;
625
626 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
627 if (!search_path)
628 return -ENOMEM;
629
630 r = load_credential_glob(
631 ic,
632 /* encrypted = */ false,
633 search_path,
634 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
635 dfd,
636 uid,
637 gid,
638 ownership_ok,
639 &left);
640 if (r < 0)
641 return r;
642
643 search_path = strv_free(search_path);
644 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
645 if (!search_path)
646 return -ENOMEM;
647
648 r = load_credential_glob(
649 ic,
650 /* encrypted = */ true,
651 search_path,
652 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
653 dfd,
654 uid,
655 gid,
656 ownership_ok,
657 &left);
658 if (r < 0)
659 return r;
660 }
661
662 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
663 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
664 HASHMAP_FOREACH(sc, context->set_credentials) {
665 _cleanup_(erase_and_freep) void *plaintext = NULL;
666 const char *data;
667 size_t size, add;
668
669 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
670 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
671 * slow and involved, hence it's nice to be able to skip that if the credential already
672 * exists anyway. */
673 if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
674 continue;
675 if (errno != ENOENT)
676 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
677
678 if (sc->encrypted) {
679 r = decrypt_credential_and_warn(sc->id, now(CLOCK_REALTIME), NULL, NULL, sc->data, sc->size, &plaintext, &size);
680 if (r < 0)
681 return r;
682
683 data = plaintext;
684 } else {
685 data = sc->data;
686 size = sc->size;
687 }
688
689 add = strlen(sc->id) + size;
690 if (add > left)
691 return -E2BIG;
692
693 r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
694 if (r < 0)
695 return r;
696
697 left -= add;
698 }
699
700 r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
701 if (r < 0)
702 return r;
703
704 /* After we created all keys with the right perms, also make sure the credential store as a whole is
705 * accessible */
706
707 if (uid_is_valid(uid) && uid != getuid()) {
708 r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
709 if (r < 0) {
710 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
711 return r;
712
713 if (!ownership_ok)
714 return r;
715
716 if (fchown(dfd, uid, gid) < 0)
717 return -errno;
718 }
719 }
720
721 return 0;
722 }
723
724 static int setup_credentials_internal(
725 const ExecContext *context,
726 const ExecParameters *params,
727 const char *unit,
728 const char *final, /* This is where the credential store shall eventually end up at */
729 const char *workspace, /* This is where we can prepare it before moving it to the final place */
730 bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
731 bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
732 uid_t uid,
733 gid_t gid) {
734
735 int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
736 * if we mounted something; false if we definitely can't mount anything */
737 bool final_mounted;
738 const char *where;
739
740 assert(context);
741 assert(final);
742 assert(workspace);
743
744 if (reuse_workspace) {
745 r = path_is_mount_point(workspace, NULL, 0);
746 if (r < 0)
747 return r;
748 if (r > 0)
749 workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
750 * it, let's keep this in mind */
751 else
752 workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
753 } else
754 workspace_mounted = -1; /* ditto */
755
756 r = path_is_mount_point(final, NULL, 0);
757 if (r < 0)
758 return r;
759 if (r > 0) {
760 /* If the final place already has something mounted, we use that. If the workspace also has
761 * something mounted we assume it's actually the same mount (but with MS_RDONLY
762 * different). */
763 final_mounted = true;
764
765 if (workspace_mounted < 0) {
766 /* If the final place is mounted, but the workspace isn't, then let's bind mount
767 * the final version to the workspace, and make it writable, so that we can make
768 * changes */
769
770 r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
771 if (r < 0)
772 return r;
773
774 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
775 if (r < 0)
776 return r;
777
778 workspace_mounted = true;
779 }
780 } else
781 final_mounted = false;
782
783 if (workspace_mounted < 0) {
784 /* Nothing is mounted on the workspace yet, let's try to mount something now */
785
786 r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
787 if (r < 0) {
788 /* If that didn't work, try to make a bind mount from the final to the workspace, so
789 * that we can make it writable there. */
790 r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
791 if (r < 0) {
792 if (!ERRNO_IS_PRIVILEGE(r))
793 /* Propagate anything that isn't a permission problem. */
794 return r;
795
796 if (must_mount)
797 /* If it's not OK to use the plain directory fallback, propagate all
798 * errors too. */
799 return r;
800
801 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
802 * for compat with container envs, and just use the final dir as is. */
803
804 workspace_mounted = false;
805 } else {
806 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
807 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
808 if (r < 0)
809 return r;
810
811 workspace_mounted = true;
812 }
813 } else
814 workspace_mounted = true;
815 }
816
817 assert(!must_mount || workspace_mounted > 0);
818 where = workspace_mounted ? workspace : final;
819
820 (void) label_fix_full(AT_FDCWD, where, final, 0);
821
822 r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
823 if (r < 0)
824 return r;
825
826 if (workspace_mounted) {
827 bool install;
828
829 /* Determine if we should actually install the prepared mount in the final location by bind
830 * mounting it there. We do so only if the mount is not established there already, and if the
831 * mount is actually non-empty (i.e. carries at least one credential). Not that in the best
832 * case we are doing all this in a mount namespace, thus no one else will see that we
833 * allocated a file system we are getting rid of again here. */
834 if (final_mounted)
835 install = false; /* already installed */
836 else {
837 r = dir_is_empty(where, /* ignore_hidden_or_backup= */ false);
838 if (r < 0)
839 return r;
840
841 install = r == 0; /* install only if non-empty */
842 }
843
844 if (install) {
845 /* Make workspace read-only now, so that any bind mount we make from it defaults to
846 * read-only too */
847 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
848 if (r < 0)
849 return r;
850
851 /* And mount it to the final place, read-only */
852 r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
853 } else
854 /* Otherwise get rid of it */
855 r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
856 if (r < 0)
857 return r;
858 } else {
859 _cleanup_free_ char *parent = NULL;
860
861 /* If we do not have our own mount put used the plain directory fallback, then we need to
862 * open access to the top-level credential directory and the per-service directory now */
863
864 r = path_extract_directory(final, &parent);
865 if (r < 0)
866 return r;
867 if (chmod(parent, 0755) < 0)
868 return -errno;
869 }
870
871 return 0;
872 }
873
874 int setup_credentials(
875 const ExecContext *context,
876 const ExecParameters *params,
877 const char *unit,
878 uid_t uid,
879 gid_t gid) {
880
881 _cleanup_free_ char *p = NULL, *q = NULL;
882 int r;
883
884 assert(context);
885 assert(params);
886
887 if (!exec_context_has_credentials(context))
888 return 0;
889
890 if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
891 return -EINVAL;
892
893 /* This where we'll place stuff when we are done; this main credentials directory is world-readable,
894 * and the subdir we mount over with a read-only file system readable by the service's user */
895 q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
896 if (!q)
897 return -ENOMEM;
898
899 r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
900 if (r < 0 && r != -EEXIST)
901 return r;
902
903 p = path_join(q, unit);
904 if (!p)
905 return -ENOMEM;
906
907 r = mkdir_label(p, 0700); /* per-unit dir: private to user */
908 if (r < 0 && r != -EEXIST)
909 return r;
910
911 r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
912 if (r < 0) {
913 _cleanup_free_ char *t = NULL, *u = NULL;
914
915 /* If this is not a privilege or support issue then propagate the error */
916 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
917 return r;
918
919 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
920 * it into place, so that users can't access half-initialized credential stores. */
921 t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
922 if (!t)
923 return -ENOMEM;
924
925 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
926 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
927 * after it is fully set up */
928 u = path_join(t, unit);
929 if (!u)
930 return -ENOMEM;
931
932 FOREACH_STRING(i, t, u) {
933 r = mkdir_label(i, 0700);
934 if (r < 0 && r != -EEXIST)
935 return r;
936 }
937
938 r = setup_credentials_internal(
939 context,
940 params,
941 unit,
942 p, /* final mount point */
943 u, /* temporary workspace to overmount */
944 true, /* reuse the workspace if it is already a mount */
945 false, /* it's OK to fall back to a plain directory if we can't mount anything */
946 uid,
947 gid);
948
949 (void) rmdir(u); /* remove the workspace again if we can. */
950
951 if (r < 0)
952 return r;
953
954 } else if (r == 0) {
955
956 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
957 * we can use the same directory for all cases, after turning off propagation. Question
958 * though is: where do we turn off propagation exactly, and where do we place the workspace
959 * directory? We need some place that is guaranteed to be a mount point in the host, and
960 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
961 * since we ultimately want to move the resulting file system there, i.e. we need propagation
962 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
963 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
964 * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
965 * /dev/ is a mount point and we now for sure that /dev/shm/ exists. Hence we can turn off
966 * propagation on the former, and then overmount the latter.
967 *
968 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
969 * for this purpose, but there are few other candidates that work equally well for us, and
970 * given that we do this in a privately namespaced short-lived single-threaded process that
971 * no one else sees this should be OK to do. */
972
973 /* Turn off propagation from our namespace to host */
974 r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
975 if (r < 0)
976 goto child_fail;
977
978 r = setup_credentials_internal(
979 context,
980 params,
981 unit,
982 p, /* final mount point */
983 "/dev/shm", /* temporary workspace to overmount */
984 false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
985 true, /* insist that something is mounted, do not allow fallback to plain directory */
986 uid,
987 gid);
988 if (r < 0)
989 goto child_fail;
990
991 _exit(EXIT_SUCCESS);
992
993 child_fail:
994 _exit(EXIT_FAILURE);
995 }
996
997 /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
998 * try to remove it. This matters in particular if we created the dir as mount point but then didn't
999 * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCESS being
1000 * seen by users when trying access this inode. */
1001 (void) rmdir(p);
1002 return 0;
1003 }