src/core/exec-credential.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <sys/mount.h>
   4
   5 #include "acl-util.h"
   6 #include "creds-util.h"
   7 #include "exec-credential.h"
   8 #include "execute.h"
   9 #include "fileio.h"
  10 #include "glob-util.h"
  11 #include "io-util.h"
  12 #include "iovec-util.h"
  13 #include "label-util.h"
  14 #include "mkdir-label.h"
  15 #include "mount-util.h"
  16 #include "mount.h"
  17 #include "mountpoint-util.h"
  18 #include "process-util.h"
  19 #include "random-util.h"
  20 #include "recurse-dir.h"
  21 #include "rm-rf.h"
  22 #include "tmpfile-util.h"
  23
  24 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
  25         if (!sc)
  26                 return NULL;
  27
  28         free(sc->id);
  29         free(sc->data);
  30         return mfree(sc);
  31 }
  32
  33 ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
  34         if (!lc)
  35                 return NULL;
  36
  37         free(lc->id);
  38         free(lc->path);
  39         return mfree(lc);
  40 }
  41
/* Hash operations for maps keyed by strings (string_hash_func/string_compare_func) whose values are
 * ExecSetCredential objects; exec_set_credential_free() is registered as the value destructor. */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
        exec_set_credential_hash_ops,
        char, string_hash_func, string_compare_func,
        ExecSetCredential, exec_set_credential_free);
  46
/* Hash operations for maps keyed by strings (string_hash_func/string_compare_func) whose values are
 * ExecLoadCredential objects; exec_load_credential_free() is registered as the value destructor. */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
        exec_load_credential_hash_ops,
        char, string_hash_func, string_compare_func,
        ExecLoadCredential, exec_load_credential_free);
  51
  52 bool exec_params_need_credentials(const ExecParameters *p) {
  53         assert(p);
  54
  55         return p->flags & (EXEC_SETUP_CREDENTIALS|EXEC_SETUP_CREDENTIALS_FRESH);
  56 }
  57
  58 bool exec_context_has_credentials(const ExecContext *c) {
  59         assert(c);
  60
  61         return !hashmap_isempty(c->set_credentials) ||
  62                 !hashmap_isempty(c->load_credentials) ||
  63                 !set_isempty(c->import_credentials);
  64 }
  65
  66 bool exec_context_has_encrypted_credentials(const ExecContext *c) {
  67         assert(c);
  68
  69         const ExecLoadCredential *load_cred;
  70         HASHMAP_FOREACH(load_cred, c->load_credentials)
  71                 if (load_cred->encrypted)
  72                         return true;
  73
  74         const ExecSetCredential *set_cred;
  75         HASHMAP_FOREACH(set_cred, c->set_credentials)
  76                 if (set_cred->encrypted)
  77                         return true;
  78
  79         return false;
  80 }
  81
  82 static int get_credential_directory(
  83                 const char *runtime_prefix,
  84                 const char *unit,
  85                 char **ret) {
  86
  87         char *p;
  88
  89         assert(ret);
  90
  91         if (!runtime_prefix || !unit) {
  92                 *ret = NULL;
  93                 return 0;
  94         }
  95
  96         p = path_join(runtime_prefix, "credentials", unit);
  97         if (!p)
  98                 return -ENOMEM;
  99
 100         *ret = p;
 101         return 1;
 102 }
 103
 104 int exec_context_get_credential_directory(
 105                 const ExecContext *context,
 106                 const ExecParameters *params,
 107                 const char *unit,
 108                 char **ret) {
 109
 110         assert(context);
 111         assert(params);
 112         assert(unit);
 113         assert(ret);
 114
 115         if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context)) {
 116                 *ret = NULL;
 117                 return 0;
 118         }
 119
 120         return get_credential_directory(params->prefix[EXEC_DIRECTORY_RUNTIME], unit, ret);
 121 }
 122
 123 int unit_add_default_credential_dependencies(Unit *u, const ExecContext *c) {
 124         _cleanup_free_ char *p = NULL, *m = NULL;
 125         int r;
 126
 127         assert(u);
 128         assert(c);
 129
 130         if (!exec_context_has_credentials(c))
 131                 return 0;
 132
 133         /* Let's make sure the credentials directory of this service is unmounted *after* the service itself
 134          * shuts down. This only matters if mount namespacing is not used for the service, and hence the
 135          * credentials mount appears on the host. */
 136
 137         r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
 138         if (r <= 0)
 139                 return r;
 140
 141         r = unit_name_from_path(p, ".mount", &m);
 142         if (r < 0)
 143                 return r;
 144
 145         return unit_add_dependency_by_name(u, UNIT_AFTER, m, /* add_reference= */ true, UNIT_DEPENDENCY_FILE);
 146 }
 147
 148 int exec_context_destroy_credentials(Unit *u) {
 149         _cleanup_free_ char *p = NULL;
 150         int r;
 151
 152         assert(u);
 153
 154         r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
 155         if (r <= 0)
 156                 return r;
 157
 158         /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
 159          * unmount it, and afterwards remove the mount point */
 160         if (umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW) >= 0)
 161                 (void) mount_invalidate_state_by_path(u->manager, p);
 162
 163         (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
 164
 165         return 0;
 166 }
 167
 168 static int write_credential(
 169                 int dfd,
 170                 const char *id,
 171                 const void *data,
 172                 size_t size,
 173                 uid_t uid,
 174                 gid_t gid,
 175                 bool ownership_ok) {
 176
 177         _cleanup_(unlink_and_freep) char *tmp = NULL;
 178         _cleanup_close_ int fd = -EBADF;
 179         int r;
 180
 181         assert(dfd >= 0);
 182         assert(id);
 183         assert(data || size == 0);
 184
 185         r = tempfn_random_child("", "cred", &tmp);
 186         if (r < 0)
 187                 return r;
 188
 189         fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
 190         if (fd < 0) {
 191                 tmp = mfree(tmp);
 192                 return -errno;
 193         }
 194
 195         r = loop_write(fd, data, size);
 196         if (r < 0)
 197                 return r;
 198
 199         if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
 200                 return -errno;
 201
 202         if (uid_is_valid(uid) && uid != getuid()) {
 203                 r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
 204                 if (r < 0) {
 205                         if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
 206                                 return r;
 207
 208                         if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
 209                                             * to express: that the user gets read access and nothing
 210                                             * else. But if the backing fs can't support that (e.g. ramfs)
 211                                             * then we can use file ownership instead. But that's only safe if
 212                                             * we can then re-mount the whole thing read-only, so that the
 213                                             * user can no longer chmod() the file to gain write access. */
 214                                 return r;
 215
 216                         if (fchown(fd, uid, gid) < 0)
 217                                 return -errno;
 218                 }
 219         }
 220
 221         if (renameat(dfd, tmp, dfd, id) < 0)
 222                 return -errno;
 223
 224         tmp = mfree(tmp);
 225         return 0;
 226 }
 227
/* Selects which set of directories credential_search_path() assembles. */
typedef enum CredentialSearchPath {
        CREDENTIAL_SEARCH_PATH_TRUSTED,    /* received credentials directory + "credstore" directories */
        CREDENTIAL_SEARCH_PATH_ENCRYPTED,  /* received encrypted credentials directory + "credstore.encrypted" directories */
        CREDENTIAL_SEARCH_PATH_ALL,        /* both of the above, the encrypted locations listed first */
        _CREDENTIAL_SEARCH_PATH_MAX,       /* upper bound for valid values, used for range checks */
        _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
} CredentialSearchPath;
 235
 236 static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
 237         _cleanup_strv_free_ char **l = NULL;
 238
 239         assert(params);
 240         assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
 241
 242         /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
 243          * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
 244          * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
 245
 246         if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
 247                 if (strv_extend(&l, params->received_encrypted_credentials_directory) < 0)
 248                         return NULL;
 249
 250                 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
 251                         return NULL;
 252         }
 253
 254         if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
 255                 if (strv_extend(&l, params->received_credentials_directory) < 0)
 256                         return NULL;
 257
 258                 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
 259                         return NULL;
 260         }
 261
 262         if (DEBUG_LOGGING) {
 263                 _cleanup_free_ char *t = strv_join(l, ":");
 264
 265                 log_debug("Credential search path is: %s", strempty(t));
 266         }
 267
 268         return TAKE_PTR(l);
 269 }
 270
 271 static int maybe_decrypt_and_write_credential(
 272                 int dir_fd,
 273                 const char *id,
 274                 bool encrypted,
 275                 uid_t uid,
 276                 gid_t gid,
 277                 bool ownership_ok,
 278                 const char *data,
 279                 size_t size,
 280                 uint64_t *left) {
 281
 282         _cleanup_(iovec_done_erase) struct iovec plaintext = {};
 283         size_t add;
 284         int r;
 285
 286         assert(dir_fd >= 0);
 287         assert(id);
 288         assert(left);
 289
 290         if (encrypted) {
 291                 r = decrypt_credential_and_warn(
 292                                 id,
 293                                 now(CLOCK_REALTIME),
 294                                 /* tpm2_device= */ NULL,
 295                                 /* tpm2_signature_path= */ NULL,
 296                                 getuid(),
 297                                 &IOVEC_MAKE(data, size),
 298                                 CREDENTIAL_ANY_SCOPE,
 299                                 &plaintext);
 300                 if (r < 0)
 301                         return r;
 302
 303                 data = plaintext.iov_base;
 304                 size = plaintext.iov_len;
 305         }
 306
 307         add = strlen(id) + size;
 308         if (add > *left)
 309                 return -E2BIG;
 310
 311         r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
 312         if (r < 0)
 313                 return log_debug_errno(r, "Failed to write credential '%s': %m", id);
 314
 315         *left -= add;
 316         return 0;
 317 }
 318
 319 static int load_credential_glob(
 320                 const char *path,
 321                 bool encrypted,
 322                 char * const *search_path,
 323                 ReadFullFileFlags flags,
 324                 int write_dfd,
 325                 uid_t uid,
 326                 gid_t gid,
 327                 bool ownership_ok,
 328                 uint64_t *left) {
 329
 330         int r;
 331
 332         assert(path);
 333         assert(search_path);
 334         assert(write_dfd >= 0);
 335         assert(left);
 336
 337         STRV_FOREACH(d, search_path) {
 338                 _cleanup_globfree_ glob_t pglob = {};
 339                 _cleanup_free_ char *j = NULL;
 340
 341                 j = path_join(*d, path);
 342                 if (!j)
 343                         return -ENOMEM;
 344
 345                 r = safe_glob(j, 0, &pglob);
 346                 if (r == -ENOENT)
 347                         continue;
 348                 if (r < 0)
 349                         return r;
 350
 351                 FOREACH_ARRAY(p, pglob.gl_pathv, pglob.gl_pathc) {
 352                         _cleanup_free_ char *fn = NULL;
 353                         _cleanup_(erase_and_freep) char *data = NULL;
 354                         size_t size;
 355
 356                         /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
 357                         r = read_full_file_full(
 358                                         AT_FDCWD,
 359                                         *p,
 360                                         UINT64_MAX,
 361                                         encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
 362                                         flags,
 363                                         NULL,
 364                                         &data, &size);
 365                         if (r < 0)
 366                                 return log_debug_errno(r, "Failed to read credential '%s': %m", *p);
 367
 368                         r = path_extract_filename(*p, &fn);
 369                         if (r < 0)
 370                                 return log_debug_errno(r, "Failed to extract filename from '%s': %m", *p);
 371
 372                         r = maybe_decrypt_and_write_credential(
 373                                         write_dfd,
 374                                         fn,
 375                                         encrypted,
 376                                         uid,
 377                                         gid,
 378                                         ownership_ok,
 379                                         data, size,
 380                                         left);
 381                         if (r == -EEXIST)
 382                                 continue;
 383                         if (r < 0)
 384                                 return r;
 385                 }
 386         }
 387
 388         return 0;
 389 }
 390
/* Loads a single credential and writes it to the credential directory write_dfd under the name 'id'.
 *
 * Where the data is read from depends on 'read_dfd' and 'path':
 *   - read_dfd >= 0: 'path' must be a plain file name and is read relative to that directory fd;
 *   - absolute 'path': read directly from that path, with connecting to AF_UNIX sockets enabled;
 *   - 'path' is a valid credential name: looked up in the combined credential search path;
 *   - anything else: treated like a missing file.
 *
 * Encrypted credentials are read with base64 decoding enabled and a different size limit, and are
 * decrypted before being written out.
 *
 * A missing source (-ENOENT) is not an error if the source kind tolerates it, or if a credential with
 * the same ID is configured in context->set_credentials; in that case 0 is returned without writing
 * anything. Returns 0 on success, a negative errno-style error otherwise. */
static int load_credential(
                const ExecContext *context,
                const ExecParameters *params,
                const char *id,
                const char *path,
                bool encrypted,
                const char *unit,
                int read_dfd,
                int write_dfd,
                uid_t uid,
                gid_t gid,
                bool ownership_ok,
                uint64_t *left) {

        ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
        _cleanup_strv_free_ char **search_path = NULL;
        _cleanup_(erase_and_freep) char *data = NULL; /* erased on release, as it may hold secrets */
        _cleanup_free_ char *bindname = NULL;
        const char *source = NULL;
        bool missing_ok = true;
        size_t size, maxsz;
        int r;

        assert(context);
        assert(params);
        assert(id);
        assert(path);
        assert(unit);
        assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
        assert(write_dfd >= 0);
        assert(left);

        if (read_dfd >= 0) {
                /* If a directory fd is specified, then read the file directly from that dir. In this case we
                 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
                 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
                 * open it. */

                if (!filename_is_valid(path)) /* safety check */
                        return -EINVAL;

                missing_ok = true;
                source = path;

        } else if (path_is_absolute(path)) {
                /* If this is an absolute path, read the data directly from it, and support AF_UNIX
                 * sockets */

                if (!path_is_valid(path)) /* safety check */
                        return -EINVAL;

                flags |= READ_FULL_FILE_CONNECT_SOCKET;

                /* Pass some minimal info about the unit and the credential name we are looking to acquire
                 * via the source socket address in case we read off an AF_UNIX socket. */
                if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
                        return -ENOMEM;

                /* An explicitly configured absolute path is expected to exist */
                missing_ok = false;
                source = path;

        } else if (credential_name_valid(path)) {
                /* If this is a relative path, take it as credential name relative to the credentials
                 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
                 * are operating on a credential store, i.e. this is guaranteed to be regular files. */

                search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
                if (!search_path)
                        return -ENOMEM;

                missing_ok = true;
        } else
                /* Neither a usable path nor a valid credential name: handled as -ENOENT below */
                source = NULL;

        if (encrypted)
                flags |= READ_FULL_FILE_UNBASE64;

        maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;

        if (search_path) {
                /* Try each search directory in order, stopping at the first one where the read yields
                 * anything other than "not found" (i.e. success or a real error). */
                STRV_FOREACH(d, search_path) {
                        _cleanup_free_ char *j = NULL;

                        j = path_join(*d, path);
                        if (!j)
                                return -ENOMEM;

                        r = read_full_file_full(
                                        AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
                                        UINT64_MAX,
                                        maxsz,
                                        flags,
                                        NULL,
                                        &data, &size);
                        if (r != -ENOENT)
                                break;
                }
        } else if (source)
                r = read_full_file_full(
                                read_dfd, source,
                                UINT64_MAX,
                                maxsz,
                                flags,
                                bindname,
                                &data, &size);
        else
                r = -ENOENT;

        if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
                /* Make a missing inherited credential non-fatal, let's just continue. After all apps
                 * will get clear errors if we don't pass such a missing credential on as they
                 * themselves will get ENOENT when trying to read them, which should not be much
                 * worse than when we handle the error here and make it fatal.
                 *
                 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
                 * we are fine, too. */
                log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
                return 0;
        }
        if (r < 0)
                return log_debug_errno(r, "Failed to read credential '%s': %m", path);

        return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
}
 515
/* Arguments handed to load_cred_recurse_dir_cb() via its userdata pointer. Apart from 'dfd', which is
 * also used for the duplicate check, the fields are forwarded unmodified to load_credential(). */
struct load_cred_args {
        const ExecContext *context;
        const ExecParameters *params;
        bool encrypted;      /* whether the credentials found are to be treated as encrypted */
        const char *unit;
        int dfd;             /* credential directory being populated (passed on as write_dfd) */
        uid_t uid;
        gid_t gid;
        bool ownership_ok;
        uint64_t *left;      /* remaining size budget, shared with the caller */
};
 527
 528 static int load_cred_recurse_dir_cb(
 529                 RecurseDirEvent event,
 530                 const char *path,
 531                 int dir_fd,
 532                 int inode_fd,
 533                 const struct dirent *de,
 534                 const struct statx *sx,
 535                 void *userdata) {
 536
 537         struct load_cred_args *args = ASSERT_PTR(userdata);
 538         _cleanup_free_ char *sub_id = NULL;
 539         int r;
 540
 541         assert(path);
 542         assert(de);
 543
 544         if (event != RECURSE_DIR_ENTRY)
 545                 return RECURSE_DIR_CONTINUE;
 546
 547         if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
 548                 return RECURSE_DIR_CONTINUE;
 549
 550         sub_id = strreplace(path, "/", "_");
 551         if (!sub_id)
 552                 return -ENOMEM;
 553
 554         if (!credential_name_valid(sub_id))
 555                 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
 556
 557         if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
 558                 log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
 559                 return RECURSE_DIR_CONTINUE;
 560         }
 561         if (errno != ENOENT)
 562                 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
 563
 564         r = load_credential(
 565                         args->context,
 566                         args->params,
 567                         sub_id,
 568                         de->d_name,
 569                         args->encrypted,
 570                         args->unit,
 571                         dir_fd,
 572                         args->dfd,
 573                         args->uid,
 574                         args->gid,
 575                         args->ownership_ok,
 576                         args->left);
 577         if (r < 0)
 578                 return r;
 579
 580         return RECURSE_DIR_CONTINUE;
 581 }
 582
 583 static int acquire_credentials(
 584                 const ExecContext *context,
 585                 const ExecParameters *params,
 586                 const char *unit,
 587                 const char *p,
 588                 uid_t uid,
 589                 gid_t gid,
 590                 bool ownership_ok) {
 591
 592         uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
 593         _cleanup_close_ int dfd = -EBADF;
 594         const char *ic;
 595         ExecLoadCredential *lc;
 596         ExecSetCredential *sc;
 597         int r;
 598
 599         assert(context);
 600         assert(params);
 601         assert(unit);
 602         assert(p);
 603
 604         dfd = open(p, O_DIRECTORY|O_CLOEXEC);
 605         if (dfd < 0)
 606                 return -errno;
 607
 608         r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
 609         if (r < 0)
 610                 return r;
 611
 612         /* First, load credentials off disk (or acquire via AF_UNIX socket) */
 613         HASHMAP_FOREACH(lc, context->load_credentials) {
 614                 _cleanup_close_ int sub_fd = -EBADF;
 615
 616                 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
 617                  * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
 618                  * a regular file. Finally, if it's a relative path we will use it as a credential name to
 619                  * propagate a credential passed to us from further up. */
 620
 621                 if (path_is_absolute(lc->path)) {
 622                         sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
 623                         if (sub_fd < 0 && !IN_SET(errno,
 624                                                   ENOTDIR,  /* Not a directory */
 625                                                   ENOENT))  /* Doesn't exist? */
 626                                 return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
 627                 }
 628
 629                 if (sub_fd < 0)
 630                         /* Regular file (incl. a credential passed in from higher up) */
 631                         r = load_credential(
 632                                         context,
 633                                         params,
 634                                         lc->id,
 635                                         lc->path,
 636                                         lc->encrypted,
 637                                         unit,
 638                                         AT_FDCWD,
 639                                         dfd,
 640                                         uid,
 641                                         gid,
 642                                         ownership_ok,
 643                                         &left);
 644                 else
 645                         /* Directory */
 646                         r = recurse_dir(sub_fd,
 647                                         /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
 648                                         /* statx_mask= */ 0,
 649                                         /* n_depth_max= */ UINT_MAX,
 650                                         RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
 651                                         load_cred_recurse_dir_cb,
 652                                         &(struct load_cred_args) {
 653                                                 .context = context,
 654                                                 .params = params,
 655                                                 .encrypted = lc->encrypted,
 656                                                 .unit = unit,
 657                                                 .dfd = dfd,
 658                                                 .uid = uid,
 659                                                 .gid = gid,
 660                                                 .ownership_ok = ownership_ok,
 661                                                 .left = &left,
 662                                         });
 663                 if (r < 0)
 664                         return r;
 665         }
 666
 667         /* Next, look for system credentials and credentials in the credentials store. Note that these do not
 668          * override any credentials found earlier. */
 669         SET_FOREACH(ic, context->import_credentials) {
 670                 _cleanup_free_ char **search_path = NULL;
 671
 672                 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
 673                 if (!search_path)
 674                         return -ENOMEM;
 675
 676                 r = load_credential_glob(
 677                                 ic,
 678                                 /* encrypted = */ false,
 679                                 search_path,
 680                                 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
 681                                 dfd,
 682                                 uid,
 683                                 gid,
 684                                 ownership_ok,
 685                                 &left);
 686                 if (r < 0)
 687                         return r;
 688
 689                 search_path = strv_free(search_path);
 690                 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
 691                 if (!search_path)
 692                         return -ENOMEM;
 693
 694                 r = load_credential_glob(
 695                                 ic,
 696                                 /* encrypted = */ true,
 697                                 search_path,
 698                                 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
 699                                 dfd,
 700                                 uid,
 701                                 gid,
 702                                 ownership_ok,
 703                                 &left);
 704                 if (r < 0)
 705                         return r;
 706         }
 707
 708         /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
 709          * add them, so that they can act as a "default" if the same credential is specified multiple times. */
 710         HASHMAP_FOREACH(sc, context->set_credentials) {
 711                 _cleanup_(iovec_done_erase) struct iovec plaintext = {};
 712                 const char *data;
 713                 size_t size, add;
 714
 715                 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
 716                  * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
 717                  * slow and involved, hence it's nice to be able to skip that if the credential already
 718                  * exists anyway. */
 719                 if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
 720                         continue;
 721                 if (errno != ENOENT)
 722                         return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
 723
 724                 if (sc->encrypted) {
 725                         r = decrypt_credential_and_warn(
 726                                         sc->id,
 727                                         now(CLOCK_REALTIME),
 728                                         /* tpm2_device= */ NULL,
 729                                         /* tpm2_signature_path= */ NULL,
 730                                         getuid(),
 731                                         &IOVEC_MAKE(sc->data, sc->size),
 732                                         CREDENTIAL_ANY_SCOPE,
 733                                         &plaintext);
 734                         if (r < 0)
 735                                 return r;
 736
 737                         data = plaintext.iov_base;
 738                         size = plaintext.iov_len;
 739                 } else {
 740                         data = sc->data;
 741                         size = sc->size;
 742                 }
 743
 744                 add = strlen(sc->id) + size;
 745                 if (add > left)
 746                         return -E2BIG;
 747
 748                 r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
 749                 if (r < 0)
 750                         return r;
 751
 752                 left -= add;
 753         }
 754
 755         r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
 756         if (r < 0)
 757                 return r;
 758
 759         /* After we created all keys with the right perms, also make sure the credential store as a whole is
 760          * accessible */
 761
 762         if (uid_is_valid(uid) && uid != getuid()) {
 763                 r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
 764                 if (r < 0) {
 765                         if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
 766                                 return r;
 767
 768                         if (!ownership_ok)
 769                                 return r;
 770
 771                         if (fchown(dfd, uid, gid) < 0)
 772                                 return -errno;
 773                 }
 774         }
 775
 776         return 0;
 777 }
 778
 779 static int setup_credentials_internal(
 780                 const ExecContext *context,
 781                 const ExecParameters *params,
 782                 const char *unit,
 783                 const char *final,        /* This is where the credential store shall eventually end up at */
 784                 const char *workspace,    /* This is where we can prepare it before moving it to the final place */
 785                 bool reuse_workspace,     /* Whether to reuse any existing workspace mount if it already is a mount */
 786                 bool must_mount,          /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
 787                 uid_t uid,
 788                 gid_t gid) {
 789
 790         bool final_mounted;
 791         int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
 792                                    * if we mounted something; false if we definitely can't mount anything */
 793
 794         assert(context);
 795         assert(params);
 796         assert(unit);
 797         assert(final);
 798         assert(workspace);
 799
 800         r = path_is_mount_point(final);
 801         if (r < 0)
 802                 return r;
 803         final_mounted = r > 0;
 804
 805         if (final_mounted) {
 806                 if (FLAGS_SET(params->flags, EXEC_SETUP_CREDENTIALS_FRESH)) {
 807                         r = umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
 808                         if (r < 0)
 809                                 return r;
 810
 811                         final_mounted = false;
 812                 } else {
 813                         /* We can reuse the previous credential dir */
 814                         r = dir_is_empty(final, /* ignore_hidden_or_backup = */ false);
 815                         if (r < 0)
 816                                 return r;
 817                         if (r == 0) {
 818                                 log_debug("Credential dir for unit '%s' already set up, skipping.", unit);
 819                                 return 0;
 820                         }
 821                 }
 822         }
 823
 824         if (reuse_workspace) {
 825                 r = path_is_mount_point(workspace);
 826                 if (r < 0)
 827                         return r;
 828                 if (r > 0)
 829                         workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
 830                                                    * it, let's keep this in mind */
 831                 else
 832                         workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
 833         } else
 834                 workspace_mounted = -1; /* ditto */
 835
 836         /* If both the final place and the workspace are mounted, we have no mounts to set up, based on
 837          * the assumption that they're actually the same tmpfs (but the latter with MS_RDONLY different).
 838          * If the workspace is not mounted, we just bind the final place over and make it writable. */
 839         must_mount = must_mount || final_mounted;
 840
 841         if (workspace_mounted < 0) {
 842                 if (!final_mounted)
 843                         /* Nothing is mounted on the workspace yet, let's try to mount a new tmpfs if
 844                          * not using the final place. */
 845                         r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
 846                 if (final_mounted || r < 0) {
 847                         /* If using final place or failed to mount new tmpfs, make a bind mount from
 848                          * the final to the workspace, so that we can make it writable there. */
 849                         r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
 850                         if (r < 0) {
 851                                 if (!ERRNO_IS_PRIVILEGE(r))
 852                                         /* Propagate anything that isn't a permission problem. */
 853                                         return r;
 854
 855                                 if (must_mount)
 856                                         /* If it's not OK to use the plain directory fallback, propagate all
 857                                          * errors too. */
 858                                         return r;
 859
 860                                 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
 861                                  * for compat with container envs, and just use the final dir as is.
 862                                  * Final place must not be mounted in this case (refused by must_mount
 863                                  * above) */
 864
 865                                 workspace_mounted = false;
 866                         } else {
 867                                 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
 868                                 r = mount_nofollow_verbose(LOG_DEBUG,
 869                                                            NULL,
 870                                                            workspace,
 871                                                            NULL,
 872                                                            MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false),
 873                                                            NULL);
 874                                 if (r < 0)
 875                                         return r;
 876
 877                                 workspace_mounted = true;
 878                         }
 879                 } else
 880                         workspace_mounted = true;
 881         }
 882
 883         assert(workspace_mounted >= 0);
 884         assert(!must_mount || workspace_mounted);
 885
 886         const char *where = workspace_mounted ? workspace : final;
 887
 888         (void) label_fix_full(AT_FDCWD, where, final, 0);
 889
 890         r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
 891         if (r < 0) {
 892                 /* If we're using final place as workspace, and failed to acquire credentials, we might
 893                  * have left half-written creds there. Let's get rid of the whole mount, so future
 894                  * calls won't reuse it. */
 895                 if (final_mounted)
 896                         (void) umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
 897
 898                 return r;
 899         }
 900
 901         if (workspace_mounted) {
 902                 if (!final_mounted) {
 903                         /* Make workspace read-only now, so that any bind mount we make from it defaults to
 904                          * read-only too */
 905                         r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
 906                         if (r < 0)
 907                                 return r;
 908
 909                         /* And mount it to the final place, read-only */
 910                         r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
 911                 } else
 912                         /* Otherwise we just get rid of the bind mount of final place */
 913                         r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
 914                 if (r < 0)
 915                         return r;
 916         } else {
 917                 _cleanup_free_ char *parent = NULL;
 918
 919                 /* If we do not have our own mount put used the plain directory fallback, then we need to
 920                  * open access to the top-level credential directory and the per-service directory now */
 921
 922                 r = path_extract_directory(final, &parent);
 923                 if (r < 0)
 924                         return r;
 925                 if (chmod(parent, 0755) < 0)
 926                         return -errno;
 927         }
 928
 929         return 0;
 930 }
 931
 932 int exec_setup_credentials(
 933                 const ExecContext *context,
 934                 const ExecParameters *params,
 935                 const char *unit,
 936                 uid_t uid,
 937                 gid_t gid) {
 938
 939         _cleanup_free_ char *p = NULL, *q = NULL;
 940         int r;
 941
 942         assert(context);
 943         assert(params);
 944         assert(unit);
 945
 946         if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context))
 947                 return 0;
 948
 949         if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
 950                 return -EINVAL;
 951
 952         /* This where we'll place stuff when we are done; this main credentials directory is world-readable,
 953          * and the subdir we mount over with a read-only file system readable by the service's user */
 954         q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
 955         if (!q)
 956                 return -ENOMEM;
 957
 958         r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
 959         if (r < 0 && r != -EEXIST)
 960                 return r;
 961
 962         p = path_join(q, unit);
 963         if (!p)
 964                 return -ENOMEM;
 965
 966         r = mkdir_label(p, 0700); /* per-unit dir: private to user */
 967         if (r < 0 && r != -EEXIST)
 968                 return r;
 969
 970         r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
 971         if (r < 0) {
 972                 _cleanup_(rmdir_and_freep) char *u = NULL; /* remove the temporary workspace if we can */
 973                 _cleanup_free_ char *t = NULL;
 974
 975                 /* If this is not a privilege or support issue then propagate the error */
 976                 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
 977                         return r;
 978
 979                 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
 980                  * it into place, so that users can't access half-initialized credential stores. */
 981                 t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
 982                 if (!t)
 983                         return -ENOMEM;
 984
 985                 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
 986                  * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
 987                  * after it is fully set up */
 988                 u = path_join(t, unit);
 989                 if (!u)
 990                         return -ENOMEM;
 991
 992                 FOREACH_STRING(i, t, u) {
 993                         r = mkdir_label(i, 0700);
 994                         if (r < 0 && r != -EEXIST)
 995                                 return r;
 996                 }
 997
 998                 r = setup_credentials_internal(
 999                                 context,
1000                                 params,
1001                                 unit,
1002                                 p,       /* final mount point */
1003                                 u,       /* temporary workspace to overmount */
1004                                 true,    /* reuse the workspace if it is already a mount */
1005                                 false,   /* it's OK to fall back to a plain directory if we can't mount anything */
1006                                 uid,
1007                                 gid);
1008                 if (r < 0)
1009                         return r;
1010
1011         } else if (r == 0) {
1012
1013                 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
1014                  * we can use the same directory for all cases, after turning off propagation. Question
1015                  * though is: where do we turn off propagation exactly, and where do we place the workspace
1016                  * directory? We need some place that is guaranteed to be a mount point in the host, and
1017                  * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
1018                  * since we ultimately want to move the resulting file system there, i.e. we need propagation
1019                  * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
1020                  * would be visible in the host mount table all the time, which we want to avoid. Hence, what
1021                  * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
1022                  * /dev/ is a mount point and we now for sure that /dev/shm/ exists. Hence we can turn off
1023                  * propagation on the former, and then overmount the latter.
1024                  *
1025                  * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
1026                  * for this purpose, but there are few other candidates that work equally well for us, and
1027                  * given that we do this in a privately namespaced short-lived single-threaded process that
1028                  * no one else sees this should be OK to do. */
1029
1030                 /* Turn off propagation from our namespace to host */
1031                 r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
1032                 if (r < 0)
1033                         goto child_fail;
1034
1035                 r = setup_credentials_internal(
1036                                 context,
1037                                 params,
1038                                 unit,
1039                                 p,           /* final mount point */
1040                                 "/dev/shm",  /* temporary workspace to overmount */
1041                                 false,       /* do not reuse /dev/shm if it is already a mount, under no circumstances */
1042                                 true,        /* insist that something is mounted, do not allow fallback to plain directory */
1043                                 uid,
1044                                 gid);
1045                 if (r < 0)
1046                         goto child_fail;
1047
1048                 _exit(EXIT_SUCCESS);
1049
1050         child_fail:
1051                 _exit(EXIT_FAILURE);
1052         }
1053
1054         /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
1055          * try to remove it. This matters in particular if we created the dir as mount point but then didn't
1056          * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCESS being
1057          * seen by users when trying access this inode. */
1058         (void) rmdir(p);
1059         return 0;
1060 }