src/core/exec-credential.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <sys/mount.h>
   4
   5 #include "acl-util.h"
   6 #include "creds-util.h"
   7 #include "exec-credential.h"
   8 #include "execute.h"
   9 #include "fileio.h"
  10 #include "glob-util.h"
  11 #include "io-util.h"
  12 #include "label-util.h"
  13 #include "mkdir-label.h"
  14 #include "mount-util.h"
  15 #include "mount.h"
  16 #include "mountpoint-util.h"
  17 #include "process-util.h"
  18 #include "random-util.h"
  19 #include "recurse-dir.h"
  20 #include "rm-rf.h"
  21 #include "tmpfile-util.h"
  22
  23 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
  24         if (!sc)
  25                 return NULL;
  26
  27         free(sc->id);
  28         free(sc->data);
  29         return mfree(sc);
  30 }
  31
  32 ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
  33         if (!lc)
  34                 return NULL;
  35
  36         free(lc->id);
  37         free(lc->path);
  38         return mfree(lc);
  39 }
  40
  41 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
  42         exec_set_credential_hash_ops,
  43         char, string_hash_func, string_compare_func,
  44         ExecSetCredential, exec_set_credential_free);
  45
  46 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
  47         exec_load_credential_hash_ops,
  48         char, string_hash_func, string_compare_func,
  49         ExecLoadCredential, exec_load_credential_free);
  50
  51 bool exec_context_has_credentials(const ExecContext *c) {
  52         assert(c);
  53
  54         return !hashmap_isempty(c->set_credentials) ||
  55                 !hashmap_isempty(c->load_credentials) ||
  56                 !set_isempty(c->import_credentials);
  57 }
  58
  59 bool exec_context_has_encrypted_credentials(ExecContext *c) {
  60         ExecLoadCredential *load_cred;
  61         ExecSetCredential *set_cred;
  62
  63         assert(c);
  64
  65         HASHMAP_FOREACH(load_cred, c->load_credentials)
  66                 if (load_cred->encrypted)
  67                         return true;
  68
  69         HASHMAP_FOREACH(set_cred, c->set_credentials)
  70                 if (set_cred->encrypted)
  71                         return true;
  72
  73         return false;
  74 }
  75
  76 static int get_credential_directory(
  77                 const char *runtime_prefix,
  78                 const char *unit,
  79                 char **ret) {
  80
  81         char *p;
  82
  83         assert(ret);
  84
  85         if (!runtime_prefix || !unit) {
  86                 *ret = NULL;
  87                 return 0;
  88         }
  89
  90         p = path_join(runtime_prefix, "credentials", unit);
  91         if (!p)
  92                 return -ENOMEM;
  93
  94         *ret = p;
  95         return 1;
  96 }
  97
  98 int exec_context_get_credential_directory(
  99                 const ExecContext *context,
 100                 const ExecParameters *params,
 101                 const char *unit,
 102                 char **ret) {
 103
 104         assert(context);
 105         assert(params);
 106         assert(unit);
 107         assert(ret);
 108
 109         if (!exec_context_has_credentials(context)) {
 110                 *ret = NULL;
 111                 return 0;
 112         }
 113
 114         return get_credential_directory(params->prefix[EXEC_DIRECTORY_RUNTIME], unit, ret);
 115 }
 116
 117 int unit_add_default_credential_dependencies(Unit *u, const ExecContext *c) {
 118         _cleanup_free_ char *p = NULL, *m = NULL;
 119         int r;
 120
 121         assert(u);
 122         assert(c);
 123
 124         if (!exec_context_has_credentials(c))
 125                 return 0;
 126
 127         /* Let's make sure the credentials directory of this service is unmounted *after* the service itself
 128          * shuts down. This only matters if mount namespacing is not used for the service, and hence the
 129          * credentials mount appears on the host. */
 130
 131         r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
 132         if (r <= 0)
 133                 return r;
 134
 135         r = unit_name_from_path(p, ".mount", &m);
 136         if (r < 0)
 137                 return r;
 138
 139         return unit_add_dependency_by_name(u, UNIT_AFTER, m, /* add_reference= */ true, UNIT_DEPENDENCY_FILE);
 140 }
 141
 142 int exec_context_destroy_credentials(Unit *u) {
 143         _cleanup_free_ char *p = NULL;
 144         int r;
 145
 146         assert(u);
 147
 148         r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &p);
 149         if (r <= 0)
 150                 return r;
 151
 152         /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
 153          * unmount it, and afterwards remove the mount point */
 154         if (umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW) >= 0)
 155                 (void) mount_invalidate_state_by_path(u->manager, p);
 156
 157         (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
 158
 159         return 0;
 160 }
 161
 162 static int write_credential(
 163                 int dfd,
 164                 const char *id,
 165                 const void *data,
 166                 size_t size,
 167                 uid_t uid,
 168                 gid_t gid,
 169                 bool ownership_ok) {
 170
 171         _cleanup_(unlink_and_freep) char *tmp = NULL;
 172         _cleanup_close_ int fd = -EBADF;
 173         int r;
 174
 175         r = tempfn_random_child("", "cred", &tmp);
 176         if (r < 0)
 177                 return r;
 178
 179         fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
 180         if (fd < 0) {
 181                 tmp = mfree(tmp);
 182                 return -errno;
 183         }
 184
 185         r = loop_write(fd, data, size);
 186         if (r < 0)
 187                 return r;
 188
 189         if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
 190                 return -errno;
 191
 192         if (uid_is_valid(uid) && uid != getuid()) {
 193                 r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
 194                 if (r < 0) {
 195                         if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
 196                                 return r;
 197
 198                         if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
 199                                             * to express: that the user gets read access and nothing
 200                                             * else. But if the backing fs can't support that (e.g. ramfs)
 201                                             * then we can use file ownership instead. But that's only safe if
 202                                             * we can then re-mount the whole thing read-only, so that the
 203                                             * user can no longer chmod() the file to gain write access. */
 204                                 return r;
 205
 206                         if (fchown(fd, uid, gid) < 0)
 207                                 return -errno;
 208                 }
 209         }
 210
 211         if (renameat(dfd, tmp, dfd, id) < 0)
 212                 return -errno;
 213
 214         tmp = mfree(tmp);
 215         return 0;
 216 }
 217
 218 typedef enum CredentialSearchPath {
 219         CREDENTIAL_SEARCH_PATH_TRUSTED,
 220         CREDENTIAL_SEARCH_PATH_ENCRYPTED,
 221         CREDENTIAL_SEARCH_PATH_ALL,
 222         _CREDENTIAL_SEARCH_PATH_MAX,
 223         _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
 224 } CredentialSearchPath;
 225
 226 static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
 227
 228         _cleanup_strv_free_ char **l = NULL;
 229
 230         assert(params);
 231         assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
 232
 233         /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
 234          * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
 235          * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
 236
 237         if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
 238                 if (strv_extend(&l, params->received_encrypted_credentials_directory) < 0)
 239                         return NULL;
 240
 241                 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
 242                         return NULL;
 243         }
 244
 245         if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
 246                 if (params->received_credentials_directory)
 247                         if (strv_extend(&l, params->received_credentials_directory) < 0)
 248                                 return NULL;
 249
 250                 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
 251                         return NULL;
 252         }
 253
 254         if (DEBUG_LOGGING) {
 255                 _cleanup_free_ char *t = strv_join(l, ":");
 256
 257                 log_debug("Credential search path is: %s", strempty(t));
 258         }
 259
 260         return TAKE_PTR(l);
 261 }
 262
 263 static int maybe_decrypt_and_write_credential(
 264                 int dir_fd,
 265                 const char *id,
 266                 bool encrypted,
 267                 uid_t uid,
 268                 gid_t gid,
 269                 bool ownership_ok,
 270                 const char *data,
 271                 size_t size,
 272                 uint64_t *left) {
 273
 274         _cleanup_free_ void *plaintext = NULL;
 275         size_t add;
 276         int r;
 277
 278         if (encrypted) {
 279                 size_t plaintext_size = 0;
 280
 281                 r = decrypt_credential_and_warn(id, now(CLOCK_REALTIME), NULL, NULL, data, size,
 282                                                 &plaintext, &plaintext_size);
 283                 if (r < 0)
 284                         return r;
 285
 286                 data = plaintext;
 287                 size = plaintext_size;
 288         }
 289
 290         add = strlen(id) + size;
 291         if (add > *left)
 292                 return -E2BIG;
 293
 294         r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
 295         if (r < 0)
 296                 return log_debug_errno(r, "Failed to write credential '%s': %m", id);
 297
 298         *left -= add;
 299         return 0;
 300 }
 301
 302 static int load_credential_glob(
 303                 const char *path,
 304                 bool encrypted,
 305                 char **search_path,
 306                 ReadFullFileFlags flags,
 307                 int write_dfd,
 308                 uid_t uid,
 309                 gid_t gid,
 310                 bool ownership_ok,
 311                 uint64_t *left) {
 312
 313         int r;
 314
 315         STRV_FOREACH(d, search_path) {
 316                 _cleanup_globfree_ glob_t pglob = {};
 317                 _cleanup_free_ char *j = NULL;
 318
 319                 j = path_join(*d, path);
 320                 if (!j)
 321                         return -ENOMEM;
 322
 323                 r = safe_glob(j, 0, &pglob);
 324                 if (r == -ENOENT)
 325                         continue;
 326                 if (r < 0)
 327                         return r;
 328
 329                 for (size_t n = 0; n < pglob.gl_pathc; n++) {
 330                         _cleanup_free_ char *fn = NULL;
 331                         _cleanup_(erase_and_freep) char *data = NULL;
 332                         size_t size;
 333
 334                         /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
 335                         r = read_full_file_full(
 336                                 AT_FDCWD,
 337                                 pglob.gl_pathv[n],
 338                                 UINT64_MAX,
 339                                 encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
 340                                 flags,
 341                                 NULL,
 342                                 &data, &size);
 343                         if (r < 0)
 344                                 return log_debug_errno(r, "Failed to read credential '%s': %m",
 345                                                         pglob.gl_pathv[n]);
 346
 347                         r = path_extract_filename(pglob.gl_pathv[n], &fn);
 348                         if (r < 0)
 349                                 return log_debug_errno(r, "Failed to extract filename from '%s': %m",
 350                                                         pglob.gl_pathv[n]);
 351
 352                         r = maybe_decrypt_and_write_credential(
 353                                 write_dfd,
 354                                 fn,
 355                                 encrypted,
 356                                 uid,
 357                                 gid,
 358                                 ownership_ok,
 359                                 data, size,
 360                                 left);
 361                         if (r == -EEXIST)
 362                                 continue;
 363                         if (r < 0)
 364                                 return r;
 365                 }
 366         }
 367
 368         return 0;
 369 }
 370
 371 static int load_credential(
 372                 const ExecContext *context,
 373                 const ExecParameters *params,
 374                 const char *id,
 375                 const char *path,
 376                 bool encrypted,
 377                 const char *unit,
 378                 int read_dfd,
 379                 int write_dfd,
 380                 uid_t uid,
 381                 gid_t gid,
 382                 bool ownership_ok,
 383                 uint64_t *left) {
 384
 385         ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
 386         _cleanup_strv_free_ char **search_path = NULL;
 387         _cleanup_(erase_and_freep) char *data = NULL;
 388         _cleanup_free_ char *bindname = NULL;
 389         const char *source = NULL;
 390         bool missing_ok = true;
 391         size_t size, maxsz;
 392         int r;
 393
 394         assert(context);
 395         assert(params);
 396         assert(id);
 397         assert(path);
 398         assert(unit);
 399         assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
 400         assert(write_dfd >= 0);
 401         assert(left);
 402
 403         if (read_dfd >= 0) {
 404                 /* If a directory fd is specified, then read the file directly from that dir. In this case we
 405                  * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
 406                  * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
 407                  * open it. */
 408
 409                 if (!filename_is_valid(path)) /* safety check */
 410                         return -EINVAL;
 411
 412                 missing_ok = true;
 413                 source = path;
 414
 415         } else if (path_is_absolute(path)) {
 416                 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
 417                  * sockets */
 418
 419                 if (!path_is_valid(path)) /* safety check */
 420                         return -EINVAL;
 421
 422                 flags |= READ_FULL_FILE_CONNECT_SOCKET;
 423
 424                 /* Pass some minimal info about the unit and the credential name we are looking to acquire
 425                  * via the source socket address in case we read off an AF_UNIX socket. */
 426                 if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
 427                         return -ENOMEM;
 428
 429                 missing_ok = false;
 430                 source = path;
 431
 432         } else if (credential_name_valid(path)) {
 433                 /* If this is a relative path, take it as credential name relative to the credentials
 434                  * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
 435                  * are operating on a credential store, i.e. this is guaranteed to be regular files. */
 436
 437                 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
 438                 if (!search_path)
 439                         return -ENOMEM;
 440
 441                 missing_ok = true;
 442         } else
 443                 source = NULL;
 444
 445         if (encrypted)
 446                 flags |= READ_FULL_FILE_UNBASE64;
 447
 448         maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
 449
 450         if (search_path) {
 451                 STRV_FOREACH(d, search_path) {
 452                         _cleanup_free_ char *j = NULL;
 453
 454                         j = path_join(*d, path);
 455                         if (!j)
 456                                 return -ENOMEM;
 457
 458                         r = read_full_file_full(
 459                                         AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
 460                                         UINT64_MAX,
 461                                         maxsz,
 462                                         flags,
 463                                         NULL,
 464                                         &data, &size);
 465                         if (r != -ENOENT)
 466                                 break;
 467                 }
 468         } else if (source)
 469                 r = read_full_file_full(
 470                                 read_dfd, source,
 471                                 UINT64_MAX,
 472                                 maxsz,
 473                                 flags,
 474                                 bindname,
 475                                 &data, &size);
 476         else
 477                 r = -ENOENT;
 478
 479         if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
 480                 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
 481                  * will get clear errors if we don't pass such a missing credential on as they
 482                  * themselves will get ENOENT when trying to read them, which should not be much
 483                  * worse than when we handle the error here and make it fatal.
 484                  *
 485                  * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
 486                  * we are fine, too. */
 487                 log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
 488                 return 0;
 489         }
 490         if (r < 0)
 491                 return log_debug_errno(r, "Failed to read credential '%s': %m", path);
 492
 493         return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
 494 }
 495
  496 struct load_cred_args { /* Userdata handed via recurse_dir() to load_cred_recurse_dir_cb() */
  497         const ExecContext *context; /* passed on to load_credential() */
  498         const ExecParameters *params; /* passed on to load_credential() */
  499         bool encrypted; /* whether the credentials found are to be treated as encrypted */
  500         const char *unit; /* unit name, passed on to load_credential() */
  501         int dfd; /* fd of the credentials directory: checked for duplicates and written to */
  502         uid_t uid; /* passed on to load_credential() */
  503         gid_t gid; /* passed on to load_credential() */
  504         bool ownership_ok; /* passed on to load_credential() */
  505         uint64_t *left; /* remaining size budget, shared with the caller of recurse_dir() */
  506 };
 507
 508 static int load_cred_recurse_dir_cb(
 509                 RecurseDirEvent event,
 510                 const char *path,
 511                 int dir_fd,
 512                 int inode_fd,
 513                 const struct dirent *de,
 514                 const struct statx *sx,
 515                 void *userdata) {
 516
 517         struct load_cred_args *args = ASSERT_PTR(userdata);
 518         _cleanup_free_ char *sub_id = NULL;
 519         int r;
 520
 521         if (event != RECURSE_DIR_ENTRY)
 522                 return RECURSE_DIR_CONTINUE;
 523
 524         if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
 525                 return RECURSE_DIR_CONTINUE;
 526
 527         sub_id = strreplace(path, "/", "_");
 528         if (!sub_id)
 529                 return -ENOMEM;
 530
 531         if (!credential_name_valid(sub_id))
 532                 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
 533
 534         if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
 535                 log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
 536                 return RECURSE_DIR_CONTINUE;
 537         }
 538         if (errno != ENOENT)
 539                 return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
 540
 541         r = load_credential(
 542                         args->context,
 543                         args->params,
 544                         sub_id,
 545                         de->d_name,
 546                         args->encrypted,
 547                         args->unit,
 548                         dir_fd,
 549                         args->dfd,
 550                         args->uid,
 551                         args->gid,
 552                         args->ownership_ok,
 553                         args->left);
 554         if (r < 0)
 555                 return r;
 556
 557         return RECURSE_DIR_CONTINUE;
 558 }
 559
 560 static int acquire_credentials(
 561                 const ExecContext *context,
 562                 const ExecParameters *params,
 563                 const char *unit,
 564                 const char *p,
 565                 uid_t uid,
 566                 gid_t gid,
 567                 bool ownership_ok) {
 568
 569         uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
 570         _cleanup_close_ int dfd = -EBADF;
 571         const char *ic;
 572         ExecLoadCredential *lc;
 573         ExecSetCredential *sc;
 574         int r;
 575
 576         assert(context);
 577         assert(p);
 578
 579         dfd = open(p, O_DIRECTORY|O_CLOEXEC);
 580         if (dfd < 0)
 581                 return -errno;
 582
 583         r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
 584         if (r < 0)
 585                 return r;
 586
 587         /* First, load credentials off disk (or acquire via AF_UNIX socket) */
 588         HASHMAP_FOREACH(lc, context->load_credentials) {
 589                 _cleanup_close_ int sub_fd = -EBADF;
 590
 591                 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
 592                  * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
 593                  * a regular file. Finally, if it's a relative path we will use it as a credential name to
 594                  * propagate a credential passed to us from further up. */
 595
 596                 if (path_is_absolute(lc->path)) {
 597                         sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
 598                         if (sub_fd < 0 && !IN_SET(errno,
 599                                                   ENOTDIR,  /* Not a directory */
 600                                                   ENOENT))  /* Doesn't exist? */
 601                                 return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
 602                 }
 603
 604                 if (sub_fd < 0)
 605                         /* Regular file (incl. a credential passed in from higher up) */
 606                         r = load_credential(
 607                                         context,
 608                                         params,
 609                                         lc->id,
 610                                         lc->path,
 611                                         lc->encrypted,
 612                                         unit,
 613                                         AT_FDCWD,
 614                                         dfd,
 615                                         uid,
 616                                         gid,
 617                                         ownership_ok,
 618                                         &left);
 619                 else
 620                         /* Directory */
 621                         r = recurse_dir(
 622                                         sub_fd,
 623                                         /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
 624                                         /* statx_mask= */ 0,
 625                                         /* n_depth_max= */ UINT_MAX,
 626                                         RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
 627                                         load_cred_recurse_dir_cb,
 628                                         &(struct load_cred_args) {
 629                                                 .context = context,
 630                                                 .params = params,
 631                                                 .encrypted = lc->encrypted,
 632                                                 .unit = unit,
 633                                                 .dfd = dfd,
 634                                                 .uid = uid,
 635                                                 .gid = gid,
 636                                                 .ownership_ok = ownership_ok,
 637                                                 .left = &left,
 638                                         });
 639                 if (r < 0)
 640                         return r;
 641         }
 642
 643         /* Next, look for system credentials and credentials in the credentials store. Note that these do not
 644          * override any credentials found earlier. */
 645         SET_FOREACH(ic, context->import_credentials) {
 646                 _cleanup_free_ char **search_path = NULL;
 647
 648                 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
 649                 if (!search_path)
 650                         return -ENOMEM;
 651
 652                 r = load_credential_glob(
 653                                 ic,
 654                                 /* encrypted = */ false,
 655                                 search_path,
 656                                 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
 657                                 dfd,
 658                                 uid,
 659                                 gid,
 660                                 ownership_ok,
 661                                 &left);
 662                 if (r < 0)
 663                         return r;
 664
 665                 search_path = strv_free(search_path);
 666                 search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
 667                 if (!search_path)
 668                         return -ENOMEM;
 669
 670                 r = load_credential_glob(
 671                                 ic,
 672                                 /* encrypted = */ true,
 673                                 search_path,
 674                                 READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
 675                                 dfd,
 676                                 uid,
 677                                 gid,
 678                                 ownership_ok,
 679                                 &left);
 680                 if (r < 0)
 681                         return r;
 682         }
 683
 684         /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
 685          * add them, so that they can act as a "default" if the same credential is specified multiple times. */
 686         HASHMAP_FOREACH(sc, context->set_credentials) {
 687                 _cleanup_(erase_and_freep) void *plaintext = NULL;
 688                 const char *data;
 689                 size_t size, add;
 690
 691                 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
 692                  * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
 693                  * slow and involved, hence it's nice to be able to skip that if the credential already
 694                  * exists anyway. */
 695                 if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
 696                         continue;
 697                 if (errno != ENOENT)
 698                         return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
 699
 700                 if (sc->encrypted) {
 701                         r = decrypt_credential_and_warn(sc->id, now(CLOCK_REALTIME), NULL, NULL, sc->data, sc->size, &plaintext, &size);
 702                         if (r < 0)
 703                                 return r;
 704
 705                         data = plaintext;
 706                 } else {
 707                         data = sc->data;
 708                         size = sc->size;
 709                 }
 710
 711                 add = strlen(sc->id) + size;
 712                 if (add > left)
 713                         return -E2BIG;
 714
 715                 r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
 716                 if (r < 0)
 717                         return r;
 718
 719                 left -= add;
 720         }
 721
 722         r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
 723         if (r < 0)
 724                 return r;
 725
 726         /* After we created all keys with the right perms, also make sure the credential store as a whole is
 727          * accessible */
 728
 729         if (uid_is_valid(uid) && uid != getuid()) {
 730                 r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
 731                 if (r < 0) {
 732                         if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
 733                                 return r;
 734
 735                         if (!ownership_ok)
 736                                 return r;
 737
 738                         if (fchown(dfd, uid, gid) < 0)
 739                                 return -errno;
 740                 }
 741         }
 742
 743         return 0;
 744 }
 745
 746 static int setup_credentials_internal(
 747                 const ExecContext *context,
 748                 const ExecParameters *params,
 749                 const char *unit,         /* context, params and unit are only handed on to acquire_credentials() here */
 750                 const char *final,        /* This is where the credential store shall eventually end up at */
 751                 const char *workspace,    /* This is where we can prepare it before moving it to the final place */
 752                 bool reuse_workspace,     /* Whether to reuse any existing workspace mount if it already is a mount */
 753                 bool must_mount,          /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
 754                 uid_t uid,                /* uid and gid are likewise handed on to acquire_credentials() */
 755                 gid_t gid) {
 756         /* Prepares the credential store in 'workspace' (or directly in 'final' as fallback) and installs it at 'final'. Returns 0 on success, a negative error code otherwise. */
 757         int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
 758                                    * if we mounted something; false if we definitely can't mount anything */
 759         bool final_mounted;
 760         const char *where;
 761
 762         assert(context);
 763         assert(final);
 764         assert(workspace);
 765
 766         if (reuse_workspace) {
 767                 r = path_is_mount_point(workspace, NULL, 0);
 768                 if (r < 0)
 769                         return r;
 770                 if (r > 0)
 771                         workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
 772                                                    * it, let's keep this in mind */
 773                 else
 774                         workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
 775         } else
 776                 workspace_mounted = -1; /* ditto */
 777
 778         r = path_is_mount_point(final, NULL, 0);
 779         if (r < 0)
 780                 return r;
 781         if (r > 0) {
 782                 /* If the final place already has something mounted, we use that. If the workspace also has
 783                  * something mounted we assume it's actually the same mount (but with MS_RDONLY
 784                  * different). */
 785                 final_mounted = true;
 786
 787                 if (workspace_mounted < 0) {
 788                         /* If the final place is mounted, but the workspace isn't, then let's bind mount
 789                          * the final version to the workspace, and make it writable, so that we can make
 790                          * changes */
 791
 792                         r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
 793                         if (r < 0)
 794                                 return r;
 795
 796                         r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
 797                         if (r < 0)
 798                                 return r;
 799
 800                         workspace_mounted = true;
 801                 }
 802         } else
 803                 final_mounted = false;
 804
 805         if (workspace_mounted < 0) {
 806                 /* Nothing is mounted on the workspace yet, let's try to mount something now */
 807
 808                 r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
 809                 if (r < 0) {
 810                         /* If that didn't work, try to make a bind mount from the final to the workspace, so
 811                          * that we can make it writable there. */
 812                         r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
 813                         if (r < 0) {
 814                                 if (!ERRNO_IS_PRIVILEGE(r))
 815                                         /* Propagate anything that isn't a permission problem. */
 816                                         return r;
 817
 818                                 if (must_mount)
 819                                         /* If it's not OK to use the plain directory fallback, propagate all
 820                                          * errors too. */
 821                                         return r;
 822
 823                                 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
 824                                  * for compat with container envs, and just use the final dir as is. */
 825
 826                                 workspace_mounted = false;
 827                         } else {
 828                                 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
 829                                 r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
 830                                 if (r < 0)
 831                                         return r;
 832
 833                                 workspace_mounted = true;
 834                         }
 835                 } else
 836                         workspace_mounted = true;
 837         }
 838         /* With must_mount set, every path above that could not establish a mount has already returned */
 839         assert(!must_mount || workspace_mounted > 0);
 840         where = workspace_mounted ? workspace : final;
 841         /* Fixing up the label is best effort: the return value is deliberately discarded */
 842         (void) label_fix_full(AT_FDCWD, where, final, 0);
 843
 844         r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
 845         if (r < 0)
 846                 return r;
 847
 848         if (workspace_mounted) {
 849                 bool install;
 850
 851                 /* Determine if we should actually install the prepared mount in the final location by bind
 852                  * mounting it there. We do so only if the mount is not established there already, and if the
 853                  * mount is actually non-empty (i.e. carries at least one credential). Note that in the best
 854                  * case we are doing all this in a mount namespace, thus no one else will see that we
 855                  * allocated a file system we are getting rid of again here. */
 856                 if (final_mounted)
 857                         install = false; /* already installed */
 858                 else {
 859                         r = dir_is_empty(where, /* ignore_hidden_or_backup= */ false);
 860                         if (r < 0)
 861                                 return r;
 862
 863                         install = r == 0; /* install only if non-empty */
 864                 }
 865
 866                 if (install) {
 867                         /* Make workspace read-only now, so that any bind mount we make from it defaults to
 868                          * read-only too */
 869                         r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
 870                         if (r < 0)
 871                                 return r;
 872
 873                         /* And mount it to the final place, read-only */
 874                         r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
 875                 } else
 876                         /* Otherwise get rid of it */
 877                         r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
 878                 if (r < 0)
 879                         return r;
 880         } else {
 881                 _cleanup_free_ char *parent = NULL;
 882
 883                 /* If we do not have our own mount but use the plain directory fallback, then we need to
 884                  * open access to the top-level credential directory and the per-service directory now */
 885                 /* Note that only the parent directory of 'final' is chmod()ed below */
 886                 r = path_extract_directory(final, &parent);
 887                 if (r < 0)
 888                         return r;
 889                 if (chmod(parent, 0755) < 0)
 890                         return -errno;
 891         }
 892
 893         return 0;
 894 }
 895
 896 int exec_setup_credentials(
 897                 const ExecContext *context,
 898                 const ExecParameters *params,
 899                 const char *unit,
 900                 uid_t uid,
 901                 gid_t gid) {
 902         /* Sets up <runtime prefix>/credentials/<unit>. Returns 0 if there is nothing to do or on success, a negative error code otherwise. */
 903         _cleanup_free_ char *p = NULL, *q = NULL;
 904         int r;
 905
 906         assert(context);
 907         assert(params);
 908
 909         if (!exec_context_has_credentials(context))
 910                 return 0;
 911
 912         if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
 913                 return -EINVAL;
 914
 915         /* This is where we'll place stuff when we are done; this main credentials directory is world-readable,
 916          * and the subdir we mount over with a read-only file system readable by the service's user */
 917         q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
 918         if (!q)
 919                 return -ENOMEM;
 920
 921         r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
 922         if (r < 0 && r != -EEXIST)
 923                 return r;
 924
 925         p = path_join(q, unit);
 926         if (!p)
 927                 return -ENOMEM;
 928
 929         r = mkdir_label(p, 0700); /* per-unit dir: private to user */
 930         if (r < 0 && r != -EEXIST)
 931                 return r;
 932         /* r < 0: safe_fork() reported an error (handled below); r == 0: we are the child; otherwise we are the parent and fall through to the end */
 933         r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
 934         if (r < 0) {
 935                 _cleanup_(rmdir_and_freep) char *u = NULL; /* remove the temporary workspace if we can */
 936                 _cleanup_free_ char *t = NULL;
 937
 938                 /* If this is not a privilege or support issue then propagate the error */
 939                 if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
 940                         return r;
 941
 942                 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
 943                  * it into place, so that users can't access half-initialized credential stores. */
 944                 t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
 945                 if (!t)
 946                         return -ENOMEM;
 947
 948                 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
 949                  * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
 950                  * after it is fully set up */
 951                 u = path_join(t, unit);
 952                 if (!u)
 953                         return -ENOMEM;
 954
 955                 FOREACH_STRING(i, t, u) {
 956                         r = mkdir_label(i, 0700);
 957                         if (r < 0 && r != -EEXIST)
 958                                 return r;
 959                 }
 960
 961                 r = setup_credentials_internal(
 962                                 context,
 963                                 params,
 964                                 unit,
 965                                 p,       /* final mount point */
 966                                 u,       /* temporary workspace to overmount */
 967                                 true,    /* reuse the workspace if it is already a mount */
 968                                 false,   /* it's OK to fall back to a plain directory if we can't mount anything */
 969                                 uid,
 970                                 gid);
 971                 if (r < 0)
 972                         return r;
 973
 974         } else if (r == 0) {
 975
 976                 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
 977                  * we can use the same directory for all cases, after turning off propagation. Question
 978                  * though is: where do we turn off propagation exactly, and where do we place the workspace
 979                  * directory? We need some place that is guaranteed to be a mount point in the host, and
 980                  * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
 981                  * since we ultimately want to move the resulting file system there, i.e. we need propagation
 982                  * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
 983                  * would be visible in the host mount table all the time, which we want to avoid. Hence, what
 984                  * we do here instead is use /dev/ and /dev/shm/ for our purposes. We know for sure that
 985                  * /dev/ is a mount point and we know for sure that /dev/shm/ exists. Hence we can turn off
 986                  * propagation on the former, and then overmount the latter.
 987                  *
 988                  * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
 989                  * for this purpose, but there are few other candidates that work equally well for us, and
 990                  * given that we do this in a privately namespaced short-lived single-threaded process that
 991                  * no one else sees this should be OK to do. */
 992
 993                 /* Turn off propagation from our namespace to host */
 994                 r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
 995                 if (r < 0)
 996                         goto child_fail;
 997
 998                 r = setup_credentials_internal(
 999                                 context,
1000                                 params,
1001                                 unit,
1002                                 p,           /* final mount point */
1003                                 "/dev/shm",  /* temporary workspace to overmount */
1004                                 false,       /* do not reuse /dev/shm if it is already a mount, under no circumstances */
1005                                 true,        /* insist that something is mounted, do not allow fallback to plain directory */
1006                                 uid,
1007                                 gid);
1008                 if (r < 0)
1009                         goto child_fail;
1010
1011                 _exit(EXIT_SUCCESS);
1012                 /* On failure the value of r is dropped; the child only reports EXIT_FAILURE */
1013         child_fail:
1014                 _exit(EXIT_FAILURE);
1015         }
1016
1017         /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
1018          * try to remove it. This matters in particular if we created the dir as mount point but then didn't
1019          * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCES being
1020          * seen by users when trying to access this inode. */
1021         (void) rmdir(p);
1022         return 0;
1023 }