src/shared/mount-setup.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <stdlib.h>
   5 #include <sys/mount.h>
   6 #include <sys/statvfs.h>
   7 #include <unistd.h>
   8
   9 #include "alloc-util.h"
  10 #include "bus-util.h"
  11 #include "cgroup-setup.h"
  12 #include "cgroup-util.h"
  13 #include "conf-files.h"
  14 #include "dev-setup.h"
  15 #include "dirent-util.h"
  16 #include "efi-loader.h"
  17 #include "fd-util.h"
  18 #include "fileio.h"
  19 #include "fs-util.h"
  20 #include "label.h"
  21 #include "log.h"
  22 #include "macro.h"
  23 #include "mkdir-label.h"
  24 #include "mount-setup.h"
  25 #include "mount-util.h"
  26 #include "mountpoint-util.h"
  27 #include "nulstr-util.h"
  28 #include "path-util.h"
  29 #include "recurse-dir.h"
  30 #include "set.h"
  31 #include "smack-util.h"
  32 #include "strv.h"
  33 #include "user-util.h"
  34 #include "virt.h"
  35
  36 typedef enum MountMode {
  37         MNT_NONE           = 0,
  38         MNT_FATAL          = 1 << 0,
  39         MNT_IN_CONTAINER   = 1 << 1,
  40         MNT_CHECK_WRITABLE = 1 << 2,
  41         MNT_FOLLOW_SYMLINK = 1 << 3,
  42 } MountMode;
  43
  44 typedef struct MountPoint {
  45         const char *what;
  46         const char *where;
  47         const char *type;
  48         const char *options;
  49         unsigned long flags;
  50         bool (*condition_fn)(void);
  51         MountMode mode;
  52 } MountPoint;
  53
  54 /* The first three entries we might need before SELinux is up. The
  55  * fourth (securityfs) is needed by IMA to load a custom policy. The
  56  * other ones we can delay until SELinux and IMA are loaded. When
  57  * SMACK is enabled we need smackfs, too, so it's a fifth one. */
  58 #if ENABLE_SMACK
  59 #define N_EARLY_MOUNT 5
  60 #else
  61 #define N_EARLY_MOUNT 4
  62 #endif
  63
  64 static const MountPoint mount_table[] = {
  65         { "proc",        "/proc",                     "proc",       NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
  66           NULL,          MNT_FATAL|MNT_IN_CONTAINER|MNT_FOLLOW_SYMLINK },
  67         { "sysfs",       "/sys",                      "sysfs",      NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
  68           NULL,          MNT_FATAL|MNT_IN_CONTAINER },
  69         { "devtmpfs",    "/dev",                      "devtmpfs",   "mode=755" TMPFS_LIMITS_DEV,               MS_NOSUID|MS_STRICTATIME,
  70           NULL,          MNT_FATAL|MNT_IN_CONTAINER },
  71         { "securityfs",  "/sys/kernel/security",      "securityfs", NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
  72           NULL,          MNT_NONE                   },
  73 #if ENABLE_SMACK
  74         { "smackfs",     "/sys/fs/smackfs",           "smackfs",    "smackfsdef=*",                            MS_NOSUID|MS_NOEXEC|MS_NODEV,
  75           mac_smack_use, MNT_FATAL                  },
  76         { "tmpfs",       "/dev/shm",                  "tmpfs",      "mode=1777,smackfsroot=*",                 MS_NOSUID|MS_NODEV|MS_STRICTATIME,
  77           mac_smack_use, MNT_FATAL                  },
  78 #endif
  79         { "tmpfs",       "/dev/shm",                  "tmpfs",      "mode=1777",                               MS_NOSUID|MS_NODEV|MS_STRICTATIME,
  80           NULL,          MNT_FATAL|MNT_IN_CONTAINER },
  81         { "devpts",      "/dev/pts",                  "devpts",     "mode=620,gid=" STRINGIFY(TTY_GID),        MS_NOSUID|MS_NOEXEC,
  82           NULL,          MNT_IN_CONTAINER           },
  83 #if ENABLE_SMACK
  84         { "tmpfs",       "/run",                      "tmpfs",      "mode=755,smackfsroot=*" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
  85           mac_smack_use, MNT_FATAL                  },
  86 #endif
  87         { "tmpfs",       "/run",                      "tmpfs",      "mode=755" TMPFS_LIMITS_RUN,               MS_NOSUID|MS_NODEV|MS_STRICTATIME,
  88           NULL,          MNT_FATAL|MNT_IN_CONTAINER },
  89         { "cgroup2",     "/sys/fs/cgroup",            "cgroup2",    "nsdelegate,memory_recursiveprot",         MS_NOSUID|MS_NOEXEC|MS_NODEV,
  90           cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
  91         { "cgroup2",     "/sys/fs/cgroup",            "cgroup2",    "nsdelegate",                              MS_NOSUID|MS_NOEXEC|MS_NODEV,
  92           cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
  93         { "cgroup2",     "/sys/fs/cgroup",            "cgroup2",    NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
  94           cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
  95         { "tmpfs",       "/sys/fs/cgroup",            "tmpfs",      "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP,     MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
  96           cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
  97         { "cgroup2",     "/sys/fs/cgroup/unified",    "cgroup2",    "nsdelegate",                              MS_NOSUID|MS_NOEXEC|MS_NODEV,
  98           cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
  99         { "cgroup2",     "/sys/fs/cgroup/unified",    "cgroup2",    NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
 100           cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
 101         { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd,xattr",                 MS_NOSUID|MS_NOEXEC|MS_NODEV,
 102           cg_is_legacy_wanted, MNT_IN_CONTAINER     },
 103         { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd",                       MS_NOSUID|MS_NOEXEC|MS_NODEV,
 104           cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
 105         { "pstore",      "/sys/fs/pstore",            "pstore",     NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
 106           NULL,          MNT_NONE                   },
 107 #if ENABLE_EFI
 108         { "efivarfs",    "/sys/firmware/efi/efivars", "efivarfs",   NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
 109           is_efi_boot,   MNT_NONE                   },
 110 #endif
 111         { "bpf",         "/sys/fs/bpf",               "bpf",        "mode=700",                                MS_NOSUID|MS_NOEXEC|MS_NODEV,
 112           NULL,          MNT_NONE,                  },
 113 };
 114
 115 bool mount_point_is_api(const char *path) {
 116         unsigned i;
 117
 118         /* Checks if this mount point is considered "API", and hence
 119          * should be ignored */
 120
 121         for (i = 0; i < ELEMENTSOF(mount_table); i ++)
 122                 if (path_equal(path, mount_table[i].where))
 123                         return true;
 124
 125         return path_startswith(path, "/sys/fs/cgroup/");
 126 }
 127
 128 bool mount_point_ignore(const char *path) {
 129         /* These are API file systems that might be mounted by other software, we just list them here so that
 130          * we know that we should ignore them. */
 131         FOREACH_STRING(i,
 132                        /* SELinux file systems */
 133                        "/sys/fs/selinux",
 134                        /* Container bind mounts */
 135                        "/dev/console",
 136                        "/proc/kmsg",
 137                        "/proc/sys",
 138                        "/proc/sys/kernel/random/boot_id")
 139                 if (path_equal(path, i))
 140                         return true;
 141
 142         if (path_startswith(path, "/run/host")) /* All mounts passed in from the container manager are
 143                                                  * something we better ignore. */
 144                 return true;
 145
 146         return false;
 147 }
 148
 149 static int mount_one(const MountPoint *p, bool relabel) {
 150         int r, priority;
 151
 152         assert(p);
 153
 154         priority = (p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG;
 155
 156         if (p->condition_fn && !p->condition_fn())
 157                 return 0;
 158
 159         /* Relabel first, just in case */
 160         if (relabel)
 161                 (void) label_fix(p->where, LABEL_IGNORE_ENOENT|LABEL_IGNORE_EROFS);
 162
 163         r = path_is_mount_point(p->where, NULL, AT_SYMLINK_FOLLOW);
 164         if (r < 0 && r != -ENOENT) {
 165                 log_full_errno(priority, r, "Failed to determine whether %s is a mount point: %m", p->where);
 166                 return (p->mode & MNT_FATAL) ? r : 0;
 167         }
 168         if (r > 0)
 169                 return 0;
 170
 171         /* Skip securityfs in a container */
 172         if (!(p->mode & MNT_IN_CONTAINER) && detect_container() > 0)
 173                 return 0;
 174
 175         /* The access mode here doesn't really matter too much, since
 176          * the mounted file system will take precedence anyway. */
 177         if (relabel)
 178                 (void) mkdir_p_label(p->where, 0755);
 179         else
 180                 (void) mkdir_p(p->where, 0755);
 181
 182         log_debug("Mounting %s to %s of type %s with options %s.",
 183                   p->what,
 184                   p->where,
 185                   p->type,
 186                   strna(p->options));
 187
 188         if (FLAGS_SET(p->mode, MNT_FOLLOW_SYMLINK))
 189                 r = RET_NERRNO(mount(p->what, p->where, p->type, p->flags, p->options));
 190         else
 191                 r = mount_nofollow(p->what, p->where, p->type, p->flags, p->options);
 192         if (r < 0) {
 193                 log_full_errno(priority, r, "Failed to mount %s at %s: %m", p->type, p->where);
 194                 return (p->mode & MNT_FATAL) ? r : 0;
 195         }
 196
 197         /* Relabel again, since we now mounted something fresh here */
 198         if (relabel)
 199                 (void) label_fix(p->where, 0);
 200
 201         if (p->mode & MNT_CHECK_WRITABLE) {
 202                 if (access(p->where, W_OK) < 0) {
 203                         r = -errno;
 204
 205                         (void) umount2(p->where, UMOUNT_NOFOLLOW);
 206                         (void) rmdir(p->where);
 207
 208                         log_full_errno(priority, r, "Mount point %s not writable after mounting: %m", p->where);
 209                         return (p->mode & MNT_FATAL) ? r : 0;
 210                 }
 211         }
 212
 213         return 1;
 214 }
 215
 216 static int mount_points_setup(unsigned n, bool loaded_policy) {
 217         unsigned i;
 218         int r = 0;
 219
 220         for (i = 0; i < n; i ++) {
 221                 int j;
 222
 223                 j = mount_one(mount_table + i, loaded_policy);
 224                 if (j != 0 && r >= 0)
 225                         r = j;
 226         }
 227
 228         return r;
 229 }
 230
 231 int mount_setup_early(void) {
 232         assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
 233
 234         /* Do a minimal mount of /proc and friends to enable the most
 235          * basic stuff, such as SELinux */
 236         return mount_points_setup(N_EARLY_MOUNT, false);
 237 }
 238
 239 static const char *join_with(const char *controller) {
 240
 241         static const char* const pairs[] = {
 242                 "cpu", "cpuacct",
 243                 "net_cls", "net_prio",
 244                 NULL
 245         };
 246
 247         assert(controller);
 248
 249         /* This will lookup which controller to mount another controller with. Input is a controller name, and output
 250          * is the other controller name. The function works both ways: you can input one and get the other, and input
 251          * the other to get the one. */
 252
 253         STRV_FOREACH_PAIR(x, y, pairs) {
 254                 if (streq(controller, *x))
 255                         return *y;
 256                 if (streq(controller, *y))
 257                         return *x;
 258         }
 259
 260         return NULL;
 261 }
 262
 263 static int symlink_controller(const char *target, const char *alias) {
 264         const char *a;
 265         int r;
 266
 267         assert(target);
 268         assert(alias);
 269
 270         a = strjoina("/sys/fs/cgroup/", alias);
 271
 272         r = symlink_idempotent(target, a, false);
 273         if (r < 0)
 274                 return log_error_errno(r, "Failed to create symlink %s: %m", a);
 275
 276 #if HAVE_SMACK_RUN_LABEL
 277         const char *p;
 278
 279         p = strjoina("/sys/fs/cgroup/", target);
 280
 281         r = mac_smack_copy(a, p);
 282         if (r < 0 && r != -EOPNOTSUPP)
 283                 return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", p, a);
 284 #endif
 285
 286         return 0;
 287 }
 288
 289 int mount_cgroup_controllers(void) {
 290         _cleanup_set_free_ Set *controllers = NULL;
 291         int r;
 292
 293         if (!cg_is_legacy_wanted())
 294                 return 0;
 295
 296         /* Mount all available cgroup controllers that are built into the kernel. */
 297         r = cg_kernel_controllers(&controllers);
 298         if (r < 0)
 299                 return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
 300
 301         for (;;) {
 302                 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
 303                 const char *other_controller;
 304                 MountPoint p = {
 305                         .what = "cgroup",
 306                         .type = "cgroup",
 307                         .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
 308                         .mode = MNT_IN_CONTAINER,
 309                 };
 310
 311                 controller = set_steal_first(controllers);
 312                 if (!controller)
 313                         break;
 314
 315                 /* Check if we shall mount this together with another controller */
 316                 other_controller = join_with(controller);
 317                 if (other_controller) {
 318                         _cleanup_free_ char *c = NULL;
 319
 320                         /* Check if the other controller is actually available in the kernel too */
 321                         c = set_remove(controllers, other_controller);
 322                         if (c) {
 323
 324                                 /* Join the two controllers into one string, and maintain a stable ordering */
 325                                 if (strcmp(controller, other_controller) < 0)
 326                                         options = strjoin(controller, ",", other_controller);
 327                                 else
 328                                         options = strjoin(other_controller, ",", controller);
 329                                 if (!options)
 330                                         return log_oom();
 331                         }
 332                 }
 333
 334                 /* The simple case, where there's only one controller to mount together */
 335                 if (!options)
 336                         options = TAKE_PTR(controller);
 337
 338                 where = path_join("/sys/fs/cgroup", options);
 339                 if (!where)
 340                         return log_oom();
 341
 342                 p.where = where;
 343                 p.options = options;
 344
 345                 r = mount_one(&p, true);
 346                 if (r < 0)
 347                         return r;
 348
 349                 /* Create symlinks from the individual controller names, in case we have a joined mount */
 350                 if (controller)
 351                         (void) symlink_controller(options, controller);
 352                 if (other_controller)
 353                         (void) symlink_controller(options, other_controller);
 354         }
 355
 356         /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */
 357         (void) mount_nofollow("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP);
 358
 359         return 0;
 360 }
 361
 362 #if HAVE_SELINUX || ENABLE_SMACK
 363 static int relabel_cb(
 364                 RecurseDirEvent event,
 365                 const char *path,
 366                 int dir_fd,
 367                 int inode_fd,
 368                 const struct dirent *de,
 369                 const struct statx *sx,
 370                 void *userdata) {
 371
 372         switch (event) {
 373
 374         case RECURSE_DIR_LEAVE:
 375         case RECURSE_DIR_SKIP_MOUNT:
 376                 /* If we already saw this dirent when entering it or this is a dirent that on a different
 377                  * mount, don't relabel it. */
 378                 return RECURSE_DIR_CONTINUE;
 379
 380         case RECURSE_DIR_ENTER:
 381                 /* /run/initramfs is static data and big, no need to dynamically relabel its contents at boot... */
 382                 if (path_equal(path, "/run/initramfs"))
 383                         return RECURSE_DIR_SKIP_ENTRY;
 384
 385                 _fallthrough_;
 386
 387         default:
 388                 /* Otherwise, label it, even if we had trouble stat()ing it and similar. SELinux can figure this out */
 389                 (void) label_fix(path, 0);
 390                 return RECURSE_DIR_CONTINUE;
 391         }
 392 }
 393
 394 static int relabel_tree(const char *path) {
 395         int r;
 396
 397         r = recurse_dir_at(AT_FDCWD, path, 0, UINT_MAX, RECURSE_DIR_ENSURE_TYPE|RECURSE_DIR_SAME_MOUNT, relabel_cb, NULL);
 398         if (r < 0)
 399                 log_debug_errno(r, "Failed to recursively relabel '%s': %m", path);
 400
 401         return r;
 402 }
 403
 404 static int relabel_cgroup_filesystems(void) {
 405         int r;
 406         struct statfs st;
 407
 408         r = cg_all_unified();
 409         if (r == 0) {
 410                 /* Temporarily remount the root cgroup filesystem to give it a proper label. Do this
 411                    only when the filesystem has been already populated by a previous instance of systemd
 412                    running from initrd. Otherwise don't remount anything and leave the filesystem read-write
 413                    for the cgroup filesystems to be mounted inside. */
 414                 if (statfs("/sys/fs/cgroup", &st) < 0)
 415                         return log_error_errno(errno, "Failed to determine mount flags for /sys/fs/cgroup: %m");
 416
 417                 if (st.f_flags & ST_RDONLY)
 418                         (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, NULL);
 419
 420                 (void) label_fix("/sys/fs/cgroup", 0);
 421                 (void) relabel_tree("/sys/fs/cgroup");
 422
 423                 if (st.f_flags & ST_RDONLY)
 424                         (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT|MS_RDONLY, NULL);
 425
 426         } else if (r < 0)
 427                 return log_error_errno(r, "Failed to determine whether we are in all unified mode: %m");
 428
 429         return 0;
 430 }
 431
 432 static int relabel_extra(void) {
 433         _cleanup_strv_free_ char **files = NULL;
 434         int r, c = 0;
 435
 436         /* Support for relabelling additional files or directories after loading the policy. For this, code in the
 437          * initrd simply has to drop in *.relabel files into /run/systemd/relabel-extra.d/. We'll read all such files
 438          * expecting one absolute path by line and will relabel each (and everyone below that in case the path refers
 439          * to a directory). These drop-in files are supposed to be absolutely minimal, and do not understand comments
 440          * and such. After the operation succeeded the files are removed, and the drop-in directory as well, if
 441          * possible.
 442          */
 443
 444         r = conf_files_list(&files, ".relabel", NULL,
 445                             CONF_FILES_FILTER_MASKED | CONF_FILES_REGULAR,
 446                             "/run/systemd/relabel-extra.d/");
 447         if (r < 0)
 448                 return log_error_errno(r, "Failed to enumerate /run/systemd/relabel-extra.d/, ignoring: %m");
 449
 450         STRV_FOREACH(file, files) {
 451                 _cleanup_fclose_ FILE *f = NULL;
 452
 453                 f = fopen(*file, "re");
 454                 if (!f) {
 455                         log_warning_errno(errno, "Failed to open %s, ignoring: %m", *file);
 456                         continue;
 457                 }
 458
 459                 for (;;) {
 460                         _cleanup_free_ char *line = NULL;
 461
 462                         r = read_line(f, LONG_LINE_MAX, &line);
 463                         if (r < 0) {
 464                                 log_warning_errno(r, "Failed to read %s, ignoring: %m", *file);
 465                                 break;
 466                         }
 467                         if (r == 0) /* EOF */
 468                                 break;
 469
 470                         path_simplify(line);
 471
 472                         if (!path_is_normalized(line)) {
 473                                 log_warning("Path to relabel is not normalized, ignoring: %s", line);
 474                                 continue;
 475                         }
 476
 477                         if (!path_is_absolute(line)) {
 478                                 log_warning("Path to relabel is not absolute, ignoring: %s", line);
 479                                 continue;
 480                         }
 481
 482                         log_debug("Relabelling additional file/directory '%s'.", line);
 483                         (void) label_fix(line, 0);
 484                         (void) relabel_tree(line);
 485                         c++;
 486                 }
 487
 488                 if (unlink(*file) < 0)
 489                         log_warning_errno(errno, "Failed to remove %s, ignoring: %m", *file);
 490         }
 491
 492         /* Remove when we complete things. */
 493         if (rmdir("/run/systemd/relabel-extra.d") < 0 &&
 494             errno != ENOENT)
 495                 log_warning_errno(errno, "Failed to remove /run/systemd/relabel-extra.d/ directory: %m");
 496
 497         return c;
 498 }
 499 #endif
 500
 501 int mount_setup(bool loaded_policy, bool leave_propagation) {
 502         int r;
 503
 504         r = mount_points_setup(ELEMENTSOF(mount_table), loaded_policy);
 505         if (r < 0)
 506                 return r;
 507
 508 #if HAVE_SELINUX || ENABLE_SMACK
 509         /* Nodes in devtmpfs and /run need to be manually updated for
 510          * the appropriate labels, after mounting. The other virtual
 511          * API file systems like /sys and /proc do not need that, they
 512          * use the same label for all their files. */
 513         if (loaded_policy) {
 514                 usec_t before_relabel, after_relabel;
 515                 int n_extra;
 516
 517                 before_relabel = now(CLOCK_MONOTONIC);
 518
 519                 FOREACH_STRING(i, "/dev", "/dev/shm", "/run")
 520                         (void) relabel_tree(i);
 521
 522                 (void) relabel_cgroup_filesystems();
 523
 524                 n_extra = relabel_extra();
 525
 526                 after_relabel = now(CLOCK_MONOTONIC);
 527
 528                 log_info("Relabelled /dev, /dev/shm, /run, /sys/fs/cgroup%s in %s.",
 529                          n_extra > 0 ? ", additional files" : "",
 530                          FORMAT_TIMESPAN(after_relabel - before_relabel, 0));
 531         }
 532 #endif
 533
 534         /* Create a few default symlinks, which are normally created
 535          * by udevd, but some scripts might need them before we start
 536          * udevd. */
 537         dev_setup(NULL, UID_INVALID, GID_INVALID);
 538
 539         /* Mark the root directory as shared in regards to mount propagation. The kernel defaults to "private", but we
 540          * think it makes more sense to have a default of "shared" so that nspawn and the container tools work out of
 541          * the box. If specific setups need other settings they can reset the propagation mode to private if
 542          * needed. Note that we set this only when we are invoked directly by the kernel. If we are invoked by a
 543          * container manager we assume the container manager knows what it is doing (for example, because it set up
 544          * some directories with different propagation modes). */
 545         if (detect_container() <= 0 && !leave_propagation)
 546                 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
 547                         log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
 548
 549         /* Create a few directories we always want around, Note that sd_booted() checks for /run/systemd/system, so
 550          * this mkdir really needs to stay for good, otherwise software that copied sd-daemon.c into their sources will
 551          * misdetect systemd. */
 552         (void) mkdir_label("/run/systemd", 0755);
 553         (void) mkdir_label("/run/systemd/system", 0755);
 554
 555         /* Make sure we have a mount point to hide in sandboxes */
 556         (void) mkdir_label("/run/credentials", 0755);
 557
 558         /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount
 559          * inaccessible nodes from. If we run in a container the host might have created these for us already
 560          * in /run/host/inaccessible/. Use those if we can, since that way we likely get access to block/char
 561          * device nodes that are inaccessible, and if userns is used to nodes that are on mounts owned by a
 562          * userns outside the container and thus nicely read-only and not remountable. */
 563         if (access("/run/host/inaccessible/", F_OK) < 0) {
 564                 if (errno != ENOENT)
 565                         log_debug_errno(errno, "Failed to check if /run/host/inaccessible exists, ignoring: %m");
 566
 567                 (void) make_inaccessible_nodes("/run/systemd", UID_INVALID, GID_INVALID);
 568         } else
 569                 (void) symlink("../host/inaccessible", "/run/systemd/inaccessible");
 570
 571         return 0;
 572 }