]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/nspawn/nspawn-mount.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
index 25d38aa742a0e750bbf575cf3d81f8ff886430a7..c8c941cdfdba33553b885a6a62bb1c5d3ce4582e 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
 /***
   This file is part of systemd.
 
@@ -47,7 +48,7 @@ CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
         assert(t >= 0);
         assert(t < _CUSTOM_MOUNT_TYPE_MAX);
 
-        c = realloc(*l, (*n + 1) * sizeof(CustomMount));
+        c = realloc_multiply(*l, (*n + 1), sizeof(CustomMount));
         if (!c)
                 return NULL;
 
@@ -75,13 +76,18 @@ void custom_mount_free_all(CustomMount *l, unsigned n) {
                         free(m->work_dir);
                 }
 
+                if (m->rm_rf_tmpdir) {
+                        (void) rm_rf(m->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL);
+                        free(m->rm_rf_tmpdir);
+                }
+
                 strv_free(m->lower);
         }
 
         free(l);
 }
 
-int custom_mount_compare(const void *a, const void *b) {
+static int custom_mount_compare(const void *a, const void *b) {
         const CustomMount *x = a, *y = b;
         int r;
 
@@ -97,6 +103,109 @@ int custom_mount_compare(const void *a, const void *b) {
         return 0;
 }
 
+static bool source_path_is_valid(const char *p) {
+        assert(p);
+
+        if (*p == '+')
+                p++;
+
+        return path_is_absolute(p);
+}
+
+static char *resolve_source_path(const char *dest, const char *source) {
+
+        if (!source)
+                return NULL;
+
+        if (source[0] == '+')
+                return prefix_root(dest, source + 1);
+
+        return strdup(source);
+}
+
+int custom_mount_prepare_all(const char *dest, CustomMount *l, unsigned n) {
+        unsigned i;
+        int r;
+
+        /* Prepare all custom mounts. This will make source we know all temporary directories. This is called in the
+         * parent process, so that we know the temporary directories to remove on exit before we fork off the
+         * children. */
+
+        assert(l || n == 0);
+
+        /* Order the custom mounts, and make sure we have a working directory */
+        qsort_safe(l, n, sizeof(CustomMount), custom_mount_compare);
+
+        for (i = 0; i < n; i++) {
+                CustomMount *m = l + i;
+
+                if (m->source) {
+                        char *s;
+
+                        s = resolve_source_path(dest, m->source);
+                        if (!s)
+                                return log_oom();
+
+                        free(m->source);
+                        m->source = s;
+                } else {
+                        /* No source specified? In that case, use a throw-away temporary directory in /var/tmp */
+
+                        m->rm_rf_tmpdir = strdup("/var/tmp/nspawn-temp-XXXXXX");
+                        if (!m->rm_rf_tmpdir)
+                                return log_oom();
+
+                        if (!mkdtemp(m->rm_rf_tmpdir)) {
+                                m->rm_rf_tmpdir = mfree(m->rm_rf_tmpdir);
+                                return log_error_errno(errno, "Failed to acquire temporary directory: %m");
+                        }
+
+                        m->source = strjoin(m->rm_rf_tmpdir, "/src");
+                        if (!m->source)
+                                return log_oom();
+
+                        if (mkdir(m->source, 0755) < 0)
+                                return log_error_errno(errno, "Failed to create %s: %m", m->source);
+                }
+
+                if (m->type == CUSTOM_MOUNT_OVERLAY) {
+                        char **j;
+
+                        STRV_FOREACH(j, m->lower) {
+                                char *s;
+
+                                s = resolve_source_path(dest, *j);
+                                if (!s)
+                                        return log_oom();
+
+                                free(*j);
+                                *j = s;
+                        }
+
+                        if (m->work_dir) {
+                                char *s;
+
+                                s = resolve_source_path(dest, m->work_dir);
+                                if (!s)
+                                        return log_oom();
+
+                                free(m->work_dir);
+                                m->work_dir = s;
+                        } else {
+                                assert(m->source);
+
+                                r = tempfn_random(m->source, NULL, &m->work_dir);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to acquire working directory: %m");
+                        }
+
+                        (void) mkdir_label(m->work_dir, 0700);
+                }
+        }
+
+        return 0;
+}
+
 int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
         _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
         const char *p = s;
@@ -111,20 +220,20 @@ int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only
                 return r;
         if (r == 0)
                 return -EINVAL;
-
         if (r == 1) {
-                destination = strdup(source);
+                destination = strdup(source[0] == '+' ? source+1 : source);
                 if (!destination)
                         return -ENOMEM;
         }
-
         if (r == 2 && !isempty(p)) {
                 opts = strdup(p);
                 if (!opts)
                         return -ENOMEM;
         }
 
-        if (!path_is_absolute(source))
+        if (isempty(source))
+                source = NULL;
+        else if (!source_path_is_valid(source))
                 return -EINVAL;
 
         if (!path_is_absolute(destination))
@@ -132,7 +241,7 @@ int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only
 
         m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
         if (!m)
-                return log_oom();
+                return -ENOMEM;
 
         m->source = source;
         m->destination = destination;
@@ -180,6 +289,71 @@ int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
         return 0;
 }
 
+int overlay_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
+        _cleanup_free_ char *upper = NULL, *destination = NULL;
+        _cleanup_strv_free_ char **lower = NULL;
+        CustomMount *m;
+        int k;
+
+        k = strv_split_extract(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (k < 0)
+                return k;
+        if (k < 2)
+                return -EADDRNOTAVAIL;
+        if (k == 2) {
+                /* If two parameters are specified, the first one is the lower, the second one the upper directory. And
+                 * we'll also define the destination mount point the same as the upper. */
+
+                if (!source_path_is_valid(lower[0]) ||
+                    !source_path_is_valid(lower[1]))
+                        return -EINVAL;
+
+                upper = lower[1];
+                lower[1] = NULL;
+
+                destination = strdup(upper[0] == '+' ? upper+1 : upper); /* take the destination without "+" prefix */
+                if (!destination)
+                        return -ENOMEM;
+        } else {
+                char **i;
+
+                /* If more than two parameters are specified, the last one is the destination, the second to last one
+                 * the "upper", and all before that the "lower" directories. */
+
+                destination = lower[k - 1];
+                upper = lower[k - 2];
+                lower[k - 2] = NULL;
+
+                STRV_FOREACH(i, lower)
+                        if (!source_path_is_valid(*i))
+                                return -EINVAL;
+
+                /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory
+                 * in /var/tmp */
+                if (isempty(upper))
+                        upper = NULL;
+                else if (!source_path_is_valid(upper))
+                        return -EINVAL;
+
+                if (!path_is_absolute(destination))
+                        return -EINVAL;
+        }
+
+        m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
+        if (!m)
+                return -ENOMEM;
+
+        m->destination = destination;
+        m->source = upper;
+        m->lower = lower;
+        m->read_only = read_only;
+
+        upper = destination = NULL;
+        lower = NULL;
+
+        return 0;
+}
+
 static int tmpfs_patch_options(
                 const char *options,
                 bool userns,
@@ -193,30 +367,24 @@ static int tmpfs_patch_options(
         if ((userns && uid_shift != 0) || patch_ids) {
                 assert(uid_shift != UID_INVALID);
 
-                if (options)
-                        (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
-                else
-                        (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
-                if (!buf)
+                if (asprintf(&buf, "%s%suid=" UID_FMT ",gid=" UID_FMT,
+                             options ?: "", options ? "," : "",
+                             uid_shift, uid_shift) < 0)
                         return -ENOMEM;
 
                 options = buf;
         }
 
-#ifdef HAVE_SELINUX
+#if HAVE_SELINUX
         if (selinux_apifs_context) {
                 char *t;
 
-                if (options)
-                        t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
-                else
-                        t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
-                if (!t) {
-                        free(buf);
+                t = strjoin(options ?: "", options ? "," : "",
+                            "context=\"", selinux_apifs_context, "\"");
+                free(buf);
+                if (!t)
                         return -ENOMEM;
-                }
 
-                free(buf);
                 buf = t;
         }
 #endif
@@ -231,9 +399,10 @@ static int tmpfs_patch_options(
         return !!buf;
 }
 
-int mount_sysfs(const char *dest) {
+int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
         const char *full, *top, *x;
         int r;
+        unsigned long extra_flags = 0;
 
         top = prefix_roota(dest, "/sys");
         r = path_check_fstype(top, SYSFS_MAGIC);
@@ -250,8 +419,13 @@ int mount_sysfs(const char *dest) {
 
         (void) mkdir(full, 0755);
 
-        if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
-                return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
+        if (mount_settings & MOUNT_APPLY_APIVFS_RO)
+                extra_flags |= MS_RDONLY;
+
+        r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs",
+                          MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL);
+        if (r < 0)
+                return r;
 
         FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
                 _cleanup_free_ char *from = NULL, *to = NULL;
@@ -266,22 +440,23 @@ int mount_sysfs(const char *dest) {
 
                 (void) mkdir(to, 0755);
 
-                if (mount(from, to, NULL, MS_BIND, NULL) < 0)
-                        return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
+                r = mount_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
+                if (r < 0)
+                        return r;
 
-                if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
-                        return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
+                r = mount_verbose(LOG_ERR, NULL, to, NULL,
+                                  MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
+                if (r < 0)
+                        return r;
         }
 
-        if (umount(full) < 0)
-                return log_error_errno(errno, "Failed to unmount %s: %m", full);
+        r = umount_verbose(full);
+        if (r < 0)
+                return r;
 
         if (rmdir(full) < 0)
                 return log_error_errno(errno, "Failed to remove %s: %m", full);
 
-        x = prefix_roota(top, "/fs/kdbus");
-        (void) mkdir_p(x, 0755);
-
         /* Create mountpoint for cgroups. Otherwise we are not allowed since we
          * remount /sys read-only.
          */
@@ -290,15 +465,69 @@ int mount_sysfs(const char *dest) {
                 (void) mkdir_p(x, 0755);
         }
 
-        if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
-                return log_error_errno(errno, "Failed to make %s read-only: %m", top);
+        return mount_verbose(LOG_ERR, NULL, top, NULL,
+                             MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
+}
+
+static int mkdir_userns(const char *path, mode_t mode, MountSettingsMask mask, uid_t uid_shift) {
+        int r;
+
+        assert(path);
+
+        r = mkdir(path, mode);
+        if (r < 0 && errno != EEXIST)
+                return -errno;
+
+        if ((mask & MOUNT_USE_USERNS) == 0)
+                return 0;
+
+        if (mask & MOUNT_IN_USERNS)
+                return 0;
+
+        r = lchown(path, uid_shift, uid_shift);
+        if (r < 0)
+                return -errno;
 
         return 0;
 }
 
+static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, MountSettingsMask mask, uid_t uid_shift) {
+        const char *p, *e;
+        int r;
+
+        assert(path);
+
+        if (prefix && !path_startswith(path, prefix))
+                return -ENOTDIR;
+
+        /* create every parent directory in the path, except the last component */
+        p = path + strspn(path, "/");
+        for (;;) {
+                char t[strlen(path) + 1];
+
+                e = p + strcspn(p, "/");
+                p = e + strspn(e, "/");
+
+                /* Is this the last component? If so, then we're done */
+                if (*p == 0)
+                        break;
+
+                memcpy(t, path, e - path);
+                t[e-path] = 0;
+
+                if (prefix && path_startswith(prefix, t))
+                        continue;
+
+                r = mkdir_userns(t, mode, mask, uid_shift);
+                if (r < 0)
+                        return r;
+        }
+
+        return mkdir_userns(path, mode, mask, uid_shift);
+}
+
 int mount_all(const char *dest,
-              bool use_userns, bool in_userns,
-              bool use_netns,
+              MountSettingsMask mount_settings,
               uid_t uid_shift, uid_t uid_range,
               const char *selinux_apifs_context) {
 
@@ -308,48 +537,59 @@ int mount_all(const char *dest,
                 const char *type;
                 const char *options;
                 unsigned long flags;
-                bool fatal;
-                bool in_userns;
-                bool use_netns;
+                MountSettingsMask mount_settings;
         } MountPoint;
 
         static const MountPoint mount_table[] = {
-                { "proc",                "/proc",               "proc",  NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV,                              true,  true,  false },
-                { "/proc/sys",           "/proc/sys",           NULL,    NULL,        MS_BIND,                                                   true,  true,  false },   /* Bind mount first ...*/
-                { "/proc/sys/net",       "/proc/sys/net",       NULL,    NULL,        MS_BIND,                                                   true,  true,  true  },   /* (except for this) */
-                { NULL,                  "/proc/sys",           NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true,  true,  false },   /* ... then, make it r/o */
-                { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL,    NULL,        MS_BIND,                                                   false, true,  false },   /* Bind mount first ...*/
-                { NULL,                  "/proc/sysrq-trigger", NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, true,  false },   /* ... then, make it r/o */
-                { "tmpfs",               "/sys",                "tmpfs", "mode=755",  MS_NOSUID|MS_NOEXEC|MS_NODEV,                              true,  false, true  },
-                { "sysfs",               "/sys",                "sysfs", NULL,        MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,                    true,  false, false },
-                { "tmpfs",               "/dev",                "tmpfs", "mode=755",  MS_NOSUID|MS_STRICTATIME,                                  true,  false, false },
-                { "tmpfs",               "/dev/shm",            "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,                         true,  false, false },
-                { "tmpfs",               "/run",                "tmpfs", "mode=755",  MS_NOSUID|MS_NODEV|MS_STRICTATIME,                         true,  false, false },
-                { "tmpfs",               "/tmp",                "tmpfs", "mode=1777", MS_STRICTATIME,                                            true,  false, false },
-#ifdef HAVE_SELINUX
-                { "/sys/fs/selinux",     "/sys/fs/selinux",     NULL,     NULL,       MS_BIND,                                                   false, false, false },  /* Bind mount first */
-                { NULL,                  "/sys/fs/selinux",     NULL,     NULL,       MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false },  /* Then, make it r/o */
+                /* inner child mounts */
+                { "proc",                "/proc",               "proc",  NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV,                              MOUNT_FATAL|MOUNT_IN_USERNS },
+                { "/proc/sys",           "/proc/sys",           NULL,    NULL,        MS_BIND,                                                   MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* Bind mount first ... */
+                { "/proc/sys/net",       "/proc/sys/net",       NULL,    NULL,        MS_BIND,                                                   MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
+                { NULL,                  "/proc/sys",           NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* ... then, make it r/o */
+                { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL,    NULL,        MS_BIND,                                                               MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* Bind mount first ... */
+                { NULL,                  "/proc/sysrq-trigger", NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,             MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* ... then, make it r/o */
+
+                /* outer child mounts */
+                { "tmpfs",               "/tmp",                "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,                                            MOUNT_FATAL },
+                { "tmpfs",               "/sys",                "tmpfs", "mode=755",  MS_NOSUID|MS_NOEXEC|MS_NODEV,                              MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS },
+                { "sysfs",               "/sys",                "sysfs", NULL,        MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,                    MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO },    /* skipped if above was mounted */
+                { "sysfs",               "/sys",                "sysfs", NULL,                  MS_NOSUID|MS_NOEXEC|MS_NODEV,                    MOUNT_FATAL },                          /* skipped if above was mounted */
+
+                { "tmpfs",               "/dev",                "tmpfs", "mode=755",  MS_NOSUID|MS_STRICTATIME,                                  MOUNT_FATAL },
+                { "tmpfs",               "/dev/shm",            "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,                         MOUNT_FATAL },
+                { "tmpfs",               "/run",                "tmpfs", "mode=755",  MS_NOSUID|MS_NODEV|MS_STRICTATIME,                         MOUNT_FATAL },
+#if HAVE_SELINUX
+                { "/sys/fs/selinux",     "/sys/fs/selinux",     NULL,     NULL,       MS_BIND,                                                   0 },  /* Bind mount first */
+                { NULL,                  "/sys/fs/selinux",     NULL,     NULL,       MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, 0 },  /* Then, make it r/o */
 #endif
         };
 
         unsigned k;
         int r;
+        bool use_userns = (mount_settings & MOUNT_USE_USERNS);
+        bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS);
+        bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO);
+        bool in_userns = (mount_settings & MOUNT_IN_USERNS);
 
         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
                 _cleanup_free_ char *where = NULL, *options = NULL;
                 const char *o;
+                bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL);
 
-                if (in_userns != mount_table[k].in_userns)
+                if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS))
                         continue;
 
-                if (!use_netns && mount_table[k].use_netns)
+                if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS))
                         continue;
 
-                where = prefix_root(dest, mount_table[k].where);
-                if (!where)
-                        return log_oom();
+                if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO))
+                        continue;
 
-                r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
+                r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where);
+
+                r = path_is_mount_point(where, NULL, 0);
                 if (r < 0 && r != -ENOENT)
                         return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
 
@@ -357,35 +597,39 @@ int mount_all(const char *dest,
                 if (mount_table[k].what && r > 0)
                         continue;
 
-                r = mkdir_p(where, 0755);
+                r = mkdir_userns_p(dest, where, 0755, mount_settings, uid_shift);
                 if (r < 0 && r != -EEXIST) {
-                        if (mount_table[k].fatal)
+                        if (fatal && r != -EROFS)
                                 return log_error_errno(r, "Failed to create directory %s: %m", where);
 
                         log_debug_errno(r, "Failed to create directory %s: %m", where);
-                        continue;
+                        /* If we failed mkdir() or chown() due to the root
+                         * directory being read only, attempt to mount this fs
+                         * anyway and let mount_verbose log any errors */
+                        if (r != -EROFS)
+                                continue;
                 }
 
                 o = mount_table[k].options;
                 if (streq_ptr(mount_table[k].type, "tmpfs")) {
-                        r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, false, selinux_apifs_context, &options);
+                        if (in_userns)
+                                r = tmpfs_patch_options(o, use_userns, 0, uid_range, true, selinux_apifs_context, &options);
+                        else
+                                r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, false, selinux_apifs_context, &options);
                         if (r < 0)
                                 return log_oom();
                         if (r > 0)
                                 o = options;
                 }
 
-                if (mount(mount_table[k].what,
-                          where,
-                          mount_table[k].type,
-                          mount_table[k].flags,
-                          o) < 0) {
-
-                        if (mount_table[k].fatal)
-                                return log_error_errno(errno, "mount(%s) failed: %m", where);
-
-                        log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
-                }
+                r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG,
+                                  mount_table[k].what,
+                                  where,
+                                  mount_table[k].type,
+                                  mount_table[k].flags,
+                                  o);
+                if (r < 0 && fatal)
+                        return r;
         }
 
         return 0;
@@ -395,12 +639,14 @@ static int parse_mount_bind_options(const char *options, unsigned long *mount_fl
         const char *p = options;
         unsigned long flags = *mount_flags;
         char *opts = NULL;
+        int r;
 
         assert(options);
 
         for (;;) {
                 _cleanup_free_ char *word = NULL;
-                int r = extract_first_word(&p, &word, ",", 0);
+
+                r = extract_first_word(&p, &word, ",", 0);
                 if (r < 0)
                         return log_error_errno(r, "Failed to extract mount option: %m");
                 if (r == 0)
@@ -424,12 +670,13 @@ static int parse_mount_bind_options(const char *options, unsigned long *mount_fl
 }
 
 static int mount_bind(const char *dest, CustomMount *m) {
-        struct stat source_st, dest_st;
-        const char *where;
+
+        _cleanup_free_ char *mount_opts = NULL, *where = NULL;
         unsigned long mount_flags = MS_BIND | MS_REC;
-        _cleanup_free_ char *mount_opts = NULL;
+        struct stat source_st, dest_st;
         int r;
 
+        assert(dest);
         assert(m);
 
         if (m->options) {
@@ -441,9 +688,14 @@ static int mount_bind(const char *dest, CustomMount *m) {
         if (stat(m->source, &source_st) < 0)
                 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
 
-        where = prefix_roota(dest, m->destination);
+        r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
+        if (r < 0)
+                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+        if (r > 0) { /* Path exists already? */
+
+                if (stat(where, &dest_st) < 0)
+                        return log_error_errno(errno, "Failed to stat %s: %m", where);
 
-        if (stat(where, &dest_st) >= 0) {
                 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
                         log_error("Cannot bind mount directory %s on file %s.", m->source, where);
                         return -EINVAL;
@@ -454,7 +706,7 @@ static int mount_bind(const char *dest, CustomMount *m) {
                         return -EINVAL;
                 }
 
-        } else if (errno == ENOENT) {
+        } else { /* Path doesn't exist yet? */
                 r = mkdir_parents_label(where, 0755);
                 if (r < 0)
                         return log_error_errno(r, "Failed to make parents of %s: %m", where);
@@ -470,12 +722,11 @@ static int mount_bind(const char *dest, CustomMount *m) {
                 if (r < 0)
                         return log_error_errno(r, "Failed to create mount point %s: %m", where);
 
-        } else {
-                return log_error_errno(errno, "Failed to stat %s: %m", where);
         }
 
-        if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
-                return log_error_errno(errno, "mount(%s) failed: %m", where);
+        r = mount_verbose(LOG_ERR, m->source, where, NULL, mount_flags, mount_opts);
+        if (r < 0)
+                return r;
 
         if (m->read_only) {
                 r = bind_remount_recursive(where, true, NULL);
@@ -492,31 +743,31 @@ static int mount_tmpfs(
                 bool userns, uid_t uid_shift, uid_t uid_range,
                 const char *selinux_apifs_context) {
 
-        const char *where, *options;
-        _cleanup_free_ char *buf = NULL;
+        const char *options;
+        _cleanup_free_ char *buf = NULL, *where = NULL;
         int r;
 
         assert(dest);
         assert(m);
 
-        where = prefix_roota(dest, m->destination);
-
-        r = mkdir_p_label(where, 0755);
-        if (r < 0 && r != -EEXIST)
-                return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
+        r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
+        if (r < 0)
+                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+        if (r == 0) { /* Doesn't exist yet? */
+                r = mkdir_p_label(where, 0755);
+                if (r < 0)
+                        return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
+        }
 
         r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, false, selinux_apifs_context, &buf);
         if (r < 0)
                 return log_oom();
         options = r > 0 ? buf : m->options;
 
-        if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
-                return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
-
-        return 0;
+        return mount_verbose(LOG_ERR, "tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options);
 }
 
-static char *joined_and_escaped_lower_dirs(char * const *lower) {
+static char *joined_and_escaped_lower_dirs(char **lower) {
         _cleanup_strv_free_ char **sv = NULL;
 
         sv = strv_copy(lower);
@@ -532,18 +783,22 @@ static char *joined_and_escaped_lower_dirs(char * const *lower) {
 }
 
 static int mount_overlay(const char *dest, CustomMount *m) {
-        _cleanup_free_ char *lower = NULL;
-        const char *where, *options;
+
+        _cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL;
+        const char *options;
         int r;
 
         assert(dest);
         assert(m);
 
-        where = prefix_roota(dest, m->destination);
-
-        r = mkdir_label(where, 0755);
-        if (r < 0 && r != -EEXIST)
-                return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
+        r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
+        if (r < 0)
+                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+        if (r == 0) { /* Doesn't exist yet? */
+                r = mkdir_label(where, 0755);
+                if (r < 0)
+                        return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
+        }
 
         (void) mkdir_p_label(m->source, 0755);
 
@@ -551,23 +806,15 @@ static int mount_overlay(const char *dest, CustomMount *m) {
         if (!lower)
                 return log_oom();
 
-        if (m->read_only) {
-                _cleanup_free_ char *escaped_source = NULL;
-
-                escaped_source = shell_escape(m->source, ",:");
-                if (!escaped_source)
-                        return log_oom();
+        escaped_source = shell_escape(m->source, ",:");
+        if (!escaped_source)
+                return log_oom();
 
+        if (m->read_only)
                 options = strjoina("lowerdir=", escaped_source, ":", lower);
-        else {
-                _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
+        else {
+                _cleanup_free_ char *escaped_work_dir = NULL;
 
-                assert(m->work_dir);
-                (void) mkdir_label(m->work_dir, 0700);
-
-                escaped_source = shell_escape(m->source, ",:");
-                if (!escaped_source)
-                        return log_oom();
                 escaped_work_dir = shell_escape(m->work_dir, ",:");
                 if (!escaped_work_dir)
                         return log_oom();
@@ -575,10 +822,7 @@ static int mount_overlay(const char *dest, CustomMount *m) {
                 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
         }
 
-        if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
-                return log_error_errno(errno, "overlay mount to %s failed: %m", where);
-
-        return 0;
+        return mount_verbose(LOG_ERR, "overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options);
 }
 
 int mount_custom(
@@ -637,8 +881,6 @@ static int get_controllers(Set *subsystems) {
                 int r;
                 char *e, *l, *p;
 
-                truncate_nl(line);
-
                 l = strchr(line, ':');
                 if (!l)
                         continue;
@@ -650,10 +892,13 @@ static int get_controllers(Set *subsystems) {
 
                 *e = 0;
 
-                if (streq(l, "") || streq(l, "name=systemd"))
+                if (STR_IN_SET(l, "", "name=systemd", "name=unified"))
                         continue;
 
                 p = strdup(l);
+                if (!p)
+                        return -ENOMEM;
+
                 r = set_consume(subsystems, p);
                 if (r < 0)
                         return r;
@@ -662,14 +907,18 @@ static int get_controllers(Set *subsystems) {
         return 0;
 }
 
-static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy,
-                                         CGroupUnified unified_requested, bool read_only) {
-        char *to;
+static int mount_legacy_cgroup_hierarchy(
+                const char *dest,
+                const char *controller,
+                const char *hierarchy,
+                bool read_only) {
+
+        const char *to, *fstype, *opts;
         int r;
 
         to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
 
-        r = path_is_mount_point(to, 0);
+        r = path_is_mount_point(to, dest, 0);
         if (r < 0 && r != -ENOENT)
                 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
         if (r > 0)
@@ -679,30 +928,41 @@ static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controlle
 
         /* The superblock mount options of the mount point need to be
          * identical to the hosts', and hence writable... */
-        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
-                if (unified_requested >= CGROUP_UNIFIED_SYSTEMD)
-                        r = mount("cgroup", to, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
-                else
-                        r = mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
-        } else
-                r = mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller);
+        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) {
+                fstype = "cgroup2";
+                opts = NULL;
+        } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) {
+                fstype = "cgroup";
+                opts = "none,name=systemd,xattr";
+        } else {
+                fstype = "cgroup";
+                opts = controller;
+        }
 
+        r = mount_verbose(LOG_ERR, "cgroup", to, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
         if (r < 0)
-                return log_error_errno(errno, "Failed to mount to %s: %m", to);
+                return r;
 
-        /* ... hence let's only make the bind mount read-only, not the
-         * superblock. */
+        /* ... hence let's only make the bind mount read-only, not the superblock. */
         if (read_only) {
-                if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
-                        return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
+                r = mount_verbose(LOG_ERR, NULL, to, NULL,
+                                  MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+                if (r < 0)
+                        return r;
         }
+
         return 1;
 }
 
 /* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */
 static int mount_legacy_cgns_supported(
-                CGroupUnified unified_requested, bool userns, uid_t uid_shift,
-                uid_t uid_range, const char *selinux_apifs_context) {
+                const char *dest,
+                CGroupUnified unified_requested,
+                bool userns,
+                uid_t uid_shift,
+                uid_t uid_range,
+                const char *selinux_apifs_context) {
+
         _cleanup_set_free_free_ Set *controllers = NULL;
         const char *cgroup_root = "/sys/fs/cgroup", *c;
         int r;
@@ -710,7 +970,7 @@ static int mount_legacy_cgns_supported(
         (void) mkdir_p(cgroup_root, 0755);
 
         /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
-        r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
+        r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW);
         if (r < 0)
                 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
         if (r == 0) {
@@ -727,11 +987,16 @@ static int mount_legacy_cgns_supported(
                 if (r < 0)
                         return log_oom();
 
-                if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
-                        return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
+                r = mount_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs",
+                                  MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options);
+                if (r < 0)
+                        return r;
         }
 
-        if (cg_all_unified() > 0)
+        r = cg_all_unified();
+        if (r < 0)
+                return r;
+        if (r > 0)
                 goto skip_controllers;
 
         controllers = set_new(&string_hash_ops);
@@ -749,7 +1014,7 @@ static int mount_legacy_cgns_supported(
                 if (!controller)
                         break;
 
-                r = mount_legacy_cgroup_hierarchy("", controller, controller, unified_requested, !userns);
+                r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns);
                 if (r < 0)
                         return r;
 
@@ -783,14 +1048,19 @@ static int mount_legacy_cgns_supported(
         }
 
 skip_controllers:
-        r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false);
+        if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+                r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+                if (r < 0)
+                        return r;
+        }
+
+        r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
         if (r < 0)
                 return r;
 
-        if (!userns) {
-                if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
-                        return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
-        }
+        if (!userns)
+                return mount_verbose(LOG_ERR, NULL, cgroup_root, NULL,
+                                     MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
 
         return 0;
 }
@@ -798,8 +1068,12 @@ skip_controllers:
 /* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */
 static int mount_legacy_cgns_unsupported(
                 const char *dest,
-                CGroupUnified unified_requested, bool userns, uid_t uid_shift, uid_t uid_range,
+                CGroupUnified unified_requested,
+                bool userns,
+                uid_t uid_shift,
+                uid_t uid_range,
                 const char *selinux_apifs_context) {
+
         _cleanup_set_free_free_ Set *controllers = NULL;
         const char *cgroup_root;
         int r;
@@ -809,7 +1083,7 @@ static int mount_legacy_cgns_unsupported(
         (void) mkdir_p(cgroup_root, 0755);
 
         /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
-        r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
+        r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW);
         if (r < 0)
                 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
         if (r == 0) {
@@ -819,11 +1093,16 @@ static int mount_legacy_cgns_unsupported(
                 if (r < 0)
                         return log_oom();
 
-                if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
-                        return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
+                r = mount_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs",
+                                  MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options);
+                if (r < 0)
+                        return r;
         }
 
-        if (cg_all_unified() > 0)
+        r = cg_all_unified();
+        if (r < 0)
+                return r;
+        if (r > 0)
                 goto skip_controllers;
 
         controllers = set_new(&string_hash_ops);
@@ -849,7 +1128,7 @@ static int mount_legacy_cgns_unsupported(
                 if (r == -EINVAL) {
                         /* Not a symbolic link, but directly a single cgroup hierarchy */
 
-                        r = mount_legacy_cgroup_hierarchy(dest, controller, controller, unified_requested, true);
+                        r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
                         if (r < 0)
                                 return r;
 
@@ -869,7 +1148,7 @@ static int mount_legacy_cgns_unsupported(
                                 continue;
                         }
 
-                        r = mount_legacy_cgroup_hierarchy(dest, combined, combined, unified_requested, true);
+                        r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
                         if (r < 0)
                                 return r;
 
@@ -882,14 +1161,18 @@ static int mount_legacy_cgns_unsupported(
         }
 
 skip_controllers:
-        r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false);
+        if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+                r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+                if (r < 0)
+                        return r;
+        }
+
+        r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
         if (r < 0)
                 return r;
 
-        if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
-                return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
-
-        return 0;
+        return mount_verbose(LOG_ERR, NULL, cgroup_root, NULL,
+                             MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
 }
 
 static int mount_unified_cgroups(const char *dest) {
@@ -902,7 +1185,7 @@ static int mount_unified_cgroups(const char *dest) {
 
         (void) mkdir_p(p, 0755);
 
-        r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
+        r = path_is_mount_point(p, dest, AT_SYMLINK_FOLLOW);
         if (r < 0)
                 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
         if (r > 0) {
@@ -916,33 +1199,45 @@ static int mount_unified_cgroups(const char *dest) {
                 return -EINVAL;
         }
 
-        if (mount("cgroup", p, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
-                return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
-
-        return 0;
+        return mount_verbose(LOG_ERR, "cgroup", p, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
 }
 
 int mount_cgroups(
                 const char *dest,
                 CGroupUnified unified_requested,
-                bool userns, uid_t uid_shift, uid_t uid_range,
+                bool userns,
+                uid_t uid_shift,
+                uid_t uid_range,
                 const char *selinux_apifs_context,
                 bool use_cgns) {
 
         if (unified_requested >= CGROUP_UNIFIED_ALL)
                 return mount_unified_cgroups(dest);
-        else if (use_cgns && cg_ns_supported())
-                return mount_legacy_cgns_supported(unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
+        else if (use_cgns)
+                return mount_legacy_cgns_supported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
 
         return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
 }
 
+static int mount_systemd_cgroup_writable_one(const char *systemd_own, const char *systemd_root)
+{
+        int r;
+
+        /* Make our own cgroup a (writable) bind mount */
+        r = mount_verbose(LOG_ERR, systemd_own, systemd_own,  NULL, MS_BIND, NULL);
+        if (r < 0)
+                return r;
+
+        /* And then remount the systemd cgroup root read-only */
+        return mount_verbose(LOG_ERR, NULL, systemd_root, NULL,
+                             MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+}
+
 int mount_systemd_cgroup_writable(
                 const char *dest,
                 CGroupUnified unified_requested) {
 
         _cleanup_free_ char *own_cgroup_path = NULL;
-        const char *systemd_root, *systemd_own;
         int r;
 
         assert(dest);
@@ -955,23 +1250,19 @@ int mount_systemd_cgroup_writable(
         if (path_equal(own_cgroup_path, "/"))
                 return 0;
 
-        if (unified_requested >= CGROUP_UNIFIED_ALL) {
-                systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
-                systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
-        } else {
-                systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
-                systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
-        }
-
-        /* Make our own cgroup a (writable) bind mount */
-        if (mount(systemd_own, systemd_own,  NULL, MS_BIND, NULL) < 0)
-                return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
+        if (unified_requested >= CGROUP_UNIFIED_ALL)
+                return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup", own_cgroup_path),
+                                                         prefix_roota(dest, "/sys/fs/cgroup"));
 
-        /* And then remount the systemd cgroup root read-only */
-        if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
-                return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
+        if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+                r = mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/unified", own_cgroup_path),
+                                                      prefix_roota(dest, "/sys/fs/cgroup/unified"));
+                if (r < 0)
+                        return r;
+        }
 
-        return 0;
+        return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path),
+                                                 prefix_roota(dest, "/sys/fs/cgroup/systemd"));
 }
 
 int setup_volatile_state(
@@ -1008,10 +1299,7 @@ int setup_volatile_state(
         if (r > 0)
                 options = buf;
 
-        if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
-                return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
-
-        return 0;
+        return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
 }
 
 int setup_volatile(
@@ -1044,10 +1332,9 @@ int setup_volatile(
         if (r > 0)
                 options = buf;
 
-        if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
-                r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
+        r = mount_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
+        if (r < 0)
                 goto fail;
-        }
 
         tmpfs_mounted = true;
 
@@ -1060,10 +1347,9 @@ int setup_volatile(
                 goto fail;
         }
 
-        if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
-                r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
+        r = mount_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL);
+        if (r < 0)
                 goto fail;
-        }
 
         bind_mounted = true;
 
@@ -1073,10 +1359,9 @@ int setup_volatile(
                 goto fail;
         }
 
-        if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
-                r = log_error_errno(errno, "Failed to move root mount: %m");
+        r = mount_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL);
+        if (r < 0)
                 goto fail;
-        }
 
         (void) rmdir(template);
 
@@ -1084,28 +1369,123 @@ int setup_volatile(
 
 fail:
         if (bind_mounted)
-                (void) umount(t);
+                (void) umount_verbose(t);
 
         if (tmpfs_mounted)
-                (void) umount(template);
+                (void) umount_verbose(template);
         (void) rmdir(template);
         return r;
 }
 
-VolatileMode volatile_mode_from_string(const char *s) {
-        int b;
+/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
+int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
+        _cleanup_free_ char *root_new = NULL, *root_old = NULL;
+        const char *p = s;
+        int r;
 
-        if (isempty(s))
-                return _VOLATILE_MODE_INVALID;
+        assert(pivot_root_new);
+        assert(pivot_root_old);
 
-        b = parse_boolean(s);
-        if (b > 0)
-                return VOLATILE_YES;
-        if (b == 0)
-                return VOLATILE_NO;
+        r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -EINVAL;
 
-        if (streq(s, "state"))
-                return VOLATILE_STATE;
+        if (isempty(p))
+                root_old = NULL;
+        else {
+                root_old = strdup(p);
+                if (!root_old)
+                        return -ENOMEM;
+        }
+
+        if (!path_is_absolute(root_new))
+                return -EINVAL;
+        if (root_old && !path_is_absolute(root_old))
+                return -EINVAL;
+
+        free_and_replace(*pivot_root_new, root_new);
+        free_and_replace(*pivot_root_old, root_old);
+
+        return 0;
+}
+
+int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
+        _cleanup_free_ char *directory_pivot_root_new = NULL;
+        _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
+        char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX";
+        bool remove_pivot_tmp = false;
+        int r;
+
+        assert(directory);
+
+        if (!pivot_root_new)
+                return 0;
+
+        /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
+         * If pivot_root_old is NULL, the existing / disappears.
+         * This requires a temporary directory, pivot_tmp, which is
+         * not a child of either.
+         *
+         * This is typically used for OSTree-style containers, where
+         * the root partition contains several sysroots which could be
+         * run. Normally, one would be chosen by the bootloader and
+         * pivoted to / by initramfs.
+         *
+         * For example, for an OSTree deployment, pivot_root_new
+         * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
+         * code doesn’t do the /var mount which OSTree expects: use
+         * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
+         *
+         * So in the OSTree case, we’ll end up with something like:
+         *  - directory = /tmp/nspawn-root-123456
+         *  - pivot_root_new = /ostree/deploy/os/deploy/123abc
+         *  - pivot_root_old = /sysroot
+         *  - directory_pivot_root_new =
+         *       /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
+         *  - pivot_tmp = /tmp/nspawn-pivot-123456
+         *  - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
+         *
+         * Requires all file systems at directory and below to be mounted
+         * MS_PRIVATE or MS_SLAVE so they can be moved.
+         */
+        directory_pivot_root_new = prefix_root(directory, pivot_root_new);
+
+        /* Remount directory_pivot_root_new to make it movable. */
+        r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
+        if (r < 0)
+                goto done;
+
+        if (pivot_root_old) {
+                if (!mkdtemp(pivot_tmp)) {
+                        r = log_error_errno(errno, "Failed to create temporary directory: %m");
+                        goto done;
+                }
 
-        return _VOLATILE_MODE_INVALID;
+                remove_pivot_tmp = true;
+                pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old);
+
+                r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto done;
+
+                r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto done;
+
+                r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto done;
+        } else {
+                r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto done;
+        }
+
+done:
+        if (remove_pivot_tmp)
+                (void) rmdir(pivot_tmp);
+
+        return r;
 }