git.ipfire.org Git - thirdparty/systemd.git/commitdiff
sysext: new tool for managing "system extensions" for /usr/ + /opt/
author: Lennart Poettering <lennart@poettering.net>
Fri, 8 Jan 2021 15:57:27 +0000 (16:57 +0100)
committer: Lennart Poettering <lennart@poettering.net>
Tue, 19 Jan 2021 12:41:42 +0000 (13:41 +0100)
meson.build
meson_options.txt
src/shared/machine-image.c
src/shared/machine-image.h
src/sysext/meson.build [new file with mode: 0644]
src/sysext/sysext.c [new file with mode: 0644]

index c12b399b5f29af70723faab6c03743f10a36b01f..ed01c369b1546c34e125f2621c9156df4ebc2287 100644 (file)
@@ -1502,6 +1502,7 @@ foreach term : ['analyze',
                 'nss-myhostname',
                 'nss-systemd',
                 'portabled',
+                'sysext',
                 'pstore',
                 'quotacheck',
                 'randomseed',
@@ -1745,6 +1746,7 @@ subdir('src/portable')
 subdir('src/pstore')
 subdir('src/resolve')
 subdir('src/shutdown')
+subdir('src/sysext')
 subdir('src/systemctl')
 subdir('src/timedate')
 subdir('src/timesync')
@@ -2202,6 +2204,17 @@ if conf.get('ENABLE_PORTABLED') == 1
                 install_dir : rootbindir)
 endif
 
+if conf.get('ENABLE_SYSEXT') == 1
+        public_programs += executable(
+                'systemd-sysext',
+                systemd_sysext_sources,
+                include_directories : includes,
+                link_with : [libshared],
+                install_rpath : rootlibexecdir,
+                install : true,
+                install_dir : rootbindir)
+endif
+
 if conf.get('ENABLE_USERDB') == 1
         executable(
                 'systemd-userwork',
@@ -3735,6 +3748,7 @@ foreach tuple : [
         ['logind'],
         ['machined'],
         ['portabled'],
+        ['sysext'],
         ['userdb'],
         ['homed'],
         ['importd'],
index 1707f64c177e1eab8616240d7b90ef185ac441bd..a42147302997a7943bc7f0f77e7701e3c728451e 100644 (file)
@@ -111,6 +111,8 @@ option('machined', type : 'boolean',
        description : 'install the systemd-machined stack')
 option('portabled', type : 'boolean',
        description : 'install the systemd-portabled stack')
+option('sysext', type : 'boolean',
+       description : 'install the systemd-sysext stack')
 option('userdb', type : 'boolean',
        description : 'install the systemd-userdbd stack')
 option('homed', type : 'combo', choices : ['auto', 'true', 'false'],
index df288bc0e18862c317f9dd899347a7a27a9d2b1b..f7b1f90c6f11477b49f162d821ad499048b87b42 100644 (file)
 #include "xattr-util.h"
 
 static const char* const image_search_path[_IMAGE_CLASS_MAX] = {
-        [IMAGE_MACHINE] =  "/etc/machines\0"              /* only place symlinks here */
-                           "/run/machines\0"              /* and here too */
-                           "/var/lib/machines\0"          /* the main place for images */
-                           "/var/lib/container\0"         /* legacy */
-                           "/usr/local/lib/machines\0"
-                           "/usr/lib/machines\0",
-
-        [IMAGE_PORTABLE] = "/etc/portables\0"             /* only place symlinks here */
-                           "/run/portables\0"             /* and here too */
-                           "/var/lib/portables\0"         /* the main place for images */
-                           "/usr/local/lib/portables\0"
-                           "/usr/lib/portables\0",
+        [IMAGE_MACHINE] =   "/etc/machines\0"              /* only place symlinks here */
+                            "/run/machines\0"              /* and here too */
+                            "/var/lib/machines\0"          /* the main place for images */
+                            "/var/lib/container\0"         /* legacy */
+                            "/usr/local/lib/machines\0"
+                            "/usr/lib/machines\0",
+
+        [IMAGE_PORTABLE] =  "/etc/portables\0"             /* only place symlinks here */
+                            "/run/portables\0"             /* and here too */
+                            "/var/lib/portables\0"         /* the main place for images */
+                            "/usr/local/lib/portables\0"
+                            "/usr/lib/portables\0",
+
+        [IMAGE_EXTENSION] = "/etc/extensions\0"             /* only place symlinks here */
+                            "/run/extensions\0"             /* and here too */
+                            "/var/lib/extensions\0"         /* the main place for images */
+                            "/usr/local/lib/extensions\0"
+                            "/usr/lib/extensions\0",
 };
 
 static Image *image_free(Image *i) {
index 95a8f5cfbd4818739f0371fc836cf60dcf1b7029..eea94e0324bb3d3ea4fd39502b53b043926ee738 100644 (file)
@@ -16,6 +16,7 @@
 typedef enum ImageClass {
         IMAGE_MACHINE,
         IMAGE_PORTABLE,
+        IMAGE_EXTENSION,
         _IMAGE_CLASS_MAX,
         _IMAGE_CLASS_INVALID = -1
 } ImageClass;
diff --git a/src/sysext/meson.build b/src/sysext/meson.build
new file mode 100644 (file)
index 0000000..1517df4
--- /dev/null
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_sysext_sources = files('''
+        sysext.c
+'''.split())
diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c
new file mode 100644 (file)
index 0000000..4f92d56
--- /dev/null
@@ -0,0 +1,1007 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "capability-util.h"
+#include "dissect-image.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "missing_magic.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "os-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "terminal-util.h"
+#include "user-util.h"
+
+/* The operation requested by the user; showing the status is the default. */
+static enum {
+        ACTION_STATUS,
+        ACTION_MERGE,
+        ACTION_UNMERGE,
+        ACTION_REFRESH,
+        ACTION_LIST,
+} arg_action = ACTION_STATUS;
+/* The hierarchies that are inspected, merged and unmerged. */
+static char **arg_hierarchies = NULL; /* "/usr" + "/opt" by default */
+/* Optional root directory; it is passed as the root when resolving hierarchy paths and when reading the
+ * host's os-release data. NULL means the regular host root. */
+static char *arg_root = NULL;
+/* Output format handed to table_print_json() by status(). */
+static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF;
+/* Flags handed to pager_open() before tabular output is printed. */
+static PagerFlags arg_pager_flags = 0;
+
+/* Presumably releases the allocated option values on exit — confirm against the macro's definition. */
+STATIC_DESTRUCTOR_REGISTER(arg_hierarchies, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+
+/* Determines whether the path 'p' is a mount point that was established by us, i.e. a live merged
+ * hierarchy.
+ *
+ * Returns true (> 0) if 'p' is a mount point whose '.systemd-sysext/dev' file names the device the
+ * hierarchy actually lives on, false (0) if 'p' doesn't exist, is not a mount point, carries no such
+ * file, or the recorded device doesn't match, and a negative errno-style value (already logged) if any
+ * of the checks failed. */
+static int is_our_mount_point(const char *p) {
+        _cleanup_free_ char *buf = NULL, *f = NULL;
+        struct stat st;
+        dev_t dev;
+        int r;
+
+        r = path_is_mount_point(p, NULL, 0);
+        if (r == -ENOENT) {
+                log_debug_errno(r, "Hierarchy '%s' doesn't exist.", p);
+                return false;
+        }
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine whether '%s' is a mount point: %m", p);
+        if (r == 0) {
+                log_debug("Hierarchy '%s' is not a mount point, skipping.", p);
+                return false;
+        }
+
+        /* So we know now that it's a mount point. Now let's check if it's one of ours, so that we don't
+         * accidentally unmount the user's own /usr/ but just the mounts we established ourselves. We do this
+         * check by looking into the metadata directory we place in merged mounts: if the file
+         * .systemd-sysext/dev contains the major/minor device pair of the mount we have a good reason to
+         * believe this is one of our mounts. This thorough check has the benefit that we aren't easily
+         * confused if people tar up one of our merged trees and untar them elsewhere where we might mistake
+         * them for a live sysext tree. */
+
+        f = path_join(p, ".systemd-sysext/dev");
+        if (!f)
+                return log_oom();
+
+        r = read_one_line_file(f, &buf);
+        if (r == -ENOENT) {
+                log_debug("Hierarchy '%s' does not carry a .systemd-sysext/dev file, not a sysext merged tree.", p);
+                return false;
+        }
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine whether hierarchy '%s' contains '.systemd-sysext/dev': %m", p);
+
+        r = parse_dev(buf, &dev);
+        if (r < 0)
+                return log_error_errno(r, "Failed to parse device major/minor stored in '.systemd-sysext/dev' file on '%s': %m", p);
+
+        /* lstat() reports its failure via errno; 'r' still holds the (successful) result of parse_dev()
+         * at this point and must not be used as the error code. */
+        if (lstat(p, &st) < 0)
+                return log_error_errno(errno, "Failed to stat %s: %m", p);
+
+        if (st.st_dev != dev) {
+                log_debug("Hierarchy '%s' reports a different device major/minor than what we are seeing, assuming offline copy.", p);
+                return false;
+        }
+
+        return true;
+}
+
+/* Unmounts every sysext mount stacked on the (already resolved) hierarchy path 'p'.
+ *
+ * The loop keeps detaching as long as the top-most mount on 'p' is recognized as one of ours, so that
+ * repeatedly merged hierarchies are fully unwound, and stops at the first mount that is not ours.
+ *
+ * Returns 0 on success (including when there was nothing to unmount) and a negative errno-style value
+ * (already logged) on failure. */
+static int unmerge_hierarchy(const char *p) {
+        int r;
+
+        for (;;) {
+                /* We only unmount /usr/ if it is a mount point and really one of ours, in order not to break
+                 * systems where /usr/ is a mount point of its own already. */
+
+                r = is_our_mount_point(p);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                /* Let the helper log at debug level only: we log the failure at error level ourselves
+                 * right below, and don't want the same failure reported twice. */
+                r = umount_verbose(LOG_DEBUG, p, MNT_DETACH|UMOUNT_NOFOLLOW);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to unmount file system '%s': %m", p);
+
+                log_info("Unmerged '%s'.", p);
+        }
+
+        return 0;
+}
+
+/* Unmerges all configured hierarchies. A failure on one hierarchy does not stop processing of the
+ * others; the first error encountered is what gets returned in the end (0 if everything went fine). */
+static int unmerge(void) {
+        int first_error = 0;
+        char **hierarchy;
+
+        STRV_FOREACH(hierarchy, arg_hierarchies) {
+                _cleanup_free_ char *target = NULL;
+                int k;
+
+                k = chase_symlinks(*hierarchy, arg_root, CHASE_PREFIX_ROOT, &target, NULL);
+                if (k == -ENOENT) {
+                        /* Nothing there, hence nothing to unmerge. */
+                        log_debug_errno(k, "Hierarchy '%s%s' does not exist, ignoring.", strempty(arg_root), *hierarchy);
+                        continue;
+                }
+
+                if (k >= 0)
+                        k = unmerge_hierarchy(target);
+                else
+                        log_error_errno(k, "Failed to resolve path to hierarchy '%s%s': %m", strempty(arg_root), *hierarchy);
+
+                /* Remember only the first failure, but keep going. */
+                if (k < 0 && first_error == 0)
+                        first_error = k;
+        }
+
+        return first_error;
+}
+
+/* Prints a table with one row per configured hierarchy, showing which extensions are currently merged
+ * into it and since when (taken from the mtime of the hierarchy's top-level directory).
+ *
+ * Hierarchies that don't exist are skipped; hierarchies without a merge of ours are listed as "none".
+ * Failures to resolve or check a hierarchy are remembered (first one wins) and returned after the table
+ * has been printed; failures to build or print the table are returned immediately. */
+static int status(void) {
+        _cleanup_(table_unrefp) Table *t = NULL;
+        int r, ret = 0;
+        char **p;
+
+        t = table_new("hierarchy", "extensions", "since");
+        if (!t)
+                return log_oom();
+
+        (void) table_set_empty_string(t, "-");
+
+        STRV_FOREACH(p, arg_hierarchies) {
+                _cleanup_free_ char *resolved = NULL, *f = NULL, *buf = NULL;
+                _cleanup_strv_free_ char **l = NULL;
+                struct stat st;
+
+                r = chase_symlinks(*p, arg_root, CHASE_PREFIX_ROOT, &resolved, NULL);
+                if (r == -ENOENT) {
+                        log_debug_errno(r, "Hierarchy '%s%s' does not exist, ignoring.", strempty(arg_root), *p);
+                        continue;
+                }
+                if (r < 0) {
+                        log_error_errno(r, "Failed to resolve path to hierarchy '%s%s': %m", strempty(arg_root), *p);
+                        goto inner_fail;
+                }
+
+                r = is_our_mount_point(resolved);
+                if (r < 0)
+                        goto inner_fail;
+                if (r == 0) {
+                        r = table_add_many(
+                                        t,
+                                        TABLE_PATH, *p,
+                                        TABLE_STRING, "none",
+                                        TABLE_SET_COLOR, ansi_grey(),
+                                        TABLE_EMPTY);
+                        if (r < 0)
+                                return table_log_add_error(r);
+
+                        continue;
+                }
+
+                /* Read the metadata from the very same (resolved) location we just verified to be one of
+                 * our mounts, so that the root directory prefix, if any, is honoured here too. */
+                f = path_join(resolved, ".systemd-sysext/extensions");
+                if (!f)
+                        return log_oom();
+
+                r = read_full_file(f, &buf, NULL);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to open '%s': %m", f);
+
+                l = strv_split_newlines(buf);
+                if (!l)
+                        return log_oom();
+
+                /* stat() reports its failure via errno, not via 'r'. */
+                if (stat(resolved, &st) < 0)
+                        return log_error_errno(errno, "Failed to stat() '%s': %m", resolved);
+
+                r = table_add_many(
+                                t,
+                                TABLE_PATH, *p,
+                                TABLE_STRV, l,
+                                TABLE_TIMESTAMP, timespec_load(&st.st_mtim));
+                if (r < 0)
+                        return table_log_add_error(r);
+
+                continue;
+
+        inner_fail:
+                /* Remember the first per-hierarchy failure, but still show the remaining hierarchies. */
+                if (ret == 0)
+                        ret = r;
+        }
+
+        (void) table_set_sort(t, (size_t) 0, (size_t) -1);
+
+        if (arg_json_format_flags & (JSON_FORMAT_OFF|JSON_FORMAT_PRETTY|JSON_FORMAT_PRETTY_AUTO))
+                (void) pager_open(arg_pager_flags);
+
+        r = table_print_json(t, stdout, arg_json_format_flags);
+        if (r < 0)
+                return log_error_errno(r, "Failed to print table: %m");
+
+        return ret;
+}
+
+/* Mounts a read-only overlayfs on 'where', using the entries of 'layers' as lower directories, in the
+ * order given. Each layer path is escaped before it is added to the option string, since "," and ":"
+ * act as separators there. Returns 0 on success, negative errno-style value on failure. */
+static int mount_overlayfs(
+                const char *where,
+                char **layers) {
+
+        _cleanup_free_ char *lowerdirs = NULL;
+        const char *delimiter = "";
+        char **layer;
+        int r;
+
+        assert(where);
+
+        lowerdirs = strdup("lowerdir=");
+        if (!lowerdirs)
+                return log_oom();
+
+        STRV_FOREACH(layer, layers) {
+                _cleanup_free_ char *quoted = NULL;
+
+                quoted = shell_escape(*layer, ",:");
+                if (!quoted)
+                        return log_oom();
+
+                if (!strextend(&lowerdirs, delimiter, quoted))
+                        return log_oom();
+
+                /* Every layer but the first one is preceded by a colon */
+                delimiter = ":";
+        }
+
+        /* The option string is complete, establish the overlayfs mount now */
+        r = mount_nofollow_verbose(LOG_ERR, "sysext", where, "overlay", MS_RDONLY, lowerdirs);
+        return r < 0 ? r : 0;
+}
+
+/* Builds the merged, read-only overlayfs for one hierarchy.
+ *
+ *   hierarchy    - the hierarchy to merge, e.g. "/usr"
+ *   extensions   - names of the extensions taken into account (written to the metadata file)
+ *   paths        - root directories of the extension trees, highest-priority first
+ *   meta_path    - directory that receives our synthesized metadata; becomes the top-most layer
+ *   overlay_path - where the resulting overlayfs gets mounted
+ *
+ * Returns 1 if an overlayfs was mounted, 0 if no extension carries files for this hierarchy (in which
+ * case nothing is mounted), and a negative errno-style value (already logged) on failure. */
+static int merge_hierarchy(
+                const char *hierarchy,
+                char **extensions,
+                char **paths,
+                const char *meta_path,
+                const char *overlay_path) {
+
+        _cleanup_free_ char *resolved_hierarchy = NULL, *f = NULL, *buf = NULL;
+        _cleanup_strv_free_ char **layers = NULL;
+        struct stat st;
+        char **p;
+        int r;
+
+        assert(hierarchy);
+        assert(meta_path);
+        assert(overlay_path);
+
+        /* Resolve the path of the host's version of the hierarchy, i.e. what we want to use as lowest layer
+         * in the overlayfs stack. */
+        r = chase_symlinks(hierarchy, arg_root, CHASE_PREFIX_ROOT, &resolved_hierarchy, NULL);
+        if (r == -ENOENT)
+                log_debug_errno(r, "Hierarchy '%s' on host doesn't exist, not merging.", hierarchy);
+        else if (r < 0)
+                return log_error_errno(r, "Failed to resolve host hierarchy '%s': %m", hierarchy);
+        else {
+                r = dir_is_empty(resolved_hierarchy);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to check if host hierarchy '%s' is empty: %m", resolved_hierarchy);
+                if (r > 0) {
+                        log_debug("Host hierarchy '%s' is empty, not merging.", resolved_hierarchy);
+                        resolved_hierarchy = mfree(resolved_hierarchy);
+                }
+        }
+
+        /* Let's generate a metadata file that lists all extensions we took into account for this
+         * hierarchy. We include this in the final fs, to make things nicely discoverable and
+         * recognizable. */
+        f = path_join(meta_path, ".systemd-sysext/extensions");
+        if (!f)
+                return log_oom();
+
+        buf = strv_join(extensions, "\n");
+        if (!buf)
+                return log_oom();
+
+        r = write_string_file(f, buf, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_MKDIR_0755);
+        if (r < 0)
+                return log_error_errno(r, "Failed to write extension meta file '%s': %m", f);
+
+        /* Put the meta path (i.e. our synthesized stuff) at the top of the layer stack */
+        layers = strv_new(meta_path);
+        if (!layers)
+                return log_oom();
+
+        /* Put the extensions in the middle */
+        STRV_FOREACH(p, paths) {
+                _cleanup_free_ char *resolved = NULL;
+
+                r = chase_symlinks(hierarchy, *p, CHASE_PREFIX_ROOT, &resolved, NULL);
+                if (r == -ENOENT) {
+                        log_debug_errno(r, "Hierarchy '%s' in extension '%s' doesn't exist, not merging.", hierarchy, *p);
+                        continue;
+                }
+                if (r < 0)
+                        return log_error_errno(r, "Failed to resolve hierarchy '%s' in extension '%s': %m", hierarchy, *p);
+
+                r = dir_is_empty(resolved);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to check if hierarchy '%s' in extension '%s' is empty: %m", resolved, *p);
+                if (r > 0) {
+                        log_debug("Hierarchy '%s' in extension '%s' is empty, not merging.", hierarchy, *p);
+                        continue;
+                }
+
+                r = strv_consume(&layers, TAKE_PTR(resolved));
+                if (r < 0)
+                        return log_oom();
+        }
+
+        if (!layers[1]) /* No extension with files in this hierarchy? Then don't do anything. */
+                return 0;
+
+        if (resolved_hierarchy) {
+                /* Add the host hierarchy as last (lowest) layer in the stack */
+                r = strv_consume(&layers, TAKE_PTR(resolved_hierarchy));
+                if (r < 0)
+                        return log_oom();
+        }
+
+        r = mkdir_p(overlay_path, 0700);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make directory '%s': %m", overlay_path);
+
+        r = mount_overlayfs(overlay_path, layers);
+        if (r < 0)
+                return r;
+
+        /* The overlayfs superblock is read-only. Let's also mark the bind mount read-only. Extra turbo safety 😎 */
+        r = bind_remount_recursive(overlay_path, MS_RDONLY, MS_RDONLY, NULL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make bind mount '%s' read-only: %m", overlay_path);
+
+        /* Now we have mounted the new file system. Let's now figure out its .st_dev field, and make that
+         * available in the metadata directory. This is useful to detect whether the metadata dir actually
+         * belongs to the fs it is found on: if .st_dev of the top-level mount matches it, it's pretty likely
+         * we are looking at a live sysext tree, and not an unpacked tar or so of one. */
+        if (stat(overlay_path, &st) < 0) /* stat() reports failure via errno, 'r' is stale here */
+                return log_error_errno(errno, "Failed to stat mount '%s': %m", overlay_path);
+
+        free(f);
+        f = path_join(meta_path, ".systemd-sysext/dev");
+        if (!f)
+                return log_oom();
+
+        r = write_string_filef(f, WRITE_STRING_FILE_CREATE, "%u:%u", major(st.st_dev), minor(st.st_dev));
+        if (r < 0)
+                return log_error_errno(r, "Failed to write '%s': %m", f);
+
+        /* Make sure the top-level dir has an mtime marking the point we established the merge */
+        if (utimensat(AT_FDCWD, meta_path, NULL, AT_SYMLINK_NOFOLLOW) < 0) /* likewise reports via errno */
+                return log_error_errno(errno, "Failed fix mtime of '%s': %m", meta_path);
+
+        return 1;
+}
+
+/* Comparator that orders the two pointed-to strings by strverscmp(), i.e. by version sort; intended
+ * for sorting a string array with qsort(). */
+static int strverscmpp(char *const* a, char *const* b) {
+        const char *lhs = *a, *rhs = *b;
+
+        return strverscmp(lhs, rhs);
+}
+
+/* Does the actual merging. Intended to run in a process with its own mount namespace: everything is
+ * first assembled below 'workspace' (which is covered by a private tmpfs), and only the final bind mounts
+ * onto the hierarchies are meant to show up outside.
+ *
+ *   images    - the extension images to consider, keyed by image name
+ *   workspace - directory below /run/ used for mounting the images and building the overlays
+ *
+ * Steps: mount every image, check its os-release data against the host's, sort the accepted extensions
+ * by version, unmerge whatever is currently merged, build one overlayfs per hierarchy and finally bind
+ * mount them into place.
+ *
+ * Returns 1 if the merge was carried out, 0 if there was no (suitable) extension to merge, and a
+ * negative errno-style value (already logged) on failure. */
+static int merge_subprocess(Hashmap *images, const char *workspace) {
+        _cleanup_free_ char *host_os_release_id = NULL, *host_os_release_version_id = NULL, *host_os_release_sysext_level = NULL,
+                *buf = NULL;
+        _cleanup_strv_free_ char **extensions = NULL, **paths = NULL;
+        size_t n_extensions = 0;
+        unsigned n_ignored = 0;
+        Image *img;
+        char **h;
+        int r;
+
+        /* Mark the whole of /run as MS_SLAVE, so that we can mount stuff below it that doesn't show up on
+         * the host otherwise. (The helper logs at debug level only, we log the error ourselves.) */
+        r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/run", NULL, MS_SLAVE|MS_REC, NULL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to remount /run/ MS_SLAVE: %m");
+
+        /* Let's create the workspace if it's missing */
+        r = mkdir_p(workspace, 0700);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create '%s': %m", workspace);
+
+        /* Let's mount a tmpfs to our workspace. This way we don't need to clean up the inodes we mount over,
+         * but let the kernel do that entirely automatically, once our namespace dies. Note that this file
+         * system won't be visible to anyone but us, since we opened our own namespace and then made the
+         * /run/ hierarchy (which our workspace is contained in) MS_SLAVE, see above. */
+        r = mount_nofollow_verbose(LOG_ERR, "sysext", workspace, "tmpfs", 0, "mode=0700");
+        if (r < 0)
+                return r;
+
+        /* Acquire host OS release info, so that we can compare it with the extension's data */
+        r = parse_os_release(
+                        arg_root,
+                        "ID", &host_os_release_id,
+                        "VERSION_ID", &host_os_release_version_id,
+                        "SYSEXT_LEVEL", &host_os_release_sysext_level,
+                        NULL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", empty_to_root(arg_root));
+
+        /* Let's now mount all images */
+        HASHMAP_FOREACH(img, images) {
+                _cleanup_free_ char *p = NULL,
+                        *extension_os_release_id = NULL, *extension_os_release_version_id = NULL, *extension_os_release_sysext_level = NULL;
+
+                p = path_join(workspace, "extensions", img->name);
+                if (!p)
+                        return log_oom();
+
+                r = mkdir_p(p, 0700);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to create %s: %m", p);
+
+                switch (img->type) {
+                case IMAGE_DIRECTORY:
+                case IMAGE_SUBVOLUME:
+                        r = mount_nofollow_verbose(LOG_ERR, img->path, p, NULL, MS_BIND, NULL);
+                        if (r < 0)
+                                return r;
+
+                        /* Make this a read-only bind mount */
+                        r = bind_remount_recursive(p, MS_RDONLY, MS_RDONLY, NULL);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to make bind mount '%s' read-only: %m", p);
+
+                        break;
+
+                case IMAGE_RAW:
+                case IMAGE_BLOCK: {
+                        _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+                        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+                        _cleanup_(decrypted_image_unrefp) DecryptedImage *di = NULL;
+                        DissectImageFlags flags = DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_MOUNT_ROOT_ONLY;
+
+                        r = loop_device_make_by_path(img->path, O_RDONLY, 0, &d);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to set up loopback device: %m");
+
+                        r = dissect_image_and_warn(
+                                        d->fd,
+                                        img->path,
+                                        NULL,
+                                        NULL,
+                                        flags,
+                                        &m);
+                        if (r < 0)
+                                return r;
+
+                        r = dissected_image_decrypt_interactively(
+                                        m, NULL,
+                                        NULL,
+                                        flags,
+                                        &di);
+                        if (r < 0)
+                                return r;
+
+                        r = dissected_image_mount_and_warn(
+                                        m,
+                                        p,
+                                        UID_INVALID,
+                                        flags);
+                        if (r < 0)
+                                return r;
+
+                        if (di) {
+                                r = decrypted_image_relinquish(di);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to relinquish DM devices: %m");
+                        }
+
+                        loop_device_relinquish(d);
+                        break;
+                }
+                default:
+                        assert_not_reached("Unsupported image type");
+                }
+
+                /* Insist that extension images do not overwrite the underlying OS release file (it's fine if
+                 * they place one in /etc/os-release, i.e. where things don't matter, as they aren't
+                 * merged.) */
+                r = chase_symlinks("/usr/lib/os-release", p, CHASE_PREFIX_ROOT, NULL, NULL);
+                if (r < 0) {
+                        if (r != -ENOENT)
+                                return log_error_errno(r, "Failed to determine whether /usr/lib/os-release exists in the extension image: %m");
+                } else
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                               "Extension image contains /usr/lib/os-release file, which is not allowed (it may carry /etc/os-release), refusing.");
+
+                /* Now that we can look into the extension image, let's see if the OS version is compatible */
+                r = parse_os_release(
+                                p,
+                                "ID", &extension_os_release_id,
+                                "VERSION_ID", &extension_os_release_version_id,
+                                "SYSEXT_LEVEL", &extension_os_release_sysext_level,
+                                NULL);
+                if (r == -ENOENT)
+                        log_notice_errno(r, "Extension '%s' carries no os-release data, not checking for version compatibility.", img->name);
+                else if (r < 0)
+                        return log_error_errno(r, "Failed to acquire 'os-release' data of extension '%s': %m", img->name);
+                else {
+                        if (!streq_ptr(host_os_release_id, extension_os_release_id)) {
+                                log_notice("Extension '%s' is for OS '%s', but running on '%s', ignoring extension.",
+                                           img->name, strna(extension_os_release_id), strna(host_os_release_id));
+                                n_ignored++;
+                                continue;
+                        }
+
+                        /* If the extension has a sysext API level declared, then it must match the host API level. Otherwise, compare OS version as a whole */
+                        if (extension_os_release_sysext_level) {
+                                if (!streq_ptr(host_os_release_sysext_level, extension_os_release_sysext_level)) {
+                                        log_notice("Extension '%s' is for sysext API level '%s', but running on sysext API level '%s', ignoring extension.",
+                                                   img->name, extension_os_release_sysext_level, strna(host_os_release_sysext_level));
+                                        n_ignored++;
+                                        continue;
+                                }
+                        } else {
+                                if (!streq_ptr(host_os_release_version_id, extension_os_release_version_id)) {
+                                        /* The extension's VERSION_ID may be unset, hence don't pass it
+                                         * to %s unguarded. */
+                                        log_notice("Extension '%s' is for OS version '%s', but running on OS version '%s', ignoring extension.",
+                                                   img->name, strna(extension_os_release_version_id), strna(host_os_release_version_id));
+                                        n_ignored++;
+                                        continue;
+                                }
+                        }
+
+                        log_debug("Version info of extension '%s' matches host.", img->name);
+                }
+
+                /* Noice! This one is an extension we want. */
+                r = strv_extend(&extensions, img->name);
+                if (r < 0)
+                        return log_oom();
+
+                n_extensions++;
+        }
+
+        /* Nothing left? Then shortcut things */
+        if (n_extensions == 0) {
+                if (n_ignored > 0)
+                        log_info("No suitable extensions found (%u ignored due to incompatible version).", n_ignored);
+                else
+                        log_info("No extensions found.");
+                return 0;
+        }
+
+        /* Order by version sort (i.e. libc strverscmp()) */
+        typesafe_qsort(extensions, n_extensions, strverscmpp);
+
+        buf = strv_join(extensions, "', '");
+        if (!buf)
+                return log_oom();
+
+        log_info("Using extensions '%s'.", buf);
+
+        /* Build table of extension paths (in reverse order) */
+        paths = new0(char*, n_extensions + 1);
+        if (!paths)
+                return log_oom();
+
+        for (size_t k = 0; k < n_extensions; k++) {
+                _cleanup_free_ char *p = NULL;
+
+                assert_se(img = hashmap_get(images, extensions[n_extensions - 1 - k]));
+
+                p = path_join(workspace, "extensions", img->name);
+                if (!p)
+                        return log_oom();
+
+                paths[k] = TAKE_PTR(p);
+        }
+
+        /* Let's now unmerge the status quo ante, since to build the new overlayfs we need a reference to the
+         * underlying fs. */
+        STRV_FOREACH(h, arg_hierarchies) {
+                _cleanup_free_ char *resolved = NULL;
+
+                r = chase_symlinks(*h, arg_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved, NULL);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to resolve hierarchy '%s%s': %m", strempty(arg_root), *h);
+
+                r = unmerge_hierarchy(resolved);
+                if (r < 0)
+                        return r;
+        }
+
+        /* Create overlayfs mounts for all hierarchies */
+        STRV_FOREACH(h, arg_hierarchies) {
+                _cleanup_free_ char *meta_path = NULL, *overlay_path = NULL;
+
+                meta_path = path_join(workspace, "meta", *h); /* The place where to store metadata about this instance */
+                if (!meta_path)
+                        return log_oom();
+
+                overlay_path = path_join(workspace, "overlay", *h); /* The resulting overlayfs instance */
+                if (!overlay_path)
+                        return log_oom();
+
+                r = merge_hierarchy(*h, extensions, paths, meta_path, overlay_path);
+                if (r < 0)
+                        return r;
+        }
+
+        /* And move them all into place. This is where things appear in the host namespace */
+        STRV_FOREACH(h, arg_hierarchies) {
+                _cleanup_free_ char *p = NULL, *resolved = NULL;
+
+                p = path_join(workspace, "overlay", *h);
+                if (!p)
+                        return log_oom();
+
+                if (laccess(p, F_OK) < 0) {
+                        if (errno != ENOENT)
+                                return log_error_errno(errno, "Failed to check if '%s' exists: %m", p);
+
+                        /* Hierarchy apparently was empty in all extensions, and wasn't mounted, ignoring. */
+                        continue;
+                }
+
+                r = chase_symlinks(*h, arg_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved, NULL);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to resolve hierarchy '%s%s': %m", strempty(arg_root), *h);
+
+                r = mkdir_p(resolved, 0755);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to create hierarchy mount point '%s': %m", resolved);
+
+                r = mount_nofollow_verbose(LOG_ERR, p, resolved, NULL, MS_BIND, NULL);
+                if (r < 0)
+                        return r;
+
+                log_info("Merged extensions into '%s'.", resolved);
+        }
+
+        return 1;
+}
+
+/* Runs merge_subprocess() in a forked-off child that has its own mount namespace and translates the
+ * child's exit status into a return value.
+ *
+ * Returns > 0 if the child merged something, 0 if the child found no extensions to merge (child exit
+ * code 123), and a negative errno-style value if forking, waiting or the merge itself failed. */
+static int merge(Hashmap *images) {
+        pid_t pid;
+        int r;
+
+        r = safe_fork("(sd-sysext)", FORK_DEATHSIG|FORK_LOG|FORK_NEW_MOUNTNS, &pid);
+        if (r < 0)
+                return log_error_errno(r, "Failed to fork off child: %m");
+        if (r == 0) {
+                /* Child with its own mount namespace */
+
+                r = merge_subprocess(images, "/run/systemd/sysext");
+                if (r < 0)
+                        _exit(EXIT_FAILURE);
+
+                /* Our namespace ceases to exist here, also implicitly detaching all temporary mounts we
+                 * created below /run. Nice! */
+
+                _exit(r > 0 ? EXIT_SUCCESS : 123); /* 123 means: didn't find any extensions */
+        }
+
+        r = wait_for_terminate_and_check("(sd-sysext)", pid, WAIT_LOG_ABNORMAL);
+        if (r < 0)
+                return r;
+        if (r == 123) /* exit code 123 means: didn't do anything */
+                return 0;
+        if (r > 0) /* Any other non-zero exit code (e.g. the EXIT_FAILURE above) means the child failed.
+                    * Don't report that as "merged something" to the caller. */
+                return log_error_errno(SYNTHETIC_ERRNO(EPROTO), "Failed to merge hierarchies.");
+
+        return 1; /* Child exited with EXIT_SUCCESS, i.e. it merged something */
+}
+
+/* Prints the usage text to stdout.
+ *
+ * Returns 0 on success, or the result of log_oom() if the man page link could not be generated. */
+static int help(void) {
+        _cleanup_free_ char *link = NULL;
+        int r;
+
+        r = terminal_urlify_man("systemd-sysext", "1", &link);
+        if (r < 0)
+                return log_oom();
+
+        /* Note: this tool takes no positional arguments (parse_argv() refuses them), hence the usage
+         * line lists options only. */
+        printf("%1$s [OPTIONS...]\n"
+               "\n%5$sMerge extension images into /usr/ and /opt/ hierarchies.%6$s\n"
+               "\n%3$sCommands:%4$s\n"
+               "  -h --help               Show this help\n"
+               "     --version            Show package version\n"
+               "  -m --merge              Merge extensions into /usr/ and /opt/\n"
+               "  -u --unmerge            Unmerge extensions from /usr/ and /opt/\n"
+               "  -R --refresh            Unmerge/merge extensions again\n"
+               "  -l --list               List all OS images\n"
+               "\n%3$sOptions:%4$s\n"
+               "     --no-pager           Do not pipe output into a pager\n"
+               "     --root=PATH          Operate relative to root path\n"
+               "     --json=pretty|short|off\n"
+               "                          Generate JSON output\n"
+               "\nSee the %2$s for details.\n"
+               , program_invocation_short_name
+               , link
+               , ansi_underline(), ansi_normal()
+               , ansi_highlight(), ansi_normal()
+        );
+
+        return 0;
+}
+
+/* Parses the command line and stores the result in the arg_* globals.
+ *
+ * Returns 1 if the caller shall go on and execute the selected action, <= 0 if it shall exit right
+ * away (negative on error, otherwise whatever help()/version() or the JSON parameter parser
+ * returned). Positional arguments are refused. */
+static int parse_argv(int argc, char *argv[]) {
+
+        /* Only options without a short-option character need an identifier here; --merge, --unmerge,
+         * --refresh and --list are dispatched via 'm', 'u', 'R' and 'l'. */
+        enum {
+                ARG_VERSION = 0x100,
+                ARG_NO_PAGER,
+                ARG_ROOT,
+                ARG_JSON,
+        };
+
+        static const struct option options[] = {
+                { "help",     no_argument,       NULL, 'h'          },
+                { "version",  no_argument,       NULL, ARG_VERSION  },
+                { "no-pager", no_argument,       NULL, ARG_NO_PAGER },
+                { "root",     required_argument, NULL, ARG_ROOT     },
+                { "merge",    no_argument,       NULL, 'm'          },
+                { "unmerge",  no_argument,       NULL, 'u'          },
+                { "refresh",  no_argument,       NULL, 'R'          },
+                { "list",     no_argument,       NULL, 'l'          },
+                { "json",     required_argument, NULL, ARG_JSON     },
+                {}
+        };
+
+        int c, r;
+
+        assert(argc >= 0);
+        assert(argv);
+
+        while ((c = getopt_long(argc, argv, "hmuRl", options, NULL)) >= 0)
+
+                switch (c) {
+
+                case 'h':
+                        return help();
+
+                case ARG_VERSION:
+                        return version();
+
+                case ARG_NO_PAGER:
+                        arg_pager_flags |= PAGER_DISABLE;
+                        break;
+
+                /* If several action switches are given, the last one wins. */
+                case 'm':
+                        arg_action = ACTION_MERGE;
+                        break;
+
+                case 'u':
+                        arg_action = ACTION_UNMERGE;
+                        break;
+
+                case 'R':
+                        arg_action = ACTION_REFRESH;
+                        break;
+
+                case 'l':
+                        arg_action = ACTION_LIST;
+                        break;
+
+                case ARG_ROOT:
+                        r = parse_path_argument_and_warn(optarg, false, &arg_root);
+                        if (r < 0)
+                                return r;
+                        break;
+
+                case ARG_JSON:
+                        r = json_parse_cmdline_parameter_and_warn(optarg, &arg_json_format_flags);
+                        if (r <= 0) /* Zero is passed through too, i.e. the caller will exit cleanly */
+                                return r;
+
+                        break;
+
+                case '?':
+                        return -EINVAL;
+
+                default:
+                        assert_not_reached("Unhandled option");
+                }
+
+        if (argc - optind > 0)
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Unexpected argument.");
+
+        return 1;
+}
+
+/* Applies the $SYSTEMD_SYSEXT_HIERARCHIES override, if the variable is set: the value is split at
+ * ':' and, after validation, replaces arg_hierarchies.
+ *
+ * Returns 0 on success (including when the variable is unset), negative errno-style value if the
+ * variable cannot be parsed, lists no hierarchy, or lists an unacceptable path. */
+static int parse_env(void) {
+        _cleanup_strv_free_ char **hierarchies = NULL;
+        const char *value;
+        char **h;
+        int r;
+
+        /* Operating on hierarchies other than /usr/ and /opt/ might make sense for debugging
+         * purposes, but that's a hacker/debugging feature, hence it is exposed as env var rather
+         * than as cmdline switch. */
+        value = secure_getenv("SYSTEMD_SYSEXT_HIERARCHIES");
+        if (!value)
+                return 0;
+
+        r = strv_split_full(&hierarchies, value, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (r < 0)
+                return log_error_errno(r, "Failed to parse $SYSTEMD_SYSEXT_HIERARCHIES: %m");
+
+        /* Nothing to validate if the list came out empty, refuse right away. */
+        if (strv_isempty(hierarchies))
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "No hierarchies specified, refusing.");
+
+        /* Every entry must be an absolute, normalized path that is not the root directory itself. */
+        STRV_FOREACH(h, hierarchies) {
+                const char *path = *h;
+
+                if (!path_is_absolute(path))
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                               "Hierarchy path '%s' is not absolute, refusing.", path);
+
+                if (!path_is_normalized(path))
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                               "Hierarchy path '%s' is not normalized, refusing.", path);
+
+                if (path_equal(path, "/"))
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                               "Hierarchy path '%s' is the root fs, refusing.", path);
+        }
+
+        strv_free_and_replace(arg_hierarchies, hierarchies);
+        return 0;
+}
+
+/* Program entry point proper: parses command line and environment, then executes the selected
+ * action (status, list, merge, unmerge or refresh).
+ *
+ * Returns a negative errno-style value on failure and a value >= 0 otherwise. */
+static int run(int argc, char *argv[]) {
+        _cleanup_(hashmap_freep) Hashmap *images = NULL;
+        int r;
+
+        log_show_color(true);
+        log_parse_environment();
+        log_open();
+
+        r = parse_argv(argc, argv);
+        if (r <= 0)
+                return r;
+
+        r = parse_env();
+        if (r < 0)
+                return r;
+
+        /* No override via environment? Then operate on the default hierarchies. */
+        if (!arg_hierarchies) {
+                arg_hierarchies = strv_new("/usr", "/opt");
+                if (!arg_hierarchies)
+                        return log_oom();
+        }
+
+        /* Given that things deep down in the child process will fail, let's catch the no-privilege issue
+         * early on */
+        if (!IN_SET(arg_action, ACTION_STATUS, ACTION_LIST)) {
+                /* NOTE(review): have_effective_cap() is assumed to return a negative errno-style value
+                 * if the capability set cannot be queried; don't mistake that for "privileged". */
+                r = have_effective_cap(CAP_SYS_ADMIN);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to check if we have enough privileges: %m");
+                if (r == 0)
+                        return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be privileged.");
+        }
+
+        /* The following two actions don't need the list of installed extension images */
+        if (arg_action == ACTION_STATUS)
+                return status();
+
+        if (arg_action == ACTION_UNMERGE)
+                return unmerge();
+
+        images = hashmap_new(&image_hash_ops);
+        if (!images)
+                return log_oom();
+
+        r = image_discover(IMAGE_EXTENSION, images);
+        if (r < 0)
+                return log_error_errno(r, "Failed to discover extension images: %m");
+
+        switch (arg_action) {
+
+        case ACTION_LIST: {
+                _cleanup_(table_unrefp) Table *t = NULL;
+                Image *img;
+
+                /* Only in non-JSON mode show a friendly message instead of an empty table */
+                if ((arg_json_format_flags & JSON_FORMAT_OFF) && hashmap_isempty(images)) {
+                        log_info("No OS extensions found.");
+                        return 0;
+                }
+
+                t = table_new("name", "type", "path", "time");
+                if (!t)
+                        return log_oom();
+
+                HASHMAP_FOREACH(img, images) {
+                        r = table_add_many(
+                                        t,
+                                        TABLE_STRING, img->name,
+                                        TABLE_STRING, image_type_to_string(img->type),
+                                        TABLE_PATH, img->path,
+                                        TABLE_TIMESTAMP, img->mtime != 0 ? img->mtime : img->crtime);
+                        if (r < 0)
+                                return table_log_add_error(r);
+                }
+
+                (void) table_set_sort(t, (size_t) 0, (size_t) -1);
+
+                if (arg_json_format_flags & (JSON_FORMAT_OFF|JSON_FORMAT_PRETTY|JSON_FORMAT_PRETTY_AUTO))
+                        (void) pager_open(arg_pager_flags);
+
+                r = table_print_json(t, stdout, arg_json_format_flags);
+                if (r < 0)
+                        return table_log_print_error(r);
+
+                r = 0;
+                break;
+        }
+
+        case ACTION_MERGE: {
+                char **p;
+
+                /* In merge mode fail if things are already merged. (In --refresh mode below we'll unmerge if
+                 * we find things are already merged...) */
+                STRV_FOREACH(p, arg_hierarchies) {
+                        _cleanup_free_ char *resolved = NULL;
+
+                        r = chase_symlinks(*p, arg_root, CHASE_PREFIX_ROOT, &resolved, NULL);
+                        if (r == -ENOENT) {
+                                log_debug_errno(r, "Hierarchy '%s%s' does not exist, ignoring.", strempty(arg_root), *p);
+                                continue;
+                        }
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to resolve path to hierarchy '%s%s': %m", strempty(arg_root), *p);
+
+                        r = is_our_mount_point(resolved);
+                        if (r < 0)
+                                return r;
+                        if (r > 0)
+                                return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
+                                                       "Hierarchy '%s' is already merged.", *p);
+                }
+
+                r = merge(images);
+                break;
+        }
+
+        case ACTION_REFRESH:
+                r = merge(images); /* Returns > 0 if it did something, i.e. a new overlayfs is mounted
+                                    * now. When it does so it implicitly unmounts any overlayfs placed there
+                                    * before. Returns == 0 if it did nothing, i.e. no extension images
+                                    * found. In this case the old overlayfs remains in place if there was
+                                    * one. */
+                if (r < 0)
+                        return r;
+                if (r == 0) /* No images found? Then unmerge. The goal of --refresh is after all that after
+                             * having called there's a guarantee that the merge status matches the installed
+                             * extensions. */
+                        r = unmerge();
+
+                /* Net result here is that:
+                 *
+                 * 1. If an overlayfs was mounted before and no extensions exist anymore, we'll have unmerged
+                 *    things.
+                 *
+                 * 2. If an overlayfs was mounted before, and there are still extensions installed, we'll
+                 *    have unmerged and then merged things again.
+                 *
+                 * 3. If an overlayfs so far wasn't mounted, and there are extensions installed, we'll have
+                 *    it mounted now.
+                 *
+                 * 4. If there was no overlayfs mount so far, and no extensions installed, we implement a
+                 *    NOP.
+                 */
+                break;
+
+        default:
+                assert_not_reached("Unexpected action");
+        }
+
+        return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);