]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: reload confexts when reloading notify-reload services
authormaia x. <maia+git@quatern.org>
Mon, 6 Jan 2025 18:31:44 +0000 (10:31 -0800)
committerLuca Boccassi <luca.boccassi@gmail.com>
Mon, 19 May 2025 12:36:21 +0000 (13:36 +0100)
`ExtensionImages=` and `ExtensionDirectories=` now let you specify
vpick-named extensions; however, since they just get set up once when
the service is started, you can't see newer versions without restarting
the service entirely.  Here, also reload confext extensions when you
reload a service. This allows you to deploy a new version of some
configuration and have it picked up at reload time without interruption
to your workload.

Right now, we would only reload confext extensions and leave the sysext
ones behind, since it didn't seem prudent to swap out what is likely
program code at reload. This is made possible by only going for the
`SYSTEMD_CONFEXT_HIERARCHIES` overlays (which only contains `/etc`).

Implementation wise, this uses the new kernel API and two collaborating
child processes under the host & child namespaces in order to gather the
right FDs needed:

  - (1) In child, set up the extension images and directories in a slave
        mountns, and obtain their FDs.
  - (2) Fork into a grandchild under target process namespace, and do a
        "fake" unmount to obtain the FD of the underlying target folder
        (say /etc).
  - (3) In the child again, set up new overlay under host NS rights.

We do not want to do I/O heavy jobs inline in PID1 blocking the state
machine, so add separate async states to handle this case.

Co-authored-by: Luca Boccassi <luca.boccassi@gmail.com>
src/basic/unit-def.c
src/basic/unit-def.h
src/core/execute.c
src/core/execute.h
src/core/manager.c
src/core/namespace.c
src/core/namespace.h
src/core/service.c
src/shared/dissect-image.c
src/shared/dissect-image.h
src/shared/mount-util.c

index 06d31b7f00af30f4ce144e44be48fd4b38365b8c..d9ef13b103a5180f6393041a8d67d81f63269717 100644 (file)
@@ -213,6 +213,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = {
         [SERVICE_RELOAD]                     = "reload",
         [SERVICE_RELOAD_SIGNAL]              = "reload-signal",
         [SERVICE_RELOAD_NOTIFY]              = "reload-notify",
+        [SERVICE_REFRESH_EXTENSIONS]         = "refresh-extensions",
         [SERVICE_STOP]                       = "stop",
         [SERVICE_STOP_WATCHDOG]              = "stop-watchdog",
         [SERVICE_STOP_SIGTERM]               = "stop-sigterm",
index fe77dd0993e84b1fd9455f9da553c5a2cf5f88de..619cf1f32b21a925038e176969d87b890f288de8 100644 (file)
@@ -138,6 +138,7 @@ typedef enum ServiceState {
         SERVICE_RELOAD,            /* Reloading via ExecReload= */
         SERVICE_RELOAD_SIGNAL,     /* Reloading via SIGHUP requested */
         SERVICE_RELOAD_NOTIFY,     /* Waiting for READY=1 after RELOADING=1 notify */
+        SERVICE_REFRESH_EXTENSIONS,/* Refreshing extensions for a reload request */
         SERVICE_MOUNTING,          /* Performing a live mount into the namespace of the service */
         SERVICE_STOP,              /* No STOP_PRE state, instead just register multiple STOP executables */
         SERVICE_STOP_WATCHDOG,
index 417d0be57564bb8d6bbf9e56817a29b89abd7709..86f1be3e4fad644bb1d9741215b683b15701492b 100644 (file)
@@ -73,6 +73,7 @@
 #include "unit-serialize.h"
 #include "user-util.h"
 #include "utmp-wtmp.h"
+#include "vpick.h"
 
 static bool is_terminal_input(ExecInput i) {
         return IN_SET(i,
@@ -1994,6 +1995,25 @@ char** exec_context_get_restrict_filesystems(const ExecContext *c) {
 #endif
 }
 
+/* Reports whether any extension image or extension directory of the context is specified through a
+ * path for which path_uses_vpick() yields a non-zero result. The first non-zero result (which may be a
+ * negative error) is passed through unmodified; 0 is returned if no entry produced one. */
+int exec_context_has_vpicked_extensions(const ExecContext *context) {
+        assert(context);
+
+        /* Extension images first, in array order */
+        FOREACH_ARRAY(image, context->extension_images, context->n_extension_images) {
+                int k = path_uses_vpick(image->source);
+                if (k != 0)
+                        return k;
+        }
+
+        /* Then the extension directories */
+        STRV_FOREACH(dir, context->extension_directories) {
+                int k = path_uses_vpick(*dir);
+                if (k != 0)
+                        return k;
+        }
+
+        return 0;
+}
+
 void exec_status_start(ExecStatus *s, pid_t pid, const dual_timestamp *ts) {
         assert(s);
 
index 756d78117c0cb9acb351e0f469d680bc160960f6..44c15707708f9bf8d298b3e4d607b1efda2ae24c 100644 (file)
@@ -567,6 +567,8 @@ char** exec_context_get_syscall_log(const ExecContext *c);
 char** exec_context_get_address_families(const ExecContext *c);
 char** exec_context_get_restrict_filesystems(const ExecContext *c);
 
+int exec_context_has_vpicked_extensions(const ExecContext *context);
+
 void exec_status_start(ExecStatus *s, pid_t pid, const dual_timestamp *ts);
 void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
 void exec_status_handoff(ExecStatus *s, const struct ucred *ucred, const dual_timestamp *ts);
index 1000818c7a867af2720f4f9e5fc5155c7c3f63ac..5fdde7b6b44f38386587a313c57f204e1dd0427a 100644 (file)
@@ -1850,6 +1850,7 @@ static bool manager_dbus_is_running(Manager *m, bool deserialized) {
                     SERVICE_MOUNTING,
                     SERVICE_RELOAD,
                     SERVICE_RELOAD_NOTIFY,
+                    SERVICE_REFRESH_EXTENSIONS,
                     SERVICE_RELOAD_SIGNAL))
                 return false;
 
index d80fe74afbfb624cf7b3a6c7647b14af1d75a22d..8aae6901a76498213b90dd46cb155ef06984f49d 100644 (file)
@@ -25,6 +25,7 @@
 #include "log.h"
 #include "loop-util.h"
 #include "loopback-setup.h"
+#include "missing_magic.h"
 #include "missing_syscall.h"
 #include "mkdir-label.h"
 #include "mount-util.h"
@@ -35,6 +36,8 @@
 #include "nulstr-util.h"
 #include "os-util.h"
 #include "path-util.h"
+#include "pidref.h"
+#include "process-util.h"
 #include "selinux-util.h"
 #include "socket-util.h"
 #include "sort-util.h"
@@ -111,6 +114,7 @@ typedef struct MountEntry {
         LIST_HEAD(MountOptions, image_options_const);
         char **overlay_layers;
         VeritySettings verity;
+        ImageClass filter_class; /* Used for live updates to skip unapplicable images */
         bool idmapped;
         uid_t idmap_uid;
         gid_t idmap_gid;
@@ -500,6 +504,7 @@ static int append_mount_images(MountList *ml, const MountImage *mount_images, si
                         .image_options_const = m->mount_options,
                         .ignore = m->ignore_enoent,
                         .verity = TAKE_GENERIC(verity, VeritySettings, VERITY_SETTINGS_DEFAULT),
+                        .filter_class = _IMAGE_CLASS_INVALID,
                 };
         }
 
@@ -597,6 +602,7 @@ static int append_extensions(
                         .mode = MOUNT_EXTENSION_IMAGE,
                         .has_prefix = true,
                         .verity = TAKE_GENERIC(verity, VeritySettings, VERITY_SETTINGS_DEFAULT),
+                        .filter_class = _IMAGE_CLASS_INVALID,
                 };
         }
 
@@ -663,6 +669,7 @@ static int append_extensions(
                         .ignore = ignore_enoent,
                         .has_prefix = true,
                         .read_only = true,
+                        .filter_class = _IMAGE_CLASS_INVALID,
                 };
         }
 
@@ -1575,6 +1582,7 @@ static int mount_image(
 
         _cleanup_(extension_release_data_done) ExtensionReleaseData rdata = {};
         _cleanup_free_ char *extension_name = NULL;
+        ImageClass required_class = _IMAGE_CLASS_INVALID;
         int r;
 
         assert(m);
@@ -1595,6 +1603,8 @@ static int mount_image(
                         return log_debug_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory));
                 if (isempty(rdata.os_release_id))
                         return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'ID' field not found or empty in 'os-release' data of OS tree '%s'.", empty_to_root(root_directory));
+
+                required_class = m->filter_class;
         }
 
         r = verity_dissect_and_mount(
@@ -1605,6 +1615,7 @@ static int mount_image(
                         image_policy,
                         /* image_filter= */ NULL,
                         &rdata,
+                        required_class,
                         &m->verity,
                         /* ret_image= */ NULL);
         if (r == -ENOENT && m->ignore)
@@ -1620,6 +1631,10 @@ static int mount_image(
                                        strempty(rdata.os_release_sysext_level),
                                        rdata.os_release_confext_level ? image_class_info[IMAGE_CONFEXT].level_env_print : "",
                                        strempty(rdata.os_release_confext_level));
+        if (r == -ENOCSI) {
+                log_debug("Image %s does not match the expected class, ignoring", mount_entry_source(m));
+                return 0; /* Nothing to do, wrong class */
+        }
         if (r < 0)
                 return log_debug_errno(r, "Failed to mount image %s on %s: %m", mount_entry_source(m), mount_entry_path(m));
 
@@ -1670,7 +1685,7 @@ static int mount_overlay(const MountEntry *m) {
 
         (void) mkdir_p_label(mount_entry_path(m), 0755);
 
-        r = mount_nofollow_verbose(LOG_DEBUG, "overlay", mount_entry_path(m), "overlay", MS_RDONLY, options);
+        r = mount_nofollow_verbose(LOG_DEBUG, "systemd-extensions", mount_entry_path(m), "overlay", MS_RDONLY, options);
         if (r == -ENOENT && m->ignore)
                 return 0;
         if (r < 0)
@@ -1799,11 +1814,14 @@ static int apply_one_mount(
 
                 r = load_extension_release_pairs(
                                 mount_entry_source(m),
-                                IMAGE_SYSEXT,
+                                m->filter_class >= 0 ? m->filter_class : IMAGE_SYSEXT,
                                 extension_name,
                                 /* relax_extension_release_check= */ false,
                                 &extension_release);
                 if (r == -ENOENT) {
+                        if (m->filter_class >= 0)
+                                return 0; /* Nothing to do, wrong class */
+
                         r = load_extension_release_pairs(
                                         mount_entry_source(m),
                                         IMAGE_CONFEXT,
@@ -3291,6 +3309,521 @@ int open_shareable_ns_path(int ns_storage_socket[static 2], const char *path, un
         return 1;
 }
 
+/* Checks whether the directory referenced by 'path' is a mount point of type overlayfs whose mount
+ * source string is "systemd-extensions". If 'fd' is negative, 'path' is opened here via chase();
+ * otherwise the supplied fd is inspected and 'path' is only used in log messages.
+ *
+ * Returns 1 if all three conditions hold, 0 if any of them does not, negative errno on failure. */
+static int is_extension_overlay(const char *path, int fd) {
+        _cleanup_free_ char *mount_source = NULL;
+        _cleanup_close_ int opened_fd = -EBADF;
+        int r;
+
+        assert(path);
+
+        /* No fd supplied by the caller? Then resolve the path ourselves. */
+        if (fd < 0) {
+                r = chase(path, /* root= */ NULL, CHASE_TRAIL_SLASH|CHASE_MUST_BE_DIRECTORY, /* ret_path= */ NULL, &opened_fd);
+                if (r < 0)
+                        return r;
+
+                fd = opened_fd;
+        }
+
+        /* Condition 1: must be a mount point */
+        r = is_mount_point_at(fd, /* filename= */ NULL, /* flags= */ 0);
+        if (r < 0)
+                return log_debug_errno(r, "Unable to determine whether '%s' is a mount point: %m", path);
+        if (r == 0)
+                return 0;
+
+        /* Condition 2: must be an overlayfs */
+        r = fd_is_fs_type(fd, OVERLAYFS_SUPER_MAGIC);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to check if %s is an overlayfs: %m", path);
+        if (r == 0)
+                return 0;
+
+        /* Condition 3: the 'source' field of the mount must carry our marker string */
+        r = path_get_mount_info_at(fd, /* path= */ NULL, /* ret_fstype= */ NULL, /* ret_options= */ NULL, &mount_source);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to get mount info for %s: %m", path);
+
+        return streq_ptr(mount_source, "systemd-extensions");
+}
+
+/* Obtains an fd for the directory tree at 'mount_path' with any "systemd-extensions" overlay on top of
+ * it removed. The work is done in a forked helper that runs in its own (slave) mount namespace, so that
+ * the unmount does not affect the calling process. The helper clones the exposed tree with
+ * open_tree(OPEN_TREE_CLONE) and passes the resulting fd back over a socket pair.
+ *
+ * On success returns 0 and stores the fd in *ret_fd (ownership passes to the caller). Returns a negative
+ * errno on failure, including -ENOTRECOVERABLE if the received tree still is an overlayfs. */
+static int unpeel_get_fd(const char *mount_path, int *ret_fd) {
+        _cleanup_close_pair_ int pipe_fds[2] = EBADF_PAIR;
+        _cleanup_close_ int fs_fd = -EBADF;
+        pid_t pid;
+        int r;
+
+        assert(mount_path);
+        assert(ret_fd);
+
+        r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pipe_fds);
+        if (r < 0)
+                return log_debug_errno(errno, "Failed to create socket pair: %m");
+
+        /* Clone mount namespace here to unpeel without affecting live process */
+        r = safe_fork("(sd-ns-unpeel)", FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &pid);
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                _cleanup_close_ int dir_fd = -EBADF;
+
+                /* Helper process: only the sending end of the socket pair is needed here */
+                pipe_fds[0] = safe_close(pipe_fds[0]);
+
+                /* Opportunistically unmount any overlay at this path */
+                r = is_extension_overlay(mount_path, /* fd= */ -EBADF);
+                if (r < 0) {
+                        log_debug_errno(r, "Unable to determine whether '%s' is an extension overlay: %m", mount_path);
+                        _exit(EXIT_FAILURE);
+                }
+                if (r > 0) {
+                        r = umount_recursive(mount_path, MNT_DETACH);
+                        if (r < 0)
+                                _exit(EXIT_FAILURE);
+                        if (r == 0) /* no umounts done, possible if a previous reload deleted all extensions */
+                                log_debug("No overlay layer unmountable from %s", mount_path);
+                }
+
+                /* Now that /mount_path is exposed, get an FD for it and pass back */
+                dir_fd = open_tree(-EBADF, mount_path, AT_SYMLINK_NOFOLLOW|OPEN_TREE_CLONE);
+                if (dir_fd < 0) {
+                        log_debug_errno(errno, "Failed to clone mount %s: %m", mount_path);
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = fd_is_fs_type(dir_fd, OVERLAYFS_SUPER_MAGIC);
+                if (r < 0) {
+                        log_debug_errno(r, "Unable to determine whether '%s' is an overlay after opening mount tree: %m", mount_path);
+                        _exit(EXIT_FAILURE);
+                }
+                if (r > 0) {
+                        /* r is a positive boolean here, not an errno, hence log without %m so that no
+                         * misleading error string is appended. */
+                        log_debug("'%s' is still an overlay after opening mount tree.", mount_path);
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = send_one_fd(pipe_fds[1], dir_fd, 0);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to send mount fd: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* Caller side: only the receiving end is needed from here on */
+        pipe_fds[1] = safe_close(pipe_fds[1]);
+
+        r = receive_one_fd(pipe_fds[0], 0);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to receive mount fd: %m");
+        fs_fd = r;
+
+        /* Double-check on our side too that what we got really is the underlying tree */
+        r = fd_is_fs_type(fs_fd, OVERLAYFS_SUPER_MAGIC);
+        if (r < 0)
+                return log_debug_errno(r, "Unable to determine if unpeeled directory refers to overlayfs: %m");
+        if (r > 0)
+                return log_debug_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "Unpeeled mount is still an overlayfs, something is weird, refusing.");
+
+        *ret_fd = TAKE_FD(fs_fd);
+        return 0;
+}
+
+/* In target namespace, unmounts an existing overlayfs at mount_path (if one exists), grabs FD from the
+ * underlying directory, and sets up a new overlayfs mount. Coordinates with parent process over pair_fd:
+ * 1. Creates a new overlay fs context and sends its fd to the parent
+ * 2. Fake-unmounts overlay at mount_path (in a cloned mount namespace) to obtain the underlying directory
+ *    fd, and sends that fd to the parent too
+ * 3. Waits for the parent to configure the layers; completion is signalled by the parent sending an fd
+ *    back, which is only used as a notification and closed again
+ * 4. Performs the final mount at mount_path from the overlay fs context created in step 1
+ *
+ * Returns 0 on success, negative errno on failure.
+ *
+ * This is used by refresh_extensions_in_namespace() to peel back any existing overlays and reapply them.
+ */
+static int unpeel_mount_and_setup_overlay(int pair_fd, const char *mount_path) {
+        _cleanup_close_ int dir_unpeeled_fd = -EBADF, overlay_fs_fd = -EBADF, mount_fd = -EBADF, configured_fd = -EBADF;
+        int r;
+
+        assert(pair_fd >= 0);
+        assert(mount_path);
+
+        /* Create new OverlayFS and send to parent */
+        overlay_fs_fd = fsopen("overlay", FSOPEN_CLOEXEC);
+        if (overlay_fs_fd < 0)
+                return log_debug_errno(errno, "Failed to create overlay fs for %s: %m", mount_path);
+
+        r = send_one_fd(pair_fd, overlay_fs_fd, /* flags= */ 0);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to send overlay fs fd to parent: %m");
+
+        /* Unpeel in cloned mount namespace to get underlying directory fd */
+        r = unpeel_get_fd(mount_path, &dir_unpeeled_fd);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to unpeel mount %s: %m", mount_path);
+
+        /* Send the fd to the parent */
+        r = send_one_fd(pair_fd, dir_unpeeled_fd, /* flags= */ 0);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to send %s fd to parent: %m", mount_path);
+
+        /* Wait for parent to signal overlay configuration completion. The received fd is kept in a
+         * cleanup-managed variable so that it is closed on return instead of being leaked once per
+         * overlay. */
+        log_debug("Waiting for configured overlay fs for %s", mount_path);
+        configured_fd = receive_one_fd(pair_fd, 0);
+        if (configured_fd < 0)
+                return log_debug_errno(configured_fd, "Failed to receive configured overlay: %m");
+
+        /* Create the mount */
+        mount_fd = fsmount(overlay_fs_fd, FSMOUNT_CLOEXEC, /* flags= */ 0);
+        if (mount_fd < 0)
+                return log_debug_errno(errno, "Failed to create overlay mount: %m");
+
+        /* Move mount to final location */
+        r = mount_exchange_graceful(mount_fd, mount_path, /* mount_beneath= */ true);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to move overlay to %s: %m", mount_path);
+
+        return 0;
+}
+
+/* Body of the grandchild process: enters the namespaces referenced by pidns_fd/mntns_fd/root_fd and
+ * then walks all MOUNT_OVERLAY entries of the mount list. Entries that still have layers are rebuilt
+ * cooperatively with the parent via unpeel_mount_and_setup_overlay() over pipe_fd; entries without
+ * layers merely get an existing extension overlay unmounted.
+ *
+ * Returns 0 on success, negative errno on failure. */
+static int refresh_grandchild_proc(
+                const PidRef *target,
+                MountList *ml,
+                const char *overlay_prefix,
+                int pidns_fd,
+                int mntns_fd,
+                int root_fd,
+                int pipe_fd) {
+
+        int r;
+
+        assert(pidref_is_set(target));
+        assert(ml);
+        assert(overlay_prefix);
+        assert(pidns_fd >= 0);
+        assert(mntns_fd >= 0);
+        assert(root_fd >= 0);
+        assert(pipe_fd >= 0);
+
+        r = namespace_enter(pidns_fd, mntns_fd, /* netns_fd= */ -EBADF, /* userns_fd= */ -EBADF, root_fd);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to enter namespace: %m");
+
+        FOREACH_ARRAY(m, ml->mounts, ml->n_mounts) {
+                _cleanup_free_ char *mount_path = NULL;
+                const char *relative;
+
+                /* Only the overlay entries are of interest here */
+                if (m->mode != MOUNT_OVERLAY)
+                        continue;
+
+                /* Need an absolute path under the child namespace, rather than the root's: strip the
+                 * overlay prefix if the entry carries it, and anchor the remainder at "/". */
+                relative = path_startswith(mount_entry_unprefixed_path(m), overlay_prefix);
+                if (!relative)
+                        relative = mount_entry_unprefixed_path(m);
+
+                mount_path = path_join("/", relative);
+                if (!mount_path)
+                        return log_oom_debug();
+
+                if (!strv_isempty(m->overlay_layers)) {
+                        /* Regular case: there are layers, hence rebuild the overlay */
+                        r = unpeel_mount_and_setup_overlay(pipe_fd, mount_path);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to setup overlay mount for %s: %m", mount_path);
+
+                        continue;
+                }
+
+                /* If there are no extensions mounted for this overlay layer, instead of setting everything
+                 * up, the correct behavior is to unmount the existing overlay in the target namespace to
+                 * expose the original files. */
+                r = is_extension_overlay(mount_path, /* fd= */ -EBADF);
+                if (r < 0)
+                        return log_debug_errno(r, "Unable to determine whether '%s' is an extension overlay: %m", mount_path);
+                if (r > 0) {
+                        log_debug("No extensions for %s, undoing existing mount", mount_path);
+                        (void) umount_recursive(mount_path, MNT_DETACH);
+                }
+        }
+
+        return 0;
+}
+
+/* Parent-side counterpart of unpeel_mount_and_setup_overlay() for a single mount entry. For a
+ * MOUNT_OVERLAY entry that has layers, this:
+ *   1. receives the overlay fs context fd and the fd of the unpeeled hierarchy from the grandchild,
+ *   2. attaches the unpeeled hierarchy below overlay_prefix so that it is visible on this side,
+ *   3. replaces every overlay layer path by a /proc/self/fd/ style reference to an open_tree() fd,
+ *   4. configures lowerdir= and source= on the overlay fs context and creates the superblock,
+ *   5. sends the overlay fs fd back to the grandchild as completion signal.
+ *
+ * The layer fds opened in step 3 are appended to the caller-owned array *fd_layers (of *n_fd_layers
+ * entries), so that the caller can decide how long they remain open.
+ *
+ * Entries that are not overlays, or overlays without layers, are skipped. Returns 0 on success or skip,
+ * negative errno on failure. */
+static int handle_mount_from_grandchild(
+                MountEntry *m,
+                const char *overlay_prefix,
+                int **fd_layers,
+                size_t *n_fd_layers,
+                int pipe_fd) {
+
+        _cleanup_free_ char *layers = NULL, *options = NULL, *hierarchy_path_moved_mount = NULL;
+        _cleanup_close_ int hierarchy_path_fd = -EBADF, overlay_fs_fd = -EBADF;
+        _cleanup_strv_free_ char **new_layers = NULL;
+        int r;
+
+        assert(m);
+        assert(overlay_prefix);
+        assert(fd_layers);
+        assert(n_fd_layers);
+        assert(pipe_fd >= 0);
+
+        if (m->mode != MOUNT_OVERLAY)
+                return 0;
+
+        const char *mount_path = path_startswith(mount_entry_unprefixed_path(m), overlay_prefix);
+        if (!mount_path)
+                mount_path = mount_entry_unprefixed_path(m);
+
+        /* If there are no extensions mounted for this overlay layer, we only need to
+         * unmount the existing overlay (this is handled in the grandchild process) and
+         * would skip the usual cooperative processing here. */
+        if (strv_isempty(m->overlay_layers)) {
+                log_debug("No layers for %s, skip setting up overlay", mount_path);
+                return 0;
+        }
+
+        /* Receive the fds from grandchild */
+        overlay_fs_fd = receive_one_fd(pipe_fd, 0);
+        if (overlay_fs_fd < 0)
+                return log_debug_errno(overlay_fs_fd, "Failed to receive overlay fs fd from grandchild: %m");
+
+        hierarchy_path_fd = receive_one_fd(pipe_fd, 0);
+        if (hierarchy_path_fd < 0)
+                return log_debug_errno(hierarchy_path_fd, "Failed to receive fd from grandchild for %s: %m", mount_path);
+
+        /* move_mount so that it is visible on our end. */
+        hierarchy_path_moved_mount = path_join(overlay_prefix, mount_path);
+        if (!hierarchy_path_moved_mount)
+                return log_oom_debug();
+
+        (void) mkdir_p_label(hierarchy_path_moved_mount, 0555);
+        /* move_mount() reports failures via errno, like the other raw mount API calls used below */
+        if (move_mount(hierarchy_path_fd, "", AT_FDCWD, hierarchy_path_moved_mount, MOVE_MOUNT_F_EMPTY_PATH) < 0)
+                return log_debug_errno(errno, "Failed to move mount for %s: %m", mount_path);
+
+        /* Turn all overlay layer directories into FD-based references */
+        if (!GREEDY_REALLOC(*fd_layers, *n_fd_layers + strv_length(m->overlay_layers)))
+                return log_oom_debug();
+
+        STRV_FOREACH(ol, m->overlay_layers) {
+                _cleanup_close_ int tree_fd = -EBADF;
+
+                tree_fd = open_tree(-EBADF, *ol, /* flags= */ 0);
+                if (tree_fd < 0)
+                        return log_debug_errno(errno, "Failed to open_tree overlay layer '%s': %m", *ol);
+
+                r = strv_extend(&new_layers, FORMAT_PROC_FD_PATH(tree_fd));
+                if (r < 0)
+                        return log_oom_debug();
+
+                /* Note the parentheses: we index into the array that *fd_layers points to. Without
+                 * them the subscript would be applied to fd_layers itself and then dereferenced, which
+                 * accesses memory outside of the array for every index other than 0. */
+                (*fd_layers)[(*n_fd_layers)++] = TAKE_FD(tree_fd);
+        }
+        m->overlay_layers = strv_free(m->overlay_layers);
+        m->overlay_layers = TAKE_PTR(new_layers);
+
+        layers = strv_join(m->overlay_layers, ":");
+        if (!layers)
+                return log_oom_debug();
+
+        /* Append the underlying hierarchy path as the last lowerdir */
+        options = strjoin(layers, ":", FORMAT_PROC_FD_PATH(hierarchy_path_fd));
+        if (!options)
+                return log_oom_debug();
+
+        if (fsconfig(overlay_fs_fd, FSCONFIG_SET_STRING, "lowerdir", options, 0) < 0)
+                return log_debug_errno(errno, "Failed to set lowerdir=%s: %m", options);
+
+        if (fsconfig(overlay_fs_fd, FSCONFIG_SET_STRING, "source", "systemd-extensions", 0) < 0)
+                return log_debug_errno(errno, "Failed to set source=systemd-extensions: %m");
+
+        /* Create the superblock */
+        if (fsconfig(overlay_fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
+                return log_debug_errno(errno, "Failed to create overlay superblock: %m");
+
+        /* Signal completion to grandchild */
+        r = send_one_fd(pipe_fd, overlay_fs_fd, 0);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to signal overlay configuration complete for %s: %m", mount_path);
+
+        return 0;
+}
+
+/* Applies all extension image/directory entries of the mount list via apply_one_mount() with the class
+ * filter set to IMAGE_CONFEXT, and removes from the list every extension entry for which
+ * apply_one_mount() returned 0. For each removed entry, the layers located below its mount path are
+ * also dropped from the overlay_layers of all MOUNT_OVERLAY entries. The list is compacted in place and
+ * ml->n_mounts is updated accordingly.
+ *
+ * Returns 0 on success, negative errno on failure. */
+static int refresh_apply_and_prune(const NamespaceParameters *p, MountList *ml) {
+        int r;
+
+        assert(p);
+        assert(ml);
+
+        /* Open all extensions on the host, drop all sysexts since they won't have /etc/. The list of
+         * overlays also need to be updated, so that if it's empty after a confext has been removed, the
+         * child process can correctly undo the overlay in the target namespace, rather than attempting to
+         * mount an empty overlay which the kernel does not allow, so this pruning has to be done here and
+         * not later (nor earlier, as we don't know if an image is a confext until this point). */
+        MountEntry *f, *t; /* f: read cursor over all entries, t: write cursor for the entries we keep */
+        for (f = ml->mounts, t = ml->mounts; f < ml->mounts + ml->n_mounts; f++) {
+                if (IN_SET(f->mode, MOUNT_EXTENSION_DIRECTORY, MOUNT_EXTENSION_IMAGE)) {
+                        f->filter_class = IMAGE_CONFEXT;
+
+                        r = apply_one_mount("/", f, p);
+                        if (r < 0)
+                                return r;
+                        /* Nothing happened? Then it is not a confext, prune it from the lists */
+                        if (r == 0) {
+                                /* NOTE(review): this walks all n_mounts slots, including any slots
+                                 * between 't' and 'f' whose contents were already copied forward or
+                                 * released. This looks safe only as long as no MOUNT_OVERLAY entry is
+                                 * located after a pruned entry in the array - confirm that the sort
+                                 * order established by the caller guarantees this. */
+                                FOREACH_ARRAY(m, ml->mounts, ml->n_mounts) {
+                                        if (m->mode != MOUNT_OVERLAY)
+                                                continue;
+
+                                        _cleanup_strv_free_ char **pruned = NULL;
+
+                                        /* Keep only the layers that are not below the pruned entry's path */
+                                        STRV_FOREACH(ol, m->overlay_layers)
+                                                if (!path_startswith(*ol, mount_entry_path(f))) {
+                                                        r = strv_extend(&pruned, *ol);
+                                                        if (r < 0)
+                                                                return log_oom_debug();
+                                                }
+                                        strv_free(m->overlay_layers);
+                                        m->overlay_layers = TAKE_PTR(pruned);
+                                }
+                                mount_entry_done(f);
+                                continue;
+                        }
+                }
+
+                /* Entry is kept: move it to the write position */
+                *t = *f;
+                t++;
+        }
+
+        ml->n_mounts = t - ml->mounts;
+
+        return 0;
+}
+
+/* Re-applies the extension overlays for the hierarchies listed in 'hierarchy_env' inside the mount
+ * namespace of the process 'target', using the extension images and directories from 'p'.
+ *
+ * The mount list is computed in the calling process; the actual mount work is done in a forked child
+ * (with its own slave mount namespace) and a grandchild that enters the target's namespaces, which
+ * exchange fds over a socket pair.
+ *
+ * Returns 0 on success, also when there is nothing to mount. Returns a negative errno on failure, in
+ * particular -EINVAL if the target turns out to live in our own mount namespace. */
+int refresh_extensions_in_namespace(
+                const PidRef *target,
+                const char *hierarchy_env,
+                const NamespaceParameters *p) {
+
+        _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, pidns_fd = -EBADF;
+        const char *overlay_prefix = "/run/systemd/mount-rootfs";
+        _cleanup_(mount_list_done) MountList ml = {};
+        _cleanup_free_ char *extension_dir = NULL;
+        _cleanup_strv_free_ char **hierarchies = NULL;
+        int r;
+
+        assert(pidref_is_set(target));
+        assert(hierarchy_env);
+        assert(p);
+
+        log_debug("Refreshing extensions in-namespace for hierarchy '%s'", hierarchy_env);
+
+        r = pidref_namespace_open(target, &pidns_fd, &mntns_fd, /* ret_netns_fd= */ NULL, /* ret_userns_fd= */ NULL, &root_fd);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to open namespace: %m");
+
+        /* Refuse to operate on a target that shares our own mount namespace */
+        r = is_our_namespace(mntns_fd, NAMESPACE_MOUNT);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to check if target namespace is separate: %m");
+        if (r > 0)
+                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Target namespace is not separate, cannot reload extensions");
+
+        /* NOTE(review): extension_dir is computed but not referenced again in this function - confirm
+         * whether it is still needed. */
+        extension_dir = path_join(p->private_namespace_dir, "unit-extensions");
+        if (!extension_dir)
+                return log_oom_debug();
+
+        r = parse_env_extension_hierarchies(&hierarchies, hierarchy_env);
+        if (r < 0)
+                return r;
+
+        r = append_extensions(
+                        &ml,
+                        overlay_prefix,
+                        p->private_namespace_dir,
+                        hierarchies,
+                        p->extension_images,
+                        p->n_extension_images,
+                        p->extension_directories);
+        if (r < 0)
+                return r;
+
+        sort_and_drop_unused_mounts(&ml, overlay_prefix);
+        if (ml.n_mounts == 0)
+                return 0;
+
+        /*
+         * There are three main steps:
+         * 1. In child, set up the extension images and directories in a slave mountns, so that we have
+         *    access to their FDs
+         * 2. Fork into a grandchild, which will enter the target namespace and attempt to "unpeel" the
+         *    overlays to obtain FDs the underlying directories, over which we will reapply the overlays
+         * 3. In the child again, receive the FDs and reapply the overlays
+         */
+        r = safe_fork("(sd-ns-refresh-exts)",
+                      FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE,
+                      NULL);
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                /* Child (host namespace) */
+                _cleanup_close_pair_ int pair[2] = EBADF_PAIR;
+                _cleanup_(sigkill_waitp) pid_t grandchild_pid = 0;
+
+                 (void) mkdir_p_label(overlay_prefix, 0555);
+
+                r = refresh_apply_and_prune(p, &ml);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to apply extensions for refreshing: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                /* Create a grandchild process to handle the unmounting and reopening of hierarchy */
+                r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair);
+                if (r < 0) {
+                        log_debug_errno(errno, "Failed to create socket pair: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = safe_fork("(sd-ns-refresh-exts-grandchild)",
+                                FORK_LOG|FORK_DEATHSIG_SIGKILL,
+                                &grandchild_pid);
+                if (r < 0)
+                        _exit(EXIT_FAILURE);
+                if (r == 0) {
+                        /* Grandchild (target service namespace) */
+                        pair[0] = safe_close(pair[0]);
+
+                        r = refresh_grandchild_proc(target, &ml, overlay_prefix, pidns_fd, mntns_fd, root_fd, pair[1]);
+                        if (r < 0) {
+                                pair[1] = safe_close(pair[1]);
+                                _exit(EXIT_FAILURE);
+                        }
+
+                        _exit(EXIT_SUCCESS);
+                }
+
+                /* Child keeps pair[0] only; pair[1] belongs to the grandchild */
+                pair[1] = safe_close(pair[1]);
+
+                /* Until kernel 6.15, the FDs to the individual layers used to set up the OverlayFS via
+                 * lowerdir=/proc/self/fd/X need to remain open until the OverlayFS mount is _attached_
+                 * (as opposed to merely created) to its mount point, hence we need to ensure these FDs
+                 * stay open until the grandchild has attached the mount and exited. */
+                // TODO: once the kernel baseline is >= 6.15, move the FD array into the helper function
+                // and close them immediately
+                int *fd_layers = NULL;
+                size_t n_fd_layers = 0;
+                CLEANUP_ARRAY(fd_layers, n_fd_layers, close_many_and_free);
+
+                /* Serve the grandchild's requests, one mount entry at a time, in list order */
+                FOREACH_ARRAY(m, ml.mounts, ml.n_mounts) {
+                        r = handle_mount_from_grandchild(m, overlay_prefix, &fd_layers, &n_fd_layers, pair[0]);
+                        if (r < 0)
+                                _exit(EXIT_FAILURE);
+                }
+
+                r = wait_for_terminate_and_check("(sd-ns-refresh-exts-grandchild)", TAKE_PID(grandchild_pid), 0);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to wait for target namespace process to finish: %m");
+                        _exit(EXIT_FAILURE);
+                }
+                if (r != EXIT_SUCCESS) {
+                        log_debug("Target namespace fork did not succeed");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        return 0;
+}
+
 static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
         [PROTECT_HOME_NO]        = "no",
         [PROTECT_HOME_YES]       = "yes",
index f1076d0ee45441c7ef6bc0dfd4708ae22ddad96b..ced0a87e14554a13b937ac3f3ccef7a2e6190799 100644 (file)
@@ -16,6 +16,7 @@ typedef struct MountImage MountImage;
 #include "fs-util.h"
 #include "macro.h"
 #include "namespace-util.h"
+#include "pidref.h"
 #include "runtime-scope.h"
 #include "string-util.h"
 
@@ -259,3 +260,8 @@ int temporary_filesystem_add(TemporaryFileSystem **t, size_t *n,
 
 MountImage* mount_image_free_many(MountImage *m, size_t *n);
 int mount_image_add(MountImage **m, size_t *n, const MountImage *item);
+
+int refresh_extensions_in_namespace(
+                const PidRef *target,
+                const char *hierarchy_env,
+                const NamespaceParameters *p);
index 141c85745a19e0667515c49468f55ebcca647430..95b8d1eb73330e4b9bd04dee7ece5693c534847e 100644 (file)
@@ -21,6 +21,7 @@
 #include "devnum-util.h"
 #include "env-util.h"
 #include "escape.h"
+#include "execute.h"
 #include "exec-credential.h"
 #include "exit-status.h"
 #include "fd-util.h"
 #include "manager.h"
 #include "missing_audit.h"
 #include "mount-util.h"
+#include "namespace.h"
 #include "open-file.h"
 #include "parse-util.h"
 #include "path-util.h"
 #include "pidfd-util.h"
 #include "process-util.h"
 #include "random-util.h"
+#include "runtime-scope.h"
 #include "selinux-util.h"
 #include "serialize.h"
 #include "service.h"
@@ -66,6 +69,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
         [SERVICE_RELOAD]                     = UNIT_RELOADING,
         [SERVICE_RELOAD_SIGNAL]              = UNIT_RELOADING,
         [SERVICE_RELOAD_NOTIFY]              = UNIT_RELOADING,
+        [SERVICE_REFRESH_EXTENSIONS]         = UNIT_REFRESHING,
         [SERVICE_MOUNTING]                   = UNIT_REFRESHING,
         [SERVICE_STOP]                       = UNIT_DEACTIVATING,
         [SERVICE_STOP_WATCHDOG]              = UNIT_DEACTIVATING,
@@ -97,6 +101,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
         [SERVICE_RELOAD]                     = UNIT_RELOADING,
         [SERVICE_RELOAD_SIGNAL]              = UNIT_RELOADING,
         [SERVICE_RELOAD_NOTIFY]              = UNIT_RELOADING,
+        [SERVICE_REFRESH_EXTENSIONS]         = UNIT_REFRESHING,
         [SERVICE_MOUNTING]                   = UNIT_REFRESHING,
         [SERVICE_STOP]                       = UNIT_DEACTIVATING,
         [SERVICE_STOP_WATCHDOG]              = UNIT_DEACTIVATING,
@@ -127,7 +132,7 @@ static bool SERVICE_STATE_WITH_MAIN_PROCESS(ServiceState state) {
         return IN_SET(state,
                       SERVICE_START, SERVICE_START_POST,
                       SERVICE_RUNNING,
-                      SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
+                      SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_REFRESH_EXTENSIONS,
                       SERVICE_MOUNTING,
                       SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
                       SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL);
@@ -137,7 +142,7 @@ static bool SERVICE_STATE_WITH_CONTROL_PROCESS(ServiceState state) {
         return IN_SET(state,
                       SERVICE_CONDITION,
                       SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
-                      SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
+                      SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_REFRESH_EXTENSIONS,
                       SERVICE_MOUNTING,
                       SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
                       SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
@@ -1271,7 +1276,7 @@ static void service_set_state(Service *s, ServiceState state) {
         if (!IN_SET(state,
                     SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
                     SERVICE_RUNNING,
-                    SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
+                    SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_REFRESH_EXTENSIONS,
                     SERVICE_MOUNTING,
                     SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
                     SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
@@ -1299,7 +1304,7 @@ static void service_set_state(Service *s, ServiceState state) {
         if (state != SERVICE_START)
                 s->exec_fd_event_source = sd_event_source_disable_unref(s->exec_fd_event_source);
 
-        if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_MOUNTING))
+        if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_REFRESH_EXTENSIONS, SERVICE_MOUNTING))
                 service_stop_watchdog(s);
 
         if (state != SERVICE_MOUNTING) /* Just in case */
@@ -1344,6 +1349,7 @@ static usec_t service_coldplug_timeout(Service *s) {
         case SERVICE_RELOAD:
         case SERVICE_RELOAD_SIGNAL:
         case SERVICE_RELOAD_NOTIFY:
+        case SERVICE_REFRESH_EXTENSIONS:
         case SERVICE_MOUNTING:
                 return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_start_usec);
 
@@ -1410,7 +1416,7 @@ static int service_coldplug(Unit *u) {
                     SERVICE_DEAD_RESOURCES_PINNED))
                 (void) unit_setup_exec_runtime(u);
 
-        if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_MOUNTING))
+        if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_REFRESH_EXTENSIONS, SERVICE_MOUNTING))
                 service_start_watchdog(s);
 
         if (UNIT_ISSET(s->accept_socket)) {
@@ -2676,7 +2682,7 @@ static void service_enter_reload_by_notify(Service *s) {
                 log_unit_warning(UNIT(s), "Failed to schedule propagation of reload, ignoring: %s", bus_error_message(&error, r));
 }
 
-static void service_enter_reload(Service *s) {
+static void service_enter_reload_signal_exec(Service *s) {
         bool killed = false;
         int r;
 
@@ -2738,6 +2744,126 @@ fail:
         service_enter_running(s, SERVICE_SUCCESS);
 }
 
+static bool service_should_reload_extensions(Service *s) {
+        int r;
+
+        assert(s);
+
+        if (!pidref_is_set(&s->main_pid)) {
+                log_unit_debug(UNIT(s), "Not reloading extensions for service without main PID.");
+                return false;
+        }
+
+        r = exec_context_has_vpicked_extensions(&s->exec_context);
+        if (r < 0)
+                log_unit_warning_errno(UNIT(s), r, "Failed to determine if service should reload extensions, assuming false: %m");
+        if (r == 0)
+                log_unit_debug(UNIT(s), "Service has no extensions to reload.");
+        if (r <= 0)
+                return false;
+
+        // TODO: Add support for user services, which can use ExtensionDirectories= + notify-reload.
+        // For now, skip for user services.
+        if (!MANAGER_IS_SYSTEM(UNIT(s)->manager)) {
+                log_once(LOG_WARNING, "Not reloading extensions for user services.");
+                return false;
+        }
+
+        return true;
+}
+
+static void service_enter_refresh_extensions(Service *s) {
+        _cleanup_(pidref_done) PidRef worker = PIDREF_NULL;
+        int r;
+
+        assert(s);
+
+        /* If we don't have extensions to reload, immediately go to the signal step */
+        if (!service_should_reload_extensions(s))
+                return (void) service_enter_reload_signal_exec(s);
+
+        service_unwatch_control_pid(s);
+        s->reload_result = SERVICE_SUCCESS;
+        s->control_command = NULL;
+        s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+
+        /* Given we are running from PID1, avoid doing potentially heavy I/O operations like opening images
+         * directly, and instead fork a worker process. */
+        r = unit_fork_helper_process(UNIT(s), "(sd-refresh-extensions)", /* into_cgroup= */ false, &worker);
+        if (r < 0) {
+                log_unit_error_errno(UNIT(s), r, "Failed to fork process to refresh extensions in unit's namespace: %m");
+                goto fail;
+        }
+        if (r == 0) {
+                PidRef *unit_pid = &s->main_pid;
+                assert(pidref_is_set(unit_pid));
+
+                _cleanup_free_ char *propagate_dir = path_join("/run/systemd/propagate/", UNIT(s)->id);
+                if (!propagate_dir) {
+                        log_unit_error_errno(UNIT(s), -ENOMEM, "Failed to allocate memory for propagate directory: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                NamespaceParameters p = {
+                        .private_namespace_dir = "/run/systemd",
+                        .incoming_dir = "/run/systemd/incoming",
+                        .propagate_dir = propagate_dir,
+                        .runtime_scope = UNIT(s)->manager->runtime_scope,
+                        .extension_images = s->exec_context.extension_images,
+                        .n_extension_images = s->exec_context.n_extension_images,
+                        .extension_directories = s->exec_context.extension_directories,
+                        .extension_image_policy = s->exec_context.extension_image_policy,
+                };
+
+                /* Only reload confext, and not sysext as they also typically contain the executable(s) used
+                 * by the service and a simple reload cannot meaningfully handle that. */
+                r = refresh_extensions_in_namespace(
+                                unit_pid,
+                                "SYSTEMD_CONFEXT_HIERARCHIES",
+                                &p);
+                if (r < 0)
+                        log_unit_error_errno(UNIT(s), r, "Failed to refresh extensions in unit's namespace: %m");
+                else
+                        log_unit_debug(UNIT(s), "Refreshed extensions in unit's namespace");
+
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+        }
+
+        r = unit_watch_pidref(UNIT(s), &worker, /* exclusive= */ true);
+        if (r < 0) {
+                log_unit_warning_errno(UNIT(s), r, "Failed to watch extensions refresh helper process: %m");
+                goto fail;
+        }
+
+        s->control_pid = TAKE_PIDREF(worker);
+        service_set_state(s, SERVICE_REFRESH_EXTENSIONS);
+        return;
+
+fail:
+        s->reload_result = SERVICE_FAILURE_RESOURCES;
+        service_enter_running(s, SERVICE_SUCCESS);
+}
+
+static void service_enter_reload_mounting(Service *s) {
+        int r;
+
+        assert(s);
+
+        usec_t ts = now(CLOCK_MONOTONIC);
+
+        r = service_arm_timer(s, /* relative= */ true, s->timeout_start_usec);
+        if (r < 0) {
+                log_unit_warning_errno(UNIT(s), r, "Failed to install timer: %m");
+                s->reload_result = SERVICE_FAILURE_RESOURCES;
+                service_enter_running(s, SERVICE_SUCCESS);
+                return;
+        }
+
+        s->reload_begin_usec = ts;
+
+        service_enter_refresh_extensions(s);
+}
+
 static void service_run_next_control(Service *s) {
         usec_t timeout;
         int r;
@@ -2922,9 +3048,11 @@ static int service_stop(Unit *u) {
                 return 0;
 
         case SERVICE_MOUNTING:
-                service_kill_control_process(s);
                 service_live_mount_finish(s, SERVICE_FAILURE_PROTOCOL, BUS_ERROR_UNIT_INACTIVE);
                 _fallthrough_;
+        case SERVICE_REFRESH_EXTENSIONS:
+                service_kill_control_process(s);
+                _fallthrough_;
         case SERVICE_CONDITION:
         case SERVICE_START_PRE:
         case SERVICE_START:
@@ -2963,7 +3091,8 @@ static int service_reload(Unit *u) {
 
         assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));
 
-        service_enter_reload(s);
+        service_enter_reload_mounting(s);
+
         return 1;
 }
 
@@ -3959,6 +4088,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                                 case SERVICE_RELOAD:
                                 case SERVICE_RELOAD_SIGNAL:
                                 case SERVICE_RELOAD_NOTIFY:
+                                case SERVICE_REFRESH_EXTENSIONS:
                                 case SERVICE_MOUNTING:
                                         /* If neither main nor control processes are running then the current
                                          * state can never exit cleanly, hence immediately terminate the
@@ -4180,6 +4310,11 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                                         service_enter_running(s, SERVICE_SUCCESS);
                                 break;
 
+                        case SERVICE_REFRESH_EXTENSIONS:
+                                /* Remounting extensions asynchronously done, proceed to signal */
+                                service_enter_reload_signal_exec(s);
+                                break;
+
                         case SERVICE_MOUNTING:
                                 service_live_mount_finish(s, f, SD_BUS_ERROR_FAILED);
 
@@ -4278,6 +4413,7 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
         case SERVICE_RELOAD:
         case SERVICE_RELOAD_SIGNAL:
         case SERVICE_RELOAD_NOTIFY:
+        case SERVICE_REFRESH_EXTENSIONS:
                 log_unit_warning(UNIT(s), "Reload operation timed out. Killing reload process.");
                 service_kill_control_process(s);
                 s->reload_result = SERVICE_FAILURE_TIMEOUT;
@@ -4612,7 +4748,7 @@ static void service_notify_message(
         r = service_notify_message_parse_new_pid(u, tags, fds, &new_main_pid);
         if (r > 0 &&
             IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING,
-                             SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
+                             SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_REFRESH_EXTENSIONS,
                              SERVICE_STOP, SERVICE_STOP_SIGTERM) &&
             (!s->main_pid_known || !pidref_equal(&new_main_pid, &s->main_pid))) {
 
@@ -4649,7 +4785,7 @@ static void service_notify_message(
         if (strv_contains(tags, "STOPPING=1")) {
                 s->notify_state = NOTIFY_STOPPING;
 
-                if (IN_SET(s->state, SERVICE_RUNNING, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY))
+                if (IN_SET(s->state, SERVICE_RUNNING, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_REFRESH_EXTENSIONS))
                         service_enter_stop_by_notify(s);
 
                 notify_dbus = true;
@@ -4671,7 +4807,7 @@ static void service_notify_message(
                                 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
 
                                 /* Propagate a reload explicitly for plain RELOADING=1 (semantically equivalent to
-                                 * service_enter_reload_by_notify() call in below) */
+                                 * service_enter_reload_mounting() call in below) */
                                 r = manager_propagate_reload(UNIT(s)->manager, UNIT(s), JOB_FAIL, &error);
                                 if (r < 0)
                                         log_unit_warning(UNIT(s), "Failed to schedule propagation of reload, ignoring: %s",
@@ -4955,6 +5091,7 @@ static bool pick_up_pid_from_bus_name(Service *s) {
                        SERVICE_RELOAD,
                        SERVICE_RELOAD_SIGNAL,
                        SERVICE_RELOAD_NOTIFY,
+                       SERVICE_REFRESH_EXTENSIONS,
                        SERVICE_MOUNTING);
 }
 
@@ -5139,6 +5276,7 @@ static bool service_needs_console(Unit *u) {
                       SERVICE_RELOAD,
                       SERVICE_RELOAD_SIGNAL,
                       SERVICE_RELOAD_NOTIFY,
+                      SERVICE_REFRESH_EXTENSIONS,
                       SERVICE_MOUNTING,
                       SERVICE_STOP,
                       SERVICE_STOP_WATCHDOG,
index 7d04e23cd24fe4ca11877817db9d7cc55a797e45..1aadae0832b7b703a353506ff513ec4c48c34e61 100644 (file)
@@ -4289,6 +4289,7 @@ int verity_dissect_and_mount(
                 const ImagePolicy *image_policy,
                 const ImageFilter *image_filter,
                 const ExtensionReleaseData *extension_release_data,
+                ImageClass required_class,
                 VeritySettings *verity,
                 DissectedImage **ret_image) {
 
@@ -4400,15 +4401,19 @@ int verity_dissect_and_mount(
          * extension-release.d/ content. Return -EINVAL if there's any mismatch.
          * First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if
          * available, or else fallback to VERSION_ID. If neither is present (eg: rolling release),
-         * then a simple match on the ID will be performed. */
-        if (extension_release_data && extension_release_data->os_release_id) {
+         * then a simple match on the ID will be performed. Also if an extension class was specified,
+         * check that it matches or return ENOCSI (which looks like error-no-class if one squints enough). */
+        if ((extension_release_data && extension_release_data->os_release_id) || required_class >= 0) {
                 _cleanup_strv_free_ char **extension_release = NULL;
                 ImageClass class = IMAGE_SYSEXT;
 
-                assert(!isempty(extension_release_data->os_release_id));
+                assert(required_class >= 0 || !isempty(extension_release_data->os_release_id)); /* release data may be NULL if only the class is required */
 
-                r = load_extension_release_pairs(dest, IMAGE_SYSEXT, dissected_image->image_name, relax_extension_release_check, &extension_release);
+                r = load_extension_release_pairs(dest, required_class >= 0 ? required_class : IMAGE_SYSEXT, dissected_image->image_name, relax_extension_release_check, &extension_release);
                 if (r == -ENOENT) {
+                        if (required_class >= 0)
+                                return log_debug_errno(SYNTHETIC_ERRNO(ENOCSI), "Image %s extension-release metadata does not match the expected class", dissected_image->image_name);
+
                         r = load_extension_release_pairs(dest, IMAGE_CONFEXT, dissected_image->image_name, relax_extension_release_check, &extension_release);
                         if (r >= 0)
                                 class = IMAGE_CONFEXT;
@@ -4416,18 +4421,20 @@ int verity_dissect_and_mount(
                 if (r < 0)
                         return log_debug_errno(r, "Failed to parse image %s extension-release metadata: %m", dissected_image->image_name);
 
-                r = extension_release_validate(
-                                dissected_image->image_name,
-                                extension_release_data->os_release_id,
-                                extension_release_data->os_release_version_id,
-                                class == IMAGE_SYSEXT ? extension_release_data->os_release_sysext_level : extension_release_data->os_release_confext_level,
-                                extension_release_data->os_release_extension_scope,
-                                extension_release,
-                                class);
-                if (r == 0)
-                        return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Image %s extension-release metadata does not match the root's", dissected_image->image_name);
-                if (r < 0)
-                        return log_debug_errno(r, "Failed to compare image %s extension-release metadata with the root's os-release: %m", dissected_image->image_name);
+                if (extension_release_data && !isempty(extension_release_data->os_release_id)) {
+                        r = extension_release_validate(
+                                        dissected_image->image_name,
+                                        extension_release_data->os_release_id,
+                                        extension_release_data->os_release_version_id,
+                                        class == IMAGE_SYSEXT ? extension_release_data->os_release_sysext_level : extension_release_data->os_release_confext_level,
+                                        extension_release_data->os_release_extension_scope,
+                                        extension_release,
+                                        class);
+                        if (r == 0)
+                                return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Image %s extension-release metadata does not match the root's", dissected_image->image_name);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to compare image %s extension-release metadata with the root's os-release: %m", dissected_image->image_name);
+                }
         }
 
         r = dissected_image_relinquish(dissected_image);
index 53b56917b3141eee2ad9643adef33ee9fe55fe1b..191bfb8fbb73aa31238ea7c16cc6a7036941cbc6 100644 (file)
@@ -255,7 +255,7 @@ bool dissected_image_verity_sig_ready(const DissectedImage *image, PartitionDesi
 
 int mount_image_privately_interactively(const char *path, const ImagePolicy *image_policy, DissectImageFlags flags, char **ret_directory, int *ret_dir_fd, LoopDevice **ret_loop_device);
 
-int verity_dissect_and_mount(int src_fd, const char *src, const char *dest, const MountOptions *options, const ImagePolicy *image_policy, const ImageFilter *image_filter, const ExtensionReleaseData *required_release_data, VeritySettings *verity, DissectedImage **ret_image);
+int verity_dissect_and_mount(int src_fd, const char *src, const char *dest, const MountOptions *options, const ImagePolicy *image_policy, const ImageFilter *image_filter, const ExtensionReleaseData *required_release_data, ImageClass required_class, VeritySettings *verity, DissectedImage **ret_image);
 
 int dissect_fstype_ok(const char *fstype);
 
index 90912dde4f8572fd181ced57e37f65bf596f3d7e..10868be08ebba81f63dfec1b706c6650da6f1673 100644 (file)
@@ -972,6 +972,7 @@ static int mount_in_namespace_legacy(
                                 image_policy,
                                 /* image_filter= */ NULL,
                                 /* extension_release_data= */ NULL,
+                                /* required_class= */ _IMAGE_CLASS_INVALID,
                                 /* verity= */ NULL,
                                 /* ret_image= */ NULL);
         else
@@ -1193,6 +1194,7 @@ static int mount_in_namespace(
                                 image_policy,
                                 /* image_filter= */ NULL,
                                 /* extension_release_data= */ NULL,
+                                /* required_class= */ _IMAGE_CLASS_INVALID,
                                 /* verity= */ NULL,
                                 &img);
                 if (r < 0)