]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: support FD Store preservation through kexec via LUO
author Luca Boccassi <luca.boccassi@gmail.com>
Fri, 1 May 2026 13:25:11 +0000 (14:25 +0100)
committer Luca Boccassi <luca.boccassi@gmail.com>
Fri, 15 May 2026 12:46:08 +0000 (13:46 +0100)
The kernel Live Update Orchestrator (LUO) exposes /dev/liveupdate, which
allows userspace to hand a set of "preservable" kernel objects to the
new kernel across a kexec-based reboot. For now it only supports memfds,
with more object types (virtio devices, etc.) expected to be added later.

This is a natural fit for systemd's FD Store feature: services hand
memfds (containing serialized state or other service data) to PID 1 via
FDSTORE=1 sd_notify() messages, and get them back on their next start.
Today this works across service restarts, soft reboots and
initrd→rootfs transitions. With LUO we can extend the same mechanism to
work across kexec, too.

The protocol on the PID 1 side works roughly as follows:

 * All preservable fds are collected into a single LUO session named
   "systemd". Each FD gets uploaded with a token. Token 0 in that session
   is reserved for a "mapping" memfd, which carries a JSON object
   describing how to dispatch the other tokens back to units on the next
   boot:

       {
         "foo.service": [
           { "type": "fd", "name": "stateA", "token": 1 },
           { "type": "fd", "name": "stateB", "token": 2 }
         ],
         ...
       }

   Unit IDs are used as the object keys, as they're stable across
   daemon-reexec, switch-root and kexec. "token" refers to the LUO
   token assigned to the object in the session.

 * On shutdown for MANAGER_KEXEC, just before manager_free(), systemd
   walks all services and serializes their persistent fd store contents
   (fds + FDNAMEs + unit IDs) into a JSON memfd. The FDs themselves are
   duplicated into an FDSet that is kept open, with O_CLOEXEC cleared so
   that systemd-shutdown inherits them. The fd number of the
   serialization memfd is passed to systemd-shutdown via the
   SYSTEMD_LUO_SERIALIZE_FD environment variable, so the actual LUO
   session creation and ioctls can happen as the very last step before
   kexec (shutdown implementation is the next commit).

 * On boot, manager_luo_restore_fd_stores() opens /dev/liveupdate,
   tries to retrieve the "systemd" session, reads the mapping memfd,
   then for each entry retrieves the fd from the session and attempts
   to attach it to the matching unit's fd store.

 * The FDs are injected into the appropriate units' FD stores using the
   same mechanism as the LISTEN_FDS propagation that was set up earlier.

Non-kexec shutdown paths are unaffected: if MANAGER_KEXEC is not the
final objective, no serialization file is produced and no LUO session
is ever created. Likewise if /dev/liveupdate does not exist, nothing
happens.

src/core/luo.c [new file with mode: 0644]
src/core/luo.h [new file with mode: 0644]
src/core/main.c
src/core/manager.c
src/core/meson.build
src/shared/luo-util.h

diff --git a/src/core/luo.c b/src/core/luo.c
new file mode 100644 (file)
index 0000000..4b5632c
--- /dev/null
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-json.h"
+
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "fileio.h"
+#include "json-util.h"
+#include "log.h"
+#include "luo.h"
+#include "luo-util.h"
+#include "manager.h"
+#include "serialize.h"
+#include "service.h"
+#include "unit.h"
+#include "unit-name.h"
+
+/* Loads the JSON mapping document that is stored under LUO_MAPPING_INDEX in the given LUO session.
+ *
+ * On success returns 0 and hands a reference to the parsed top-level JSON object to the caller via
+ * *ret. On failure a warning is logged, the negative errno-style error is returned and *ret is left
+ * untouched. */
+static int luo_read_mapping(int session_fd, sd_json_variant **ret) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *parsed = NULL;
+        _cleanup_close_ int map_fd = -EBADF;
+        _cleanup_fclose_ FILE *stream = NULL;
+        int r;
+
+        assert(session_fd >= 0);
+        assert(ret);
+
+        /* Step 1: pull the mapping fd out of the session. */
+        map_fd = luo_session_retrieve_fd(session_fd, LUO_MAPPING_INDEX);
+        if (map_fd < 0)
+                return log_warning_errno(map_fd, "Failed to retrieve LUO mapping fd (fd_index 0): %m");
+
+        /* Step 2: wrap it in a stdio stream; map_fd itself stays owned (and is closed) by us. */
+        r = fdopen_independent(map_fd, "r", &stream);
+        if (r < 0)
+                return log_warning_errno(r, "Failed to open LUO mapping fd for reading: %m");
+
+        /* Step 3: parse, insisting on a JSON object at the top level. */
+        r = sd_json_parse_file(
+                        stream,
+                        "luo-mapping",
+                        SD_JSON_PARSE_MUST_BE_OBJECT,
+                        &parsed,
+                        /* reterr_line= */ NULL,
+                        /* reterr_column= */ NULL);
+        if (r < 0)
+                return log_warning_errno(r, "Failed to parse LUO mapping JSON: %m");
+
+        *ret = TAKE_PTR(parsed);
+        return 0;
+}
+
+/* Cleanup handler for a LUO session fd, meant for use with _cleanup_(): if the variable holds a valid
+ * fd, the session is finished (best effort, the result is ignored) and the fd is closed afterwards.
+ * Negative values, i.e. "no session", are left alone. */
+static void luo_session_finishp(int *fd) {
+        int session_fd;
+
+        assert(fd);
+
+        session_fd = *fd;
+        if (session_fd < 0)
+                return;
+
+        (void) luo_session_finish(session_fd);
+        safe_close(session_fd);
+}
+
+/* Restores a single entry of a unit's array in the LUO mapping: parses the entry, retrieves the fd it
+ * refers to from the session and hands it to manager_dispatch_external_fd_to_unit() for the named
+ * unit.
+ *
+ * Returns the result of manager_dispatch_external_fd_to_unit() if the fd got that far, 0 if the entry
+ * was skipped after a warning (unknown type, missing or reserved token), or a negative errno-style
+ * error if parsing the entry or retrieving the fd failed. Callers only need to check for > 0. */
+static int luo_restore_entry(Manager *m, int session_fd, const char *unit_id, sd_json_variant *entry) {
+        struct {
+                const char *type;
+                const char *name;
+                uint64_t token;
+        } p = {
+                .token = UINT64_MAX, /* Sentinel: stays at this value if "token" is absent */
+        };
+
+        static const sd_json_dispatch_field dispatch_table[] = {
+                { "type",        SD_JSON_VARIANT_STRING,        sd_json_dispatch_const_string, voffsetof(p, type),         SD_JSON_MANDATORY },
+                { "name",        SD_JSON_VARIANT_STRING,        sd_json_dispatch_const_string, voffsetof(p, name),         SD_JSON_MANDATORY },
+                { "token",       _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64,       voffsetof(p, token),        0                 },
+                {}
+        };
+
+        _cleanup_close_ int fd = -EBADF;
+        int r;
+
+        assert(m);
+        assert(session_fd >= 0);
+        assert(unit_id);
+        assert(entry);
+
+        /* Parse problems are logged by sd_json_dispatch() itself (SD_JSON_LOG|SD_JSON_WARNING). */
+        r = sd_json_dispatch(entry, dispatch_table, SD_JSON_ALLOW_EXTENSIONS|SD_JSON_LOG|SD_JSON_WARNING, &p);
+        if (r < 0)
+                return r;
+
+        if (!streq(p.type, "fd")) {
+                log_warning("LUO mapping for unit '%s' fd '%s': unknown type '%s', skipping.",
+                            unit_id, p.name, p.type);
+                return 0;
+        }
+
+        if (p.token == UINT64_MAX) {
+                log_warning("LUO mapping for unit '%s' fd '%s': missing 'token' field.", unit_id, p.name);
+                return 0;
+        }
+
+        /* The mapping memfd itself must never be handed out to a unit. */
+        if (p.token == LUO_MAPPING_INDEX) {
+                log_warning("LUO mapping for unit '%s' fd '%s': token %" PRIu64 " is reserved for the mapping memfd.", unit_id, p.name, p.token);
+                return 0;
+        }
+
+        fd = luo_session_retrieve_fd(session_fd, p.token);
+        if (fd < 0)
+                return log_warning_errno(fd, "Failed to retrieve LUO fd for unit '%s' name '%s' token %" PRIu64 ": %m",
+                                         unit_id, p.name, p.token);
+
+        /* Ownership of the fd passes to manager_dispatch_external_fd_to_unit(), which consumes it even
+         * on error. */
+        return manager_dispatch_external_fd_to_unit(m, unit_id, p.name, /* index= */ 0, TAKE_FD(fd), "LUO");
+}
+
+/* Looks for a LUO session named LUO_SESSION_NAME left behind by the previous kernel and, if there is
+ * one, moves the fds recorded in its mapping into the fd stores of the units they belong to.
+ *
+ * Does nothing and returns 0 for user managers, if /dev/liveupdate is absent, or if no such session
+ * exists. Otherwise returns the number of fds for which the dispatch reported success (possibly 0), or
+ * a negative errno-style error if the device, the session or the mapping could not be obtained.
+ * Problems with individual units or entries are logged and skipped rather than treated as fatal.
+ *
+ * Whenever a session was retrieved it is finished and closed on return, regardless of the outcome. */
+int manager_luo_restore_fd_stores(Manager *m) {
+        /* Note the declaration order: cleanup runs in reverse, so the session is finished before the
+         * device fd is closed. */
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *mapping = NULL;
+        _cleanup_close_ int device_fd = -EBADF;
+        _cleanup_(luo_session_finishp) int session_fd = -EBADF;
+        const char *unit_id;
+        sd_json_variant *fds_json;
+        int r, n_total = 0;
+
+        assert(m);
+
+        if (MANAGER_IS_USER(m))
+                return 0;
+
+        device_fd = luo_open_device();
+        if (ERRNO_IS_NEG_DEVICE_ABSENT(device_fd)) {
+                log_debug_errno(device_fd, "No /dev/liveupdate device found, skipping LUO fd store restoration.");
+                return 0;
+        }
+        if (device_fd < 0)
+                return log_warning_errno(device_fd, "Failed to open /dev/liveupdate: %m");
+
+        session_fd = luo_retrieve_session(device_fd, LUO_SESSION_NAME);
+        if (session_fd == -ENOENT) {
+                log_debug("No LUO session '%s' found, skipping fd store restoration.", LUO_SESSION_NAME);
+                return 0;
+        }
+        if (session_fd < 0)
+                return log_warning_errno(session_fd, "Failed to retrieve LUO session '%s': %m", LUO_SESSION_NAME);
+
+        log_debug("Found LUO session '%s', restoring fd stores.", LUO_SESSION_NAME);
+
+        r = luo_read_mapping(session_fd, &mapping);
+        if (r < 0)
+                return r;
+
+        /* Retrieve all fds from the session and dispatch each to the named unit, eagerly loading the
+         * unit if necessary. */
+        JSON_VARIANT_OBJECT_FOREACH(unit_id, fds_json, mapping) {
+                sd_json_variant *entry;
+
+                /* Only concrete units can own an fd store, hence refuse template names ("foo@.service")
+                 * up front instead of retrieving fds that could never be attached to anything. */
+                if (!unit_name_is_valid(unit_id, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+                        log_warning("Invalid unit name '%s' in LUO mapping, skipping.", unit_id);
+                        continue;
+                }
+
+                if (!sd_json_variant_is_array(fds_json)) {
+                        log_warning("LUO mapping for unit '%s' is not a JSON array, skipping.", unit_id);
+                        continue;
+                }
+
+                JSON_VARIANT_ARRAY_FOREACH(entry, fds_json)
+                        if (luo_restore_entry(m, session_fd, unit_id, entry) > 0)
+                                n_total++;
+        }
+
+        if (n_total > 0)
+                log_debug("Restored %d fd(s) total from LUO session.", n_total);
+
+        return n_total;
+}
+
+/* Prepares the fd stores of all services whose fd_store_preserve_mode is EXEC_PRESERVE_YES for
+ * preservation across kexec: every stored fd is duplicated into a new FDSet, and a JSON document
+ * describing which duplicate belongs to which unit and fd name is written to a serialization file.
+ *
+ * Returns:
+ *   > 0  the number of serialized fd store entries; *ret_f receives the serialization file and
+ *        *ret_fds the FDSet with the duplicates, both with O_CLOEXEC cleared so that they survive
+ *        execve(). The caller owns both.
+ *   0    for user managers or if there is nothing to serialize; *ret_f and *ret_fds are set to NULL.
+ *   < 0  a negative errno-style error (already logged); *ret_f and *ret_fds are left untouched. */
+int manager_luo_serialize_fd_stores(Manager *m, FILE **ret_f, FDSet **ret_fds) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *root = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_fdset_free_ FDSet *fds = NULL;
+        const char *k;
+        Unit *u;
+        int r, n_serialized = 0;
+
+        assert(m);
+        assert(ret_f);
+        assert(ret_fds);
+
+        if (MANAGER_IS_USER(m)) {
+                *ret_f = NULL;
+                *ret_fds = NULL;
+                return 0;
+        }
+
+        fds = fdset_new();
+        if (!fds)
+                return log_oom();
+
+        /* Build a JSON object: { "unit_id": [ { "type": "fd", "name": "...", "fd": N }, ... ], ... }
+         * This is passed to systemd-shutdown which will create a LUO session and preserve the fds. */
+        HASHMAP_FOREACH_KEY(u, k, m->units) {
+                _cleanup_(sd_json_variant_unrefp) sd_json_variant *entries = NULL;
+                Service *s;
+
+                /* The units hashmap has one entry per unit name, i.e. a unit with aliases shows up
+                 * several times. Only process the entry keyed by the primary id, otherwise the fds of
+                 * aliased services would be duplicated and counted once per name. */
+                if (u->id != k)
+                        continue;
+
+                if (u->type != UNIT_SERVICE)
+                        continue;
+
+                s = SERVICE(u);
+
+                if (s->fd_store_preserve_mode != EXEC_PRESERVE_YES)
+                        continue;
+
+                if (!s->fd_store)
+                        continue;
+
+                LIST_FOREACH(fd_store, fs, s->fd_store) {
+                        int copy;
+
+                        /* Work on a duplicate: the original stays owned by the service's fd store. */
+                        copy = fdset_put_dup(fds, fs->fd);
+                        if (copy < 0)
+                                return log_error_errno(copy, "Failed to duplicate fd for LUO serialization: %m");
+
+                        /* At this stage entries reference the duplicate by fd number. */
+                        r = sd_json_variant_append_arraybo(
+                                        &entries,
+                                        SD_JSON_BUILD_PAIR_STRING("type", "fd"),
+                                        SD_JSON_BUILD_PAIR_STRING("name", fs->fdname),
+                                        SD_JSON_BUILD_PAIR_INTEGER("fd", copy));
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to build JSON for LUO serialization: %m");
+
+                        n_serialized++;
+                }
+
+                r = sd_json_variant_set_field(&root, u->id, entries);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to add unit to LUO serialization JSON: %m");
+        }
+
+        if (n_serialized == 0) {
+                log_debug("No fd store entries to serialize for LUO.");
+                *ret_f = NULL;
+                *ret_fds = NULL;
+                return 0;
+        }
+
+        r = open_serialization_file("luo-fd-store", &f);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create LUO serialization file: %m");
+
+        r = sd_json_variant_dump(root, /* flags= */ 0, f, /* prefix= */ NULL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to dump LUO serialization JSON: %m");
+
+        r = finish_serialization_file(f);
+        if (r < 0)
+                return log_error_errno(r, "Failed to finish LUO serialization file: %m");
+
+        /* Both the serialization file and the duplicated fds have to be inherited by the process we
+         * execve() into, hence drop O_CLOEXEC on all of them. */
+        r = fd_cloexec(fileno(f), false);
+        if (r < 0)
+                return log_error_errno(r, "Failed to disable O_CLOEXEC for LUO serialization: %m");
+
+        r = fdset_cloexec(fds, false);
+        if (r < 0)
+                return log_error_errno(r, "Failed to disable O_CLOEXEC for LUO serialization fds: %m");
+
+        log_info("Serialized %d fd store entries for LUO.", n_serialized);
+
+        *ret_f = TAKE_PTR(f);
+        *ret_fds = TAKE_PTR(fds);
+        return n_serialized;
+}
diff --git a/src/core/luo.h b/src/core/luo.h
new file mode 100644 (file)
index 0000000..314006c
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "core-forward.h"
+
+int manager_luo_restore_fd_stores(Manager *m);
+int manager_luo_serialize_fd_stores(Manager *m, FILE **ret_f, FDSet **ret_fds);
index 8d5ab57e3df5485e3429520c208c4ab85ba23c4e..f98d0edb2d0049fb6c807e9f23546aec0018fe59 100644 (file)
@@ -72,6 +72,7 @@
 #include "load-fragment.h"
 #include "log.h"
 #include "loopback-setup.h"
+#include "luo.h"
 #include "machine-id-setup.h"
 #include "main.h"
 #include "manager.h"
@@ -164,6 +165,7 @@ static RestrictFileSystemAccess arg_restrict_filesystem_access;
 static nsec_t arg_timer_slack_nsec;
 static Set* arg_syscall_archs;
 static FILE* arg_serialization;
+static FILE* arg_luo_serialization;
 static sd_id128_t arg_machine_id;
 static bool arg_machine_id_from_firmware = false;
 static EmergencyAction arg_cad_burst_action;
@@ -1806,6 +1808,13 @@ static int become_shutdown(int objective, int retval) {
         if (arg_minimum_uptime_usec != USEC_INFINITY)
                 (void) strv_extendf(&env_block, "MINIMUM_UPTIME_USEC=" USEC_FMT, arg_minimum_uptime_usec);
 
+        /* If we have a LUO serialization file, pass the fd to systemd-shutdown so it can
+         * preserve FD store entries across kexec via the kernel Live Update Orchestrator. */
+        if (arg_luo_serialization) {
+                log_debug("Passing LUO serialization fd to systemd-shutdown.");
+                (void) strv_extendf(&env_block, "SYSTEMD_LUO_SERIALIZE_FD=%i", fileno(arg_luo_serialization));
+        }
+
         (void) write_boot_or_shutdown_osc("shutdown");
 
         execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
@@ -3777,6 +3786,12 @@ finish:
         if (m) {
                 arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
                 arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
+
+                /* For kexec, serialize fd stores now. Services have stopped and sent
+                 * their FDs to the store, but the manager (and its fd stores) is still alive. */
+                if (r == MANAGER_KEXEC)
+                        (void) manager_luo_serialize_fd_stores(m, &arg_luo_serialization, &fds);
+
                 m = manager_free(m);
         }
 
@@ -3794,7 +3809,13 @@ finish:
                                  &error_message); /* This only returns if reexecution failed */
 
         arg_serialization = safe_fclose(arg_serialization);
-        fds = fdset_free(fds);
+
+        /* For kexec, the FDSet and LUO serialization file must survive until become_shutdown() calls
+         * execve() (CLOEXEC is already cleared on these FDs). For all other paths, free them now. */
+        if (r != MANAGER_KEXEC) {
+                fds = fdset_free(fds);
+                arg_luo_serialization = safe_fclose(arg_luo_serialization);
+        }
 
         saved_env = strv_free(saved_env);
 
index 1bd35a7d21b53e8289275d8bb9d3092ae06339ba..accf9c8ff94f142d197279a29dfd1087aa15be0e 100644 (file)
@@ -57,6 +57,7 @@
 #include "libaudit-util.h"
 #include "locale-setup.h"
 #include "log.h"
+#include "luo.h"
 #include "manager-dump.h"
 #include "manager-serialize.h"
 #include "manager.h"
@@ -2191,6 +2192,10 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds, Hashmap *named_
                 if (m->previous_objective == MANAGER_SOFT_REBOOT)
                         m->soft_reboots_count++;
 
+                /* If a LUO (Live Update Orchestrator) session from a previous kexec is available, restore
+                 * preserved file descriptors into the appropriate service fd stores now, before coldplug. */
+                (void) manager_luo_restore_fd_stores(m);
+
                 /* Pick up fds passed via the LISTEN_FDS=/LISTEN_FDNAMES= protocol that are tagged with a
                  * unit id ("unit-id|fdname"), and route them into the matching unit's fd store. Untagged
                  * fds remain in 'fds' and are handed to socket units below as before. */
index 98cf02aef8879a10837638b8740400a26876869e..2bd8170c6a2eb10f7cda0002eeeeff983717ee49 100644 (file)
@@ -43,6 +43,7 @@ libcore_sources = files(
         'kill.c',
         'load-dropin.c',
         'load-fragment.c',
+        'luo.c',
         'manager-dump.c',
         'manager-serialize.c',
         'manager.c',
index 8ff70c17e0d31a1c5587caadf88af5cb897c79f8..a4f9a01223a1bc39da8aedfda431fc42b8477d23 100644 (file)
@@ -3,6 +3,25 @@
 
 #include "basic-forward.h"
 
+#define LUO_SESSION_NAME "systemd"
+
+/* Index (token) 0 in the LUO session is always the mapping memfd, which contains a JSON document mapping
+ * unit ids to arrays of fd store entries:
+ *
+ *   {
+ *     "unit-name.service": [
+ *       { "type": "fd", "name": "fdname1", "token": 1 },
+ *       { "type": "fd", "name": "fdname2", "token": 2 }
+ *     ],
+ *     "other-unit.service": [
+ *       { "type": "fd", "name": "stored", "token": 3 }
+ *     ]
+ *   }
+ *
+ * type=fd: the fd was preserved in the "systemd" LUO session with the given token.
+ */
+#define LUO_MAPPING_INDEX UINT64_C(0)
+
 int luo_open_device(void);
 int luo_create_session(int device_fd, const char *name);
 int luo_retrieve_session(int device_fd, const char *name);