]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
liveupdate: luo_session: add ioctls for file preservation
author: Pasha Tatashin <pasha.tatashin@soleen.com>
Tue, 25 Nov 2025 16:58:37 +0000 (11:58 -0500)
committer: Andrew Morton <akpm@linux-foundation.org>
Thu, 27 Nov 2025 22:24:39 +0000 (14:24 -0800)
Introduce the userspace interface and internal logic required to manage
the lifecycle of file descriptors within a session.  Previously, a session
was merely a container; this change makes it a functional management unit.

The following capabilities are added:

A new set of ioctl commands is added; they operate on the file
descriptor returned by CREATE_SESSION. This allows userspace to:
- LIVEUPDATE_SESSION_PRESERVE_FD: Add a file descriptor to a session
  to be preserved across the live update.
- LIVEUPDATE_SESSION_RETRIEVE_FD: Retrieve a preserved file in the
  new kernel using its unique token.
- LIVEUPDATE_SESSION_FINISH: Signal that restoration of a retrieved
  session is complete so the kernel can release the preserved resources.

The session's .release handler is enhanced to be state-aware.  When a
session's file descriptor is closed, the handler finishes the session if it
was retrieved (incoming), or unpreserves its files otherwise (outgoing),
before freeing all associated file resources.

Link: https://lkml.kernel.org/r/20251125165850.3389713-8-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Tested-by: David Matlack <dmatlack@google.com>
Cc: Alexander Lobakin <aleksander.lobakin@intel.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andriy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: anish kumar <yesanishhere@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chanwoo Choi <cw00.choi@samsung.com>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Daniel Wagner <wagi@kernel.org>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Jeffery <djeffery@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guixin Liu <kanie@linux.alibaba.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Matthew Maurer <mmaurer@google.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Parav Pandit <parav@nvidia.com>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Samiullah Khawaja <skhawaja@google.com>
Cc: Song Liu <song@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Stuart Hayes <stuart.w.hayes@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: William Tu <witu@nvidia.com>
Cc: Yoann Congal <yoann.congal@smile.fr>
Cc: Zhu Yanjun <yanjun.zhu@linux.dev>
Cc: Zijun Hu <quic_zijuhu@quicinc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/uapi/linux/liveupdate.h
kernel/liveupdate/luo_session.c

index 1183cf984b5f891a551e0630b6df816e62d2e4a8..30bc66ee9436a0a052ce038a6418c2998bcd6ace 100644 (file)
@@ -53,6 +53,14 @@ enum {
        LIVEUPDATE_CMD_RETRIEVE_SESSION = 0x01,
 };
 
+/* ioctl commands for session file descriptors */
+enum {
+       LIVEUPDATE_CMD_SESSION_BASE = 0x40,
+       LIVEUPDATE_CMD_SESSION_PRESERVE_FD = LIVEUPDATE_CMD_SESSION_BASE,
+       LIVEUPDATE_CMD_SESSION_RETRIEVE_FD = 0x41,
+       LIVEUPDATE_CMD_SESSION_FINISH = 0x42,
+};
+
 /**
  * struct liveupdate_ioctl_create_session - ioctl(LIVEUPDATE_IOCTL_CREATE_SESSION)
  * @size:      Input; sizeof(struct liveupdate_ioctl_create_session)
@@ -110,4 +118,99 @@ struct liveupdate_ioctl_retrieve_session {
 #define LIVEUPDATE_IOCTL_RETRIEVE_SESSION \
        _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_RETRIEVE_SESSION)
 
+/* Session specific IOCTLs */
+
+/**
+ * struct liveupdate_session_preserve_fd - ioctl(LIVEUPDATE_SESSION_PRESERVE_FD)
+ * @size:  Input; sizeof(struct liveupdate_session_preserve_fd)
+ * @fd:    Input; The user-space file descriptor to be preserved.
+ * @token: Input; An opaque, unique token identifying the preserved resource.
+ *
+ * Holds parameters for preserving a file descriptor.
+ *
+ * User sets the @fd field identifying the file descriptor to preserve
+ * (e.g., memfd, kvm, iommufd, VFIO). The kernel validates if this FD type
+ * and its dependencies are supported for preservation. If validation passes,
+ * the kernel marks the FD internally and *initiates the process* of preparing
+ * its state for saving. The actual snapshotting of the state typically occurs
+ * during the subsequent %LIVEUPDATE_IOCTL_PREPARE execution phase, though
+ * some finalization might occur during freeze.
+ * On successful validation and initiation, the kernel records the @token
+ * supplied by the user as the identifier of the resource being preserved.
+ * This token confirms the FD is targeted for preservation and is required for
+ * the subsequent %LIVEUPDATE_SESSION_RETRIEVE_FD call after the live update.
+ *
+ * Return: 0 on success (validation passed, preservation initiated), negative
+ * error code on failure (e.g., unsupported FD type, dependency issue,
+ * validation failed).
+ */
+struct liveupdate_session_preserve_fd {
+       __u32           size;
+       __s32           fd;
+       __aligned_u64   token;
+};
+
+#define LIVEUPDATE_SESSION_PRESERVE_FD                                 \
+       _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_PRESERVE_FD)
+
+/**
+ * struct liveupdate_session_retrieve_fd - ioctl(LIVEUPDATE_SESSION_RETRIEVE_FD)
+ * @size:  Input; sizeof(struct liveupdate_session_retrieve_fd)
+ * @fd:    Output; The new file descriptor representing the fully restored
+ *         kernel resource.
+ * @token: Input; The opaque token that was used to preserve the resource.
+ *
+ * Retrieve a previously preserved file descriptor.
+ *
+ * User sets the @token field to the value that was passed to a successful
+ * %LIVEUPDATE_SESSION_PRESERVE_FD call before the live update. On success,
+ * the kernel restores the state (saved during the PREPARE/FREEZE phases)
+ * associated with the token and populates the @fd field with a new file
+ * descriptor referencing the restored resource in the current (new) kernel.
+ * This operation must be performed *before* signaling completion via
+ * %LIVEUPDATE_SESSION_FINISH.
+ *
+ * Return: 0 on success, negative error code on failure (e.g., invalid token).
+ */
+struct liveupdate_session_retrieve_fd {
+       __u32           size;
+       __s32           fd;
+       __aligned_u64   token;
+};
+
+#define LIVEUPDATE_SESSION_RETRIEVE_FD                                 \
+       _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_RETRIEVE_FD)
+
+/**
+ * struct liveupdate_session_finish - ioctl(LIVEUPDATE_SESSION_FINISH)
+ * @size:     Input; sizeof(struct liveupdate_session_finish)
+ * @reserved: Input; Must be zero. Reserved for future use.
+ *
+ * Signals the completion of the restoration process for a retrieved session.
+ * This is the final operation that should be performed on a session file
+ * descriptor after a live update.
+ *
+ * This ioctl must be called once all required file descriptors for the session
+ * have been successfully retrieved (using %LIVEUPDATE_SESSION_RETRIEVE_FD) and
+ * are fully restored from the userspace and kernel perspective.
+ *
+ * Upon success, the kernel releases its ownership of the preserved resources
+ * associated with this session. This allows internal resources to be freed,
+ * typically by decrementing reference counts on the underlying preserved
+ * objects.
+ *
+ * If this operation fails, the resources remain preserved in memory. Userspace
+ * may attempt to call finish again. The resources will otherwise be reset
+ * during the next live update cycle.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+struct liveupdate_session_finish {
+       __u32           size;
+       __u32           reserved;
+};
+
+#define LIVEUPDATE_SESSION_FINISH                                      \
+       _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_FINISH)
+
 #endif /* _UAPI_LIVEUPDATE_H */
index 3a031446d3a4895f16742ae2ca26646886dc53f0..dbdbc3bd7929dd8c4e21079810bd4dd9e09e2bec 100644 (file)
@@ -125,6 +125,8 @@ static struct luo_session *luo_session_alloc(const char *name)
                return ERR_PTR(-ENOMEM);
 
        strscpy(session->name, name, sizeof(session->name));
+       INIT_LIST_HEAD(&session->file_set.files_list);
+       luo_file_set_init(&session->file_set);
        INIT_LIST_HEAD(&session->list);
        mutex_init(&session->mutex);
 
@@ -133,6 +135,7 @@ static struct luo_session *luo_session_alloc(const char *name)
 
 static void luo_session_free(struct luo_session *session)
 {
+       luo_file_set_destroy(&session->file_set);
        mutex_destroy(&session->mutex);
        kfree(session);
 }
@@ -177,16 +180,46 @@ static void luo_session_remove(struct luo_session_header *sh,
        sh->count--;
 }
 
+static int luo_session_finish_one(struct luo_session *session)
+{
+       guard(mutex)(&session->mutex);
+       return luo_file_finish(&session->file_set);
+}
+
+static void luo_session_unfreeze_one(struct luo_session *session,
+                                    struct luo_session_ser *ser)
+{
+       guard(mutex)(&session->mutex);
+       luo_file_unfreeze(&session->file_set, &ser->file_set_ser);
+}
+
+static int luo_session_freeze_one(struct luo_session *session,
+                                 struct luo_session_ser *ser)
+{
+       guard(mutex)(&session->mutex);
+       return luo_file_freeze(&session->file_set, &ser->file_set_ser);
+}
+
 static int luo_session_release(struct inode *inodep, struct file *filep)
 {
        struct luo_session *session = filep->private_data;
        struct luo_session_header *sh;
 
        /* If retrieved is set, it means this session is from incoming list */
-       if (session->retrieved)
+       if (session->retrieved) {
+               int err = luo_session_finish_one(session);
+
+               if (err) {
+                       pr_warn("Unable to finish session [%s] on release\n",
+                               session->name);
+                       return err;
+               }
                sh = &luo_session_global.incoming;
-       else
+       } else {
+               scoped_guard(mutex, &session->mutex)
+                       luo_file_unpreserve_files(&session->file_set);
                sh = &luo_session_global.outgoing;
+       }
 
        luo_session_remove(sh, session);
        luo_session_free(session);
@@ -194,9 +227,140 @@ static int luo_session_release(struct inode *inodep, struct file *filep)
        return 0;
 }
 
+static int luo_session_preserve_fd(struct luo_session *session,
+                                  struct luo_ucmd *ucmd)
+{
+       struct liveupdate_session_preserve_fd *argp = ucmd->cmd;
+       int err;
+
+       guard(mutex)(&session->mutex);
+       err = luo_preserve_file(&session->file_set, argp->token, argp->fd);
+       if (err)
+               return err;
+
+       err = luo_ucmd_respond(ucmd, sizeof(*argp));
+       if (err)
+               pr_warn("The file was successfully preserved, but response to user failed\n");
+
+       return err;
+}
+
+static int luo_session_retrieve_fd(struct luo_session *session,
+                                  struct luo_ucmd *ucmd)
+{
+       struct liveupdate_session_retrieve_fd *argp = ucmd->cmd;
+       struct file *file;
+       int err;
+
+       argp->fd = get_unused_fd_flags(O_CLOEXEC);
+       if (argp->fd < 0)
+               return argp->fd;
+
+       guard(mutex)(&session->mutex);
+       err = luo_retrieve_file(&session->file_set, argp->token, &file);
+       if (err < 0)
+               goto  err_put_fd;
+
+       err = luo_ucmd_respond(ucmd, sizeof(*argp));
+       if (err)
+               goto err_put_file;
+
+       fd_install(argp->fd, file);
+
+       return 0;
+
+err_put_file:
+       fput(file);
+err_put_fd:
+       put_unused_fd(argp->fd);
+
+       return err;
+}
+
+static int luo_session_finish(struct luo_session *session,
+                             struct luo_ucmd *ucmd)
+{
+       struct liveupdate_session_finish *argp = ucmd->cmd;
+       int err = luo_session_finish_one(session);
+
+       if (err)
+               return err;
+
+       return luo_ucmd_respond(ucmd, sizeof(*argp));
+}
+
+union ucmd_buffer {
+       struct liveupdate_session_finish finish;
+       struct liveupdate_session_preserve_fd preserve;
+       struct liveupdate_session_retrieve_fd retrieve;
+};
+
+struct luo_ioctl_op {
+       unsigned int size;
+       unsigned int min_size;
+       unsigned int ioctl_num;
+       int (*execute)(struct luo_session *session, struct luo_ucmd *ucmd);
+};
+
+#define IOCTL_OP(_ioctl, _fn, _struct, _last)                                  \
+       [_IOC_NR(_ioctl) - LIVEUPDATE_CMD_SESSION_BASE] = {                    \
+               .size = sizeof(_struct) +                                      \
+                       BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <          \
+                                         sizeof(_struct)),                    \
+               .min_size = offsetofend(_struct, _last),                       \
+               .ioctl_num = _ioctl,                                           \
+               .execute = _fn,                                                \
+       }
+
+static const struct luo_ioctl_op luo_session_ioctl_ops[] = {
+       IOCTL_OP(LIVEUPDATE_SESSION_FINISH, luo_session_finish,
+                struct liveupdate_session_finish, reserved),
+       IOCTL_OP(LIVEUPDATE_SESSION_PRESERVE_FD, luo_session_preserve_fd,
+                struct liveupdate_session_preserve_fd, token),
+       IOCTL_OP(LIVEUPDATE_SESSION_RETRIEVE_FD, luo_session_retrieve_fd,
+                struct liveupdate_session_retrieve_fd, token),
+};
+
+static long luo_session_ioctl(struct file *filep, unsigned int cmd,
+                             unsigned long arg)
+{
+       struct luo_session *session = filep->private_data;
+       const struct luo_ioctl_op *op;
+       struct luo_ucmd ucmd = {};
+       union ucmd_buffer buf;
+       unsigned int nr;
+       int ret;
+
+       nr = _IOC_NR(cmd);
+       if (nr < LIVEUPDATE_CMD_SESSION_BASE || (nr - LIVEUPDATE_CMD_SESSION_BASE) >=
+           ARRAY_SIZE(luo_session_ioctl_ops)) {
+               return -EINVAL;
+       }
+
+       ucmd.ubuffer = (void __user *)arg;
+       ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
+       if (ret)
+               return ret;
+
+       op = &luo_session_ioctl_ops[nr - LIVEUPDATE_CMD_SESSION_BASE];
+       if (op->ioctl_num != cmd)
+               return -ENOIOCTLCMD;
+       if (ucmd.user_size < op->min_size)
+               return -EINVAL;
+
+       ucmd.cmd = &buf;
+       ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
+                                   ucmd.user_size);
+       if (ret)
+               return ret;
+
+       return op->execute(session, &ucmd);
+}
+
 static const struct file_operations luo_session_fops = {
        .owner = THIS_MODULE,
        .release = luo_session_release,
+       .unlocked_ioctl = luo_session_ioctl,
 };
 
 /* Create a "struct file" for session */
@@ -392,6 +556,11 @@ int luo_session_deserialize(void)
                        luo_session_free(session);
                        return err;
                }
+
+               scoped_guard(mutex, &session->mutex) {
+                       luo_file_deserialize(&session->file_set,
+                                            &sh->ser[i].file_set_ser);
+               }
        }
 
        kho_restore_free(sh->header_ser);
@@ -406,9 +575,14 @@ int luo_session_serialize(void)
        struct luo_session_header *sh = &luo_session_global.outgoing;
        struct luo_session *session;
        int i = 0;
+       int err;
 
        guard(rwsem_write)(&sh->rwsem);
        list_for_each_entry(session, &sh->list, list) {
+               err = luo_session_freeze_one(session, &sh->ser[i]);
+               if (err)
+                       goto err_undo;
+
                strscpy(sh->ser[i].name, session->name,
                        sizeof(sh->ser[i].name));
                i++;
@@ -416,6 +590,15 @@ int luo_session_serialize(void)
        sh->header_ser->count = sh->count;
 
        return 0;
+
+err_undo:
+       list_for_each_entry_continue_reverse(session, &sh->list, list) {
+               i--;
+               luo_session_unfreeze_one(session, &sh->ser[i]);
+               memset(sh->ser[i].name, 0, sizeof(sh->ser[i].name));
+       }
+
+       return err;
 }
 
 /**