git.ipfire.org Git - thirdparty/systemd.git/commitdiff
vmspawn: implement io.systemd.MachineInstance.ReplaceStorage
author: Christian Brauner <brauner@kernel.org>
Fri, 8 May 2026 08:52:07 +0000 (10:52 +0200)
committer: Christian Brauner <brauner@kernel.org>
Tue, 12 May 2026 20:54:07 +0000 (22:54 +0200)
Wire up the runtime hot-swap Varlink method. The signature mirrors
AddStorage minus 'config': the device frontend (virtio-blk,
virtio-scsi, nvme, scsi-cd) doesn't change, only the backing file
behind it. A read-only drive may become read-write based on the new fd's
O_ACCMODE (a read-only fd for a read-write drive is rejected with EROFS);
scsi-cd is forced read-only to match the boot-time policy.

QMP sequence (entry: vmspawn_qmp_replace_block_device):

  add-fd                          → on_replace_observe_stage
  blockdev-add (new file)         → on_replace_blockdev_add_complete
  remove-fd (new fdset)           → on_replace_observe_stage
  blockdev-reopen (format)        → on_replace_blockdev_reopen_complete
                                    [commit + fire trailing del]
  blockdev-del (old file)         → on_replace_old_blockdev_del_complete

The reopen options must be a superset of every option that
qmp_build_blockdev_add_format() may emit; otherwise reopen fails with
'Cannot reset option X to default'. The 'file' field is a string
reference to the new file node — case 3 of the schema in
qemu/qapi/block-core.json:5034-5040 ("the current child is replaced
with that other node"). The format node's qmp_node_name is preserved
so the device frontend's drive=<X> binding does not move.

ReplaceCtx tracks the per-call state with a refcount mirroring the
add-stage drive-info pattern. On any pre-commit failure replace_fail
tears down whatever new-side state we created on the wire and replies
on drive->link via reply_qmp_error (disconnect → NotConnected). On
post-commit del failure we log a warning, leak the orphan, and reply
success — the swap itself succeeded and the leak resolves at VM exit.

file_generation is bumped before issuing blockdev-add so failed
attempts cannot collide on node-name when the user retries.

Errors:
  NoSuchStorage     - drive not in the registry
  StorageImmutable  - drive lacks QMP_DRIVE_REMOVABLE (boot-time)
  EROFS             - read-only fd offered for a read-write drive
  EBUSY             - add still pending or another replace/remove in flight
  NotConnected      - QMP transport disconnect during the chain
  EIO               - QEMU rejected blockdev-reopen

Also gates RemoveStorage on REPLACE_PENDING so a device_del cannot
race a mid-flight blockdev-reopen on the same drive.

Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
src/vmspawn/vmspawn-qmp.c
src/vmspawn/vmspawn-qmp.h
src/vmspawn/vmspawn-varlink.c

index d2cbb67e79ebb26ac0e8d7b6c9c74879163e47c5..7dc58a8ccb1d2f7cf736bcb653830698b595d8a9 100644 (file)
@@ -906,7 +906,6 @@ int vmspawn_qmp_add_block_device(VmspawnQmpBridge *bridge, DriveInfo *drive) {
                 return log_oom();
         if (asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", drive->counter) < 0)
                 return log_oom();
-        drive->file_generation = 0;
         if (asprintf(&drive->qmp_file_node_name, "vmspawn-%" PRIu64 "-file-%" PRIu64,
                      drive->counter, drive->file_generation) < 0)
                 return log_oom();
@@ -1067,8 +1066,8 @@ int vmspawn_qmp_remove_block_device(VmspawnQmpBridge *bridge, sd_varlink *link,
                 return sd_varlink_error(link, "io.systemd.MachineInstance.StorageImmutable", NULL);
         if (!FLAGS_SET(drive->state, BLOCK_DEVICE_STATE_BLOCKDEV_ADDED))
                 return reply_qmp_error(link, "Block device add pending", -EBUSY);
-        if (FLAGS_SET(drive->state, BLOCK_DEVICE_STATE_REMOVE_PENDING))
-                return reply_qmp_error(link, "Block device removal pending", -EBUSY);
+        if (drive->state & (BLOCK_DEVICE_STATE_REMOVE_PENDING|BLOCK_DEVICE_STATE_REPLACE_PENDING))
+                return reply_qmp_error(link, "Block device replace/remove pending", -EBUSY);
 
         _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
         r = sd_json_buildo(&args, SD_JSON_BUILD_PAIR_STRING("id", drive->qmp_device_id));
@@ -1114,6 +1113,361 @@ int vmspawn_qmp_dispatch_device_deleted(VmspawnQmpBridge *bridge, sd_json_varian
         return 0;
 }
 
+typedef enum ReplaceCtxStateFlags {
+        REPLACE_CTX_FAILED             = 1u << 0,  /* idempotency sentinel: set by replace_fail() and by the
+                                                    * synchronous rollback in vmspawn_qmp_replace_block_device()
+                                                    * so the chain is failed (and replied to) only once */
+        REPLACE_CTX_NEW_BLOCKDEV_ADDED = 1u << 1,  /* blockdev-add(new file) ack'd; replace_fail() then rolls
+                                                    * back with blockdev-del rather than remove-fd */
+        REPLACE_CTX_REOPEN_COMMITTED   = 1u << 2,  /* blockdev-reopen ack'd; new state moved onto DriveInfo */
+} ReplaceCtxStateFlags;
+
+/* Bits ReplaceStorage may change; others are preserved across replace.
+ * Applied twice: when the candidate flags are computed at request time and
+ * again when they are folded into DriveInfo after blockdev-reopen succeeds. */
+#define QMP_DRIVE_REPLACE_MUTABLE_MASK \
+        (QMP_DRIVE_BLOCK_DEVICE | QMP_DRIVE_READ_ONLY | QMP_DRIVE_IO_URING)
+
+/* Per-ReplaceStorage state. One ref per outstanding QMP callback plus one for
+ * the entry-point local, which is dropped by its cleanup handler when
+ * vmspawn_qmp_replace_block_device() returns. On commit (successful
+ * blockdev-reopen) the new-side contents are folded into DriveInfo. */
+typedef struct ReplaceCtx {
+        unsigned n_ref;
+
+        DriveInfo *drive;            /* ref'd; released in replace_ctx_free() */
+        char *new_file_node_name;    /* node-name of the replacement file node; moved to DriveInfo on commit */
+        char *new_fdset_path;        /* filled in by qmp_fdset_add(); moved to DriveInfo on commit */
+        uint64_t new_fdset_id;       /* filled in by qmp_fdset_add(); used for remove-fd */
+        QmpDriveFlags new_flags;     /* candidate drive flags; only the mutable bits are committed */
+
+        char *old_file_node_name;    /* copy of the file node name in use when the replace started */
+
+        ReplaceCtxStateFlags state;
+} ReplaceCtx;
+
+/* Destructor handed to the unref helper below: releases the DriveInfo
+ * reference and every string the context owns, then the context itself.
+ * Accepts NULL. Always returns NULL. */
+static ReplaceCtx* replace_ctx_free(ReplaceCtx *ctx) {
+        if (ctx) {
+                free(ctx->old_file_node_name);
+                free(ctx->new_fdset_path);
+                free(ctx->new_file_node_name);
+                drive_info_unref(ctx->drive);
+                ctx = mfree(ctx);
+        }
+
+        return ctx;
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_FUNC(ReplaceCtx, replace_ctx);                       /* presumably emits replace_ctx_ref() (used by the pipeline below); macro not visible here */
+DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(ReplaceCtx, replace_ctx, replace_ctx_free);   /* presumably emits replace_ctx_unref(), ending in replace_ctx_free() — TODO confirm */
+DEFINE_TRIVIAL_CLEANUP_FUNC(ReplaceCtx*, replace_ctx_unref);                    /* presumably emits replace_ctx_unrefp() for _cleanup_() use */
+
+/* First-error handler for the replace pipeline. Idempotent. Best-effort tears
+ * down whatever new-side state we created on the wire, clears REPLACE_PENDING,
+ * and replies on drive->link (if any).
+ *
+ * ctx:        replace context; REPLACE_CTX_FAILED is set on first entry and
+ *             every later call returns immediately.
+ * error:      error code forwarded to reply_qmp_error() (callers pass the
+ *             negative value they received).
+ * error_desc: description forwarded to reply_qmp_error().
+ *
+ * Returns 0 if the chain had already failed or no Varlink link is pending,
+ * otherwise the result of reply_qmp_error(). */
+static int replace_fail(ReplaceCtx *ctx, int error, const char *error_desc) {
+        assert(ctx);
+
+        if (FLAGS_SET(ctx->state, REPLACE_CTX_FAILED))
+                return 0;
+        ctx->state |= REPLACE_CTX_FAILED;
+
+        DriveInfo *drive = ctx->drive;
+        assert(drive);
+
+        /* If the new file node was added, del it; that also drops the fdset's
+         * dup so the new fdset auto-frees. If add-fd succeeded but blockdev-add
+         * never did, an explicit remove-fd is needed instead.
+         *
+         * Both rollback commands are fire-and-forget: failures to build the
+         * arguments or to queue the command are deliberately ignored, and the
+         * completion is routed to on_qmp_complete with a label string as
+         * userdata rather than back into this context. */
+        if (FLAGS_SET(ctx->state, REPLACE_CTX_NEW_BLOCKDEV_ADDED)) {
+                _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+                if (sd_json_buildo(&args, SD_JSON_BUILD_PAIR_STRING("node-name", ctx->new_file_node_name)) >= 0)
+                        (void) qmp_client_invoke(drive->bridge->qmp, /* ret_slot= */ NULL, "blockdev-del",
+                                                 QMP_CLIENT_ARGS(args),
+                                                 on_qmp_complete, (void*) "replace rollback blockdev-del");
+        } else if (ctx->new_fdset_path) {
+                _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+                if (sd_json_buildo(&args, SD_JSON_BUILD_PAIR_UNSIGNED("fdset-id", ctx->new_fdset_id)) >= 0)
+                        (void) qmp_client_invoke(drive->bridge->qmp, /* ret_slot= */ NULL, "remove-fd",
+                                                 QMP_CLIENT_ARGS(args),
+                                                 on_qmp_complete, (void*) "replace rollback remove-fd");
+        }
+
+        drive->state &= ~BLOCK_DEVICE_STATE_REPLACE_PENDING;
+        _cleanup_(sd_varlink_unrefp) sd_varlink *link = TAKE_PTR(drive->link); /* take over the pending link; unref'd on return */
+        if (link)
+                return reply_qmp_error(link, error_desc, error);
+        return 0;
+}
+
+/* Completion handler shared by the add-fd and remove-fd stages of the replace
+ * pipeline. These stages have nothing to record on success; an error is
+ * routed into replace_fail(). Consumes the ReplaceCtx reference passed as
+ * userdata. */
+static int on_replace_observe_stage(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        /* Adopt the reference that was handed over when the command was queued. */
+        _cleanup_(replace_ctx_unrefp) ReplaceCtx *ctx = ASSERT_PTR(userdata);
+        assert(client);
+
+        return error < 0 ? replace_fail(ctx, error, error_desc) : 0;
+}
+
+static int on_replace_blockdev_add_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        _cleanup_(replace_ctx_unrefp) ReplaceCtx *ctx = ASSERT_PTR(userdata);
+        assert(client);
+
+        if (error < 0)
+                return replace_fail(ctx, error, error_desc);
+
+        /* If a sync error elsewhere has already marked the chain failed, the
+         * just-added file node is orphaned — tear it down retroactively. */
+        if (FLAGS_SET(ctx->state, REPLACE_CTX_FAILED)) {
+                _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+                if (sd_json_buildo(&args, SD_JSON_BUILD_PAIR_STRING("node-name", ctx->new_file_node_name)) >= 0)
+                        (void) qmp_client_invoke(ctx->drive->bridge->qmp, /* ret_slot= */ NULL,
+                                                 "blockdev-del", QMP_CLIENT_ARGS(args),
+                                                 on_qmp_complete,
+                                                 (void*) "replace retroactive blockdev-del");
+                return 0;
+        }
+
+        ctx->state |= REPLACE_CTX_NEW_BLOCKDEV_ADDED;
+        return 0;
+}
+
+static int on_replace_old_blockdev_del_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        _cleanup_(replace_ctx_unrefp) ReplaceCtx *ctx = ASSERT_PTR(userdata);
+        assert(client);
+
+        DriveInfo *drive = ctx->drive;
+
+        /* Final stage of the replace pipeline: completion of the trailing
+         * blockdev-del for the old file node. Consumes the ReplaceCtx
+         * reference passed as userdata and always returns 0.
+         *
+         * The swap itself succeeded at reopen-commit time. If del of the old
+         * file node failed, the orphan persists until VM exit — log and reply
+         * success. The fdset auto-freed when the dup was released regardless. */
+        if (error < 0)
+                log_warning("Failed to delete orphaned file node '%s' after replace: %s",
+                            ctx->old_file_node_name, strna(error_desc));
+
+        drive->state &= ~BLOCK_DEVICE_STATE_REPLACE_PENDING;
+        _cleanup_(sd_varlink_unrefp) sd_varlink *link = TAKE_PTR(drive->link);
+        if (link)
+                (void) sd_varlink_reply(link, NULL); /* empty success reply; a send failure is ignored */
+
+        log_info("Block device '%s' backing replaced", drive->id); /* logged even when the del above failed */
+        return 0;
+}
+
+static int on_replace_blockdev_reopen_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        _cleanup_(replace_ctx_unrefp) ReplaceCtx *ctx = ASSERT_PTR(userdata);
+        assert(client);
+
+        if (error < 0)
+                return replace_fail(ctx, error, error_desc);
+
+        DriveInfo *drive = ctx->drive;
+
+        /* Atomic commit: the format graph now references the new file node.
+         * Move the new-side state from ctx onto DriveInfo so subsequent
+         * teardowns find the right names.
+         *
+         * NOTE(review): REPLACE_CTX_FAILED is not consulted on this success
+         * path. If an earlier stage's callback already ran replace_fail()
+         * (which replies and clears REPLACE_PENDING) and this reopen still
+         * succeeds, the commit below happens anyway and the trailing-del path
+         * touches drive->link / REPLACE_PENDING again — confirm whether that
+         * ordering can occur in practice. */
+        ctx->state |= REPLACE_CTX_REOPEN_COMMITTED;
+        free_and_replace(drive->qmp_file_node_name, ctx->new_file_node_name);
+        free_and_replace(drive->fdset_path, ctx->new_fdset_path);
+        drive->fdset_id = ctx->new_fdset_id;
+        /* Only commit the mutable bits so unrelated future flags aren't silently flipped. */
+        drive->flags = (drive->flags & ~QMP_DRIVE_REPLACE_MUTABLE_MASK) |
+                       (ctx->new_flags & QMP_DRIVE_REPLACE_MUTABLE_MASK);
+
+        /* Trailing blockdev-del of the OLD file node. The format no longer
+         * references it, so it's an orphan; deleting it also drops the dup
+         * that kept the old fdset alive. The Varlink reply is deferred to the
+         * del's completion callback, which receives its own ctx reference
+         * (slot_ref) once the command has been queued. */
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *del_args = NULL;
+        int r = sd_json_buildo(&del_args, SD_JSON_BUILD_PAIR_STRING("node-name", ctx->old_file_node_name));
+        if (r >= 0) {
+                _cleanup_(replace_ctx_unrefp) ReplaceCtx *slot_ref = replace_ctx_ref(ctx);
+                r = qmp_client_invoke(drive->bridge->qmp, /* ret_slot= */ NULL,
+                                      "blockdev-del", QMP_CLIENT_ARGS(del_args),
+                                      on_replace_old_blockdev_del_complete, slot_ref);
+                if (r >= 0) {
+                        TAKE_PTR(slot_ref);
+                        return 0;
+                }
+        }
+
+        /* Couldn't even queue blockdev-del. The swap succeeded; reply success
+         * and leave the orphan to clean up at VM exit. r holds the error from
+         * whichever of the build or the queue step failed. */
+        log_warning_errno(r, "Failed to queue blockdev-del for orphaned file node '%s': %m",
+                          ctx->old_file_node_name);
+
+        drive->state &= ~BLOCK_DEVICE_STATE_REPLACE_PENDING;
+        _cleanup_(sd_varlink_unrefp) sd_varlink *link = TAKE_PTR(drive->link);
+        if (link)
+                (void) sd_varlink_reply(link, NULL);
+        return 0;
+}
+
+/* Entry point for ReplaceStorage: swaps the backing file of an existing,
+ * removable drive without touching the device frontend.
+ *
+ * bridge:   QMP bridge owning the drive registry and the QMP client.
+ * link:     Varlink connection of the caller. Synchronous failures are replied
+ *           to here; otherwise a reference is parked in drive->link and the
+ *           reply is sent from the QMP completion callbacks.
+ * id:       registry key of the drive.
+ * fd:       new backing fd. Ownership passes to this function: it is closed on
+ *           early failure and otherwise handed to qmp_fdset_add().
+ * fd_flags: only QMP_DRIVE_READ_ONLY and QMP_DRIVE_BLOCK_DEVICE are honoured.
+ *
+ * The four QMP commands (add-fd, blockdev-add, remove-fd, blockdev-reopen) are
+ * queued back to back; each queued command holds its own ReplaceCtx ref.
+ *
+ * Returns 0 once the whole chain is queued, otherwise the result of the
+ * Varlink error reply. */
+int vmspawn_qmp_replace_block_device(
+                VmspawnQmpBridge *bridge,
+                sd_varlink *link,
+                const char *id,
+                int fd,
+                QmpDriveFlags fd_flags) {
+
+        _cleanup_close_ int owned_fd = fd;
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *file_args = NULL, *reopen_args = NULL;
+        _cleanup_(replace_ctx_unrefp) ReplaceCtx *ctx = NULL;
+        /* Not _cleanup_'d: aliasing it with ctx tripped a gcc-12
+         * -Wuse-after-free false positive on the cleanup chain. Error paths
+         * unref it explicitly; success leaks the ref to the callback. */
+        ReplaceCtx *slot_ref;
+        /* Track which fdset commands made it onto the wire so the synchronous
+         * rollback knows whether it has to release the new fdset itself. */
+        bool add_fd_queued = false, remove_fd_queued = false;
+        int r;
+
+        assert(bridge);
+        assert(link);
+        assert(id);
+        assert(fd >= 0);
+
+        DriveInfo *drive = hashmap_get(bridge->block_devices, id);
+        if (!drive)
+                return sd_varlink_error(link, "io.systemd.MachineInstance.NoSuchStorage", NULL);
+        if (!FLAGS_SET(drive->flags, QMP_DRIVE_REMOVABLE))
+                return sd_varlink_error(link, "io.systemd.MachineInstance.StorageImmutable", NULL);
+        /* QEMU's blockdev-reopen rejects RW->RO on a node with attached writers
+         * (the guest device). For an RW drive the new backing must be writable. */
+        if (!FLAGS_SET(drive->flags, QMP_DRIVE_READ_ONLY) && FLAGS_SET(fd_flags, QMP_DRIVE_READ_ONLY))
+                return sd_varlink_error_errno(link, -EROFS);
+        if (!FLAGS_SET(drive->state, BLOCK_DEVICE_STATE_BLOCKDEV_ADDED))
+                return reply_qmp_error(link, "Block device add pending", -EBUSY);
+        if (drive->state & (BLOCK_DEVICE_STATE_REMOVE_PENDING|BLOCK_DEVICE_STATE_REPLACE_PENDING))
+                return reply_qmp_error(link, "Block device replace/remove pending", -EBUSY);
+        assert(!drive->link);
+        assert(drive->qmp_file_node_name);
+
+        /* Bump generation EARLY so failed attempts don't collide on retry. */
+        uint64_t new_gen = ++drive->file_generation;
+
+        _cleanup_free_ char *new_file_node_name = NULL;
+        if (asprintf(&new_file_node_name, "vmspawn-%" PRIu64 "-file-%" PRIu64,
+                     drive->counter, new_gen) < 0)
+                return sd_varlink_error_errno(link, -ENOMEM);
+
+        /* Compute new flags: keep the existing drive flags, swap in the
+         * caller-derived bits (only RO and BLOCK_DEVICE are caller-controlled),
+         * fold scsi-cd into RO, and fold in the bridge's io_uring feature. */
+        const QmpDriveFlags FD_DERIVED_MASK = QMP_DRIVE_READ_ONLY | QMP_DRIVE_BLOCK_DEVICE;
+        QmpDriveFlags new_flags = (drive->flags & ~QMP_DRIVE_REPLACE_MUTABLE_MASK) |
+                                  (fd_flags & FD_DERIVED_MASK);
+        if (drive->disk_type == DISK_TYPE_VIRTIO_SCSI_CDROM)
+                new_flags |= QMP_DRIVE_READ_ONLY;
+        if (FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_IO_URING))
+                new_flags |= QMP_DRIVE_IO_URING;
+
+        ctx = new0(ReplaceCtx, 1);
+        if (!ctx)
+                return sd_varlink_error_errno(link, -ENOMEM);
+        ctx->n_ref = 1;
+        ctx->drive = drive_info_ref(drive);
+        ctx->new_file_node_name = TAKE_PTR(new_file_node_name);
+        ctx->new_flags = new_flags;
+        ctx->old_file_node_name = strdup(drive->qmp_file_node_name);
+        if (!ctx->old_file_node_name)
+                return sd_varlink_error_errno(link, -ENOMEM);
+
+        /* From here on the request is "in flight": park the link on the drive
+         * so the completion callbacks can reply, and block concurrent
+         * replace/remove requests. */
+        drive->link = sd_varlink_ref(link);
+        drive->state |= BLOCK_DEVICE_STATE_REPLACE_PENDING;
+
+        /* 1. add-fd → new fdset */
+        slot_ref = replace_ctx_ref(ctx);
+        r = qmp_fdset_add(bridge->qmp, TAKE_FD(owned_fd),
+                          on_replace_observe_stage, slot_ref,
+                          &ctx->new_fdset_path, &ctx->new_fdset_id);
+        if (r < 0) {
+                replace_ctx_unref(slot_ref);
+                goto rollback_sync;
+        }
+        add_fd_queued = true;
+
+        /* 2. blockdev-add (new file node, new fdset) */
+        QmpFileNodeParams file_params = {
+                .node_name = ctx->new_file_node_name,
+                .filename  = ctx->new_fdset_path,
+                .driver    = FLAGS_SET(new_flags, QMP_DRIVE_BLOCK_DEVICE) ? "host_device" : "file",
+                .flags     = new_flags,
+        };
+        r = qmp_build_blockdev_add_file(&file_params, &file_args);
+        if (r < 0)
+                goto rollback_sync;
+
+        slot_ref = replace_ctx_ref(ctx);
+        r = qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(file_args),
+                              on_replace_blockdev_add_complete, slot_ref);
+        if (r < 0) {
+                replace_ctx_unref(slot_ref);
+                goto rollback_sync;
+        }
+
+        /* 3. remove-fd (new fdset; blockdev-add holds the dup) */
+        slot_ref = replace_ctx_ref(ctx);
+        r = qmp_fdset_remove(bridge->qmp, ctx->new_fdset_id,
+                             on_replace_observe_stage, slot_ref);
+        if (r < 0) {
+                replace_ctx_unref(slot_ref);
+                goto rollback_sync;
+        }
+        remove_fd_queued = true;
+
+        /* 4. blockdev-reopen the format node, file → new
+         * NB: the option set must be a superset of every field
+         * qmp_build_blockdev_add_format() may emit; otherwise reopen rejects
+         * "Cannot reset option X to default" or silently flips a flag.
+         * No "backing" field: only ephemeral overlays carry backing, and
+         * those are never REMOVABLE. */
+        r = sd_json_buildo(&reopen_args,
+                        SD_JSON_BUILD_PAIR("options", SD_JSON_BUILD_ARRAY(
+                                SD_JSON_BUILD_OBJECT(
+                                        SD_JSON_BUILD_PAIR_STRING("node-name", drive->qmp_node_name),
+                                        SD_JSON_BUILD_PAIR_STRING("driver",    drive->format),
+                                        SD_JSON_BUILD_PAIR_STRING("file",      ctx->new_file_node_name),
+                                        /* blockdev-reopen resets unspecified options to driver defaults,
+                                         * so emit the format-agnostic options unconditionally. */
+                                        SD_JSON_BUILD_PAIR_BOOLEAN("read-only", FLAGS_SET(new_flags, QMP_DRIVE_READ_ONLY)),
+                                        SD_JSON_BUILD_PAIR_STRING("discard",
+                                                                  FLAGS_SET(new_flags, QMP_DRIVE_DISCARD) ? "unmap" : "ignore"),
+                                        /* qcow2-only option; raw rejects it as an unknown property. */
+                                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(new_flags, QMP_DRIVE_DISCARD_NO_UNREF),
+                                                                     "discard-no-unref", SD_JSON_BUILD_BOOLEAN(true))))));
+        if (r < 0)
+                goto rollback_sync;
+
+        slot_ref = replace_ctx_ref(ctx);
+        r = qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "blockdev-reopen", QMP_CLIENT_ARGS(reopen_args),
+                              on_replace_blockdev_reopen_complete, slot_ref);
+        if (r < 0) {
+                replace_ctx_unref(slot_ref);
+                goto rollback_sync;
+        }
+
+        return 0;
+
+rollback_sync:
+        /* Mark failed so any in-flight callbacks observe the failure and
+         * rollback their just-added state retroactively. */
+        ctx->state |= REPLACE_CTX_FAILED;
+
+        /* The add-fd completion only observes errors, and the retroactive
+         * rollback in the blockdev-add completion only deletes the file node.
+         * If add-fd was queued but remove-fd never was, nothing else would
+         * release the new fdset, so queue a best-effort remove-fd here. It is
+         * queued after any in-flight blockdev-add, i.e. in the same relative
+         * order as the regular sequence. */
+        if (add_fd_queued && !remove_fd_queued) {
+                _cleanup_(sd_json_variant_unrefp) sd_json_variant *rm_args = NULL;
+                if (sd_json_buildo(&rm_args, SD_JSON_BUILD_PAIR_UNSIGNED("fdset-id", ctx->new_fdset_id)) >= 0)
+                        (void) qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "remove-fd",
+                                                 QMP_CLIENT_ARGS(rm_args),
+                                                 on_qmp_complete, (void*) "replace rollback remove-fd");
+        }
+
+        drive->state &= ~BLOCK_DEVICE_STATE_REPLACE_PENDING;
+        drive->link = sd_varlink_unref(drive->link);
+        return sd_varlink_error_errno(link, r);
+}
+
 int vmspawn_qmp_setup_network(VmspawnQmpBridge *bridge, NetworkInfo *network) {
         _cleanup_(sd_json_variant_unrefp) sd_json_variant *netdev_args = NULL, *device_args = NULL;
         bool tap_by_fd;
index dca0b03b10a824f1d52db321ff181755fa604dbd..f627c663757fe768348515d5cb7e0e0cdc3a2046 100644 (file)
@@ -80,6 +80,7 @@ typedef enum BlockDeviceStateFlags {
         BLOCK_DEVICE_STATE_ADD_FAILED      = 1u << 1,  /* first error fired; suppress cascades */
         BLOCK_DEVICE_STATE_REMOVE_PENDING  = 1u << 2,  /* device_del in flight; reject concurrent removes */
         BLOCK_DEVICE_STATE_FILE_NODE_ADDED = 1u << 3,  /* blockdev-add(file) succeeded; teardown must del it */
+        BLOCK_DEVICE_STATE_REPLACE_PENDING = 1u << 4,  /* blockdev-reopen pipeline in flight */
 } BlockDeviceStateFlags;
 
 /* Ref-counted; each of the four add-stage QMP slots holds one ref.
@@ -184,4 +185,12 @@ int vmspawn_qmp_setup_virtiofs(VmspawnQmpBridge *bridge, const VirtiofsInfos *vi
 int vmspawn_qmp_setup_vsock(VmspawnQmpBridge *bridge, VsockInfo *vsock);
 int vmspawn_qmp_add_block_device(VmspawnQmpBridge *bridge, DriveInfo *drive);
 int vmspawn_qmp_remove_block_device(VmspawnQmpBridge *bridge, sd_varlink *link, const char *id);
+/* fd_flags encodes the new fd's properties: only QMP_DRIVE_READ_ONLY and
+ * QMP_DRIVE_BLOCK_DEVICE are caller-controlled; other bits are ignored.
+ * Takes ownership of fd. The Varlink reply on link is sent either directly
+ * (synchronous failures) or later from the QMP completion callbacks. */
+int vmspawn_qmp_replace_block_device(
+                VmspawnQmpBridge *bridge,
+                sd_varlink *link,
+                const char *id,
+                int fd,
+                QmpDriveFlags fd_flags);
 int vmspawn_qmp_dispatch_device_deleted(VmspawnQmpBridge *bridge, sd_json_variant *data);
index a782a5c9c1a4fc60a19061c779de3dd5862beadb..cb56a5a59799632c95db6e8aaa676920e193ab46 100644 (file)
@@ -1,5 +1,8 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
+#include <fcntl.h>
+#include <sys/stat.h>
+
 #include "alloc-util.h"
 #include "errno-util.h"
 #include "fd-util.h"
@@ -7,6 +10,7 @@
 #include "log.h"
 #include "path-util.h"
 #include "qmp-client.h"
+#include "stat-util.h"
 #include "string-util.h"
 #include "strv.h"
 #include "varlink-io.systemd.MachineInstance.h"
@@ -241,6 +245,62 @@ static int vl_method_remove_storage(sd_varlink *link, sd_json_variant *parameter
         return vmspawn_qmp_remove_block_device(ctx->bridge, link, p.name);
 }
 
+/* Varlink handler for io.systemd.MachineInstance.ReplaceStorage.
+ *
+ * Validates the "name" and "fileDescriptorIndex" parameters, takes the passed
+ * fd from the connection, and checks that it is a regular file or block device
+ * that is neither O_PATH nor write-only. It then derives the drive flags from
+ * the fd and hands everything to vmspawn_qmp_replace_block_device().
+ *
+ * The successful reply is sent asynchronously, from
+ * on_replace_old_blockdev_del_complete or replace_fail. */
+static int vl_method_replace_storage(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata);
+        int r;
+
+        struct {
+                int fd_index;
+                const char *name;
+        } p = {
+                .fd_index = -1,
+        };
+
+        static const sd_json_dispatch_field dispatch_table[] = {
+                { "fileDescriptorIndex", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int,          voffsetof(p, fd_index), SD_JSON_MANDATORY },
+                { "name",                SD_JSON_VARIANT_STRING,        sd_json_dispatch_const_string, voffsetof(p, name),     SD_JSON_MANDATORY },
+                {}
+        };
+
+        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
+        if (r != 0)
+                return r;
+
+        /* Parameter sanity. */
+        if (isempty(p.name))
+                return sd_varlink_error_invalid_parameter_name(link, "name");
+        if (p.fd_index < 0)
+                return sd_varlink_error_invalid_parameter_name(link, "fileDescriptorIndex");
+
+        _cleanup_close_ int fd = sd_varlink_take_fd(link, p.fd_index);
+        if (fd < 0)
+                return sd_varlink_error_errno(link, fd);
+
+        /* Only regular files and block devices can back a drive. */
+        struct stat st;
+        if (fstat(fd, &st) < 0)
+                return sd_varlink_error_errno(link, -errno);
+        r = stat_verify_regular_or_block(&st);
+        if (r < 0)
+                return sd_varlink_error_errno(link, r);
+
+        int oflags = fcntl(fd, F_GETFL);
+        if (oflags < 0)
+                return sd_varlink_error_errno(link, -errno);
+
+        /* The fd must be readable: reject O_PATH and write-only descriptors. */
+        int accmode = oflags & O_ACCMODE_STRICT;
+        if (FLAGS_SET(oflags, O_PATH) || accmode == O_WRONLY)
+                return sd_varlink_error_errno(link, -EBADF);
+
+        /* Translate the fd's properties into drive flags. */
+        QmpDriveFlags fd_flags = 0;
+        if (accmode == O_RDONLY)
+                fd_flags |= QMP_DRIVE_READ_ONLY;
+        if (S_ISBLK(st.st_mode))
+                fd_flags |= QMP_DRIVE_BLOCK_DEVICE;
+
+        return vmspawn_qmp_replace_block_device(ctx->bridge, link, p.name, TAKE_FD(fd), fd_flags);
+}
+
 static int vl_method_subscribe_events(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
         VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata);
         _cleanup_strv_free_ char **filter = NULL;
@@ -478,7 +538,8 @@ int vmspawn_varlink_setup(
                         "io.systemd.MachineInstance.Describe",          vl_method_describe,
                         "io.systemd.MachineInstance.SubscribeEvents",   vl_method_subscribe_events,
                         "io.systemd.MachineInstance.AddStorage",        vl_method_add_storage,
-                        "io.systemd.MachineInstance.RemoveStorage",     vl_method_remove_storage);
+                        "io.systemd.MachineInstance.RemoveStorage",     vl_method_remove_storage,
+                        "io.systemd.MachineInstance.ReplaceStorage",    vl_method_replace_storage);
         if (r < 0)
                 return log_error_errno(r, "Failed to bind varlink methods: %m");