]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
vmspawn-qmp: add the hotplug-capable block-device add machinery
authorChristian Brauner <brauner@kernel.org>
Tue, 21 Apr 2026 22:34:50 +0000 (00:34 +0200)
committerChristian Brauner <brauner@kernel.org>
Fri, 24 Apr 2026 12:39:25 +0000 (14:39 +0200)
This is the bulk of the runtime block-device hotplug feature. The
boot-time qmp_setup_regular_drive() path is rewritten on top of a new
vmspawn_qmp_add_block_device() that owns the DriveInfo, drives a four
QMP-command pipeline (add-fd → blockdev-add → remove-fd → device_add)
through staged ref-counted callbacks, and registers the drive in a
per-bridge block-device registry on success.

New on the bridge:

  - block_devices: user-id → DriveInfo* registry (owned ref).
  - hotplug_port_owner[VMSPAWN_PCIE_HOTPLUG_SPARES]: per-port owner
    string for the spare pcie-root-ports, allocated/released through
    vmspawn_qmp_bridge_{allocate,release_pcie_port_by_idx}().
  - scsi_controller_port_idx / scsi_controller_created: track the
    on-demand virtio-scsi-pci controller so the first SCSI hotplug
    creates it (against an allocated spare port) and subsequent ones
    just attach.
  - next_block_counter: already in place from the previous commit, now
    actually consumed by add_block_device().

New on DriveInfo:

  - bridge (weak), id (varlink-visible — caller-supplied or auto-set
    to qmp_device_id), disk_type (for List replies later), counter,
    qmp_node_name / qmp_device_id (already added), fdset_path,
    pcie_port_idx (the hotplug port reserved by this drive — stays
    set across the add pipeline so drive_info_free releases it when
    the registry ref drops at DEVICE_DELETED time),
    rollback_mask (BlockDeviceAddStage bits of completed stages —
    plus a FAILED sentinel that suppresses cascading errors), and
    link (NULL ⇒ boot-time, sd_event_exit on fail; non-NULL ⇒
    hotplug, varlink reply on fail).

Helpers:

  - drive_info_unref() now releases any reserved hotplug port through
    the bridge and unrefs link.
  - drive_info_add_fail(): single failure entry point — fires the
    teardown for completed stages, sets the FAILED bit, and sends
    either the varlink error or sd_event_exit. Boot-time failures
    (link == NULL) always exit the loop, so late-arriving ephemeral
    continuation replies don't get silenced after cont.
  - vmspawn_qmp_block_device_teardown(): post-hoc blockdev-del when
    blockdev-add succeeded but a later stage failed.
  - reply_qmp_error: varlink reply helper — disconnect errors map to
    io.systemd.MachineInstance.NotConnected, everything else goes
    through sd_varlink_error_errno.
  - on_add_observe_stage / on_add_blockdev_stage /
    on_add_device_add_complete: the staged callbacks that drive the
    add pipeline, each holding one slot ref on the DriveInfo. The
    ephemeral blockdev-create continuation reuses the latter two so
    its post-cont replies go through drive_info_add_fail instead of
    the generic on_qmp_complete (which would silently log under
    setup_done).
  - on_scsi_controller_complete: handles the SCSI controller setup
    (releases the reserved port on failure, propagates the boot-time
    fatal error policy).
  - qmp_build_blockdev_add_inline(): single blockdev-add JSON that
    creates the file+format pair as one node, used by the hotplug
    path (the boot-time helpers stay separate so they can stack with
    the ephemeral path's base/overlay format nodes).

EphemeralDriveCtx is trimmed down to a DriveInfo ref plus the two
ephemeral-local scratch node names (overlay-file, base-fmt). The
copies of disk_driver/serial/pcie_port/flags/qmp_node_name/
qmp_device_id go away — the continuation reads them straight off the
ref'd drive. qmp_setup_ephemeral_drive now sets drive->bridge /
drive->id / drive->counter up front (matching the hotplug path) and
folds the feature-dependent DISCARD_NO_UNREF into drive->flags so
qmp_build_blockdev_add_format picks it up.

vmspawn_qmp_bridge_free() unrefs the qmp client first — its pending
callbacks may still reach for the bridge's hotplug port table when
they drop their last DriveInfo ref — then tears down the hashmaps
and the port owner strings.

The boot-time qmp_setup_regular_drive() collapses to a thin wrapper
that asserts "caller hasn't pre-set drive->id", takes ownership and
calls vmspawn_qmp_add_block_device(); the dispatcher in
vmspawn_qmp_setup_drives() now hands ownership over with TAKE_PTR.
The previous qmp_setup_drive() dispatcher disappears (its body is
inlined into the loop). vmspawn_qmp_init() initialises
scsi_controller_port_idx to -1.

The cosmetic (*d) → DriveInfo *drive = *d; locals in
drives_need_scsi_controller (vmspawn-qmp.c) and assign_pcie_ports
(vmspawn.c) ride along — they live in code touched by this commit
and would otherwise produce churn.

vmspawn_qmp_remove_block_device() — the symmetric remove API — is
added in the next commit so this one stays focused on the add path.

Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
src/vmspawn/vmspawn-qmp.c
src/vmspawn/vmspawn-qmp.h
src/vmspawn/vmspawn.c

index 7eff45f5e933fb57a2c366f9e602084c6c7f3cd9..9158d5b10a99b20be3eb0c4ac58cd93f93cf6c2b 100644 (file)
@@ -6,9 +6,11 @@
 
 #include "sd-event.h"
 #include "sd-json.h"
+#include "sd-varlink.h"
 
 #include "alloc-util.h"
 #include "blockdev-util.h"
+#include "errno-util.h"
 #include "ether-addr-util.h"
 #include "fd-util.h"
 #include "hashmap.h"
 #include "string-util.h"
 #include "strv.h"
 #include "vmspawn-qmp.h"
+#include "vmspawn-util.h"
 
 DEFINE_PRIVATE_HASH_OPS_FULL(
                 pending_job_hash_ops,
                 char, string_hash_func, string_compare_func, free,
                 PendingJob, pending_job_free);
 
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+                block_devices_hash_ops,
+                char, string_hash_func, string_compare_func,
+                DriveInfo, drive_info_unref);
+
 DriveInfo* drive_info_new(void) {
         DriveInfo *d = new(DriveInfo, 1);
         if (!d)
@@ -34,20 +42,68 @@ DriveInfo* drive_info_new(void) {
                 .n_ref = 1,
                 .fd = -EBADF,
                 .overlay_fd = -EBADF,
+                .pcie_port_idx = -1,
         };
         return d;
 }
 
+static int vmspawn_qmp_bridge_allocate_pcie_port(
+                VmspawnQmpBridge *bridge,
+                const char *owner_id,
+                char **ret_name,
+                int *ret_idx) {
+
+        assert(bridge);
+        assert(owner_id);
+        assert(ret_name);
+        assert(ret_idx);
+
+        for (int i = 0; i < VMSPAWN_PCIE_HOTPLUG_SPARES; i++) {
+                if (bridge->hotplug_port_owner[i])
+                        continue;
+
+                _cleanup_free_ char *owner = strdup(owner_id), *name = NULL;
+                if (!owner || asprintf(&name, "vmspawn-hotplug-pci-root-port-%d", i) < 0)
+                        return -ENOMEM;
+
+                bridge->hotplug_port_owner[i] = TAKE_PTR(owner);
+                *ret_name = TAKE_PTR(name);
+                *ret_idx = i;
+                return 0;
+        }
+
+        return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+                               "No free PCIe hotplug port available for owner '%s'.",
+                               owner_id);
+}
+
+static void vmspawn_qmp_bridge_release_pcie_port_by_idx(VmspawnQmpBridge *bridge, int idx) {
+        assert(bridge);
+
+        if (idx < 0)
+                return;
+
+        assert(idx < VMSPAWN_PCIE_HOTPLUG_SPARES);
+
+        bridge->hotplug_port_owner[idx] = mfree(bridge->hotplug_port_owner[idx]);
+}
+
 static DriveInfo* drive_info_free(DriveInfo *d) {
         assert(d);
 
+        if (d->bridge)
+                vmspawn_qmp_bridge_release_pcie_port_by_idx(d->bridge, d->pcie_port_idx);
+
         free(d->path);
         free(d->format);
         free(d->disk_driver);
         free(d->serial);
+        free(d->pcie_port);
+        free(d->id);
         free(d->qmp_node_name);
         free(d->qmp_device_id);
-        free(d->pcie_port);
+        free(d->fdset_path);
+        sd_varlink_unref(d->link);
         safe_close(d->fd);
         safe_close(d->overlay_fd);
         return mfree(d);
@@ -61,7 +117,6 @@ void drive_infos_done(DriveInfos *infos) {
                 drive_info_unref(*d);
         infos->drives = mfree(infos->drives);
         infos->n_drives = 0;
-        infos->scsi_pcie_port = mfree(infos->scsi_pcie_port);
 }
 
 void network_info_done(NetworkInfo *info) {
@@ -269,6 +324,43 @@ static int qmp_build_device_add(const DriveInfo *drive, sd_json_variant **ret) {
                                         "bus", SD_JSON_BUILD_STRING(drive->pcie_port)));
 }
 
+/* Inline form: one blockdev-add creates format+file; one blockdev-del tears
+ * down the whole tree. Used for regular boot drives and hotplug. */
+static int qmp_build_blockdev_add_inline(
+                const char *node_name,
+                const char *format,
+                const char *filename,
+                const char *file_driver,
+                QmpDriveFlags flags,
+                VmspawnQmpFeatureFlags features,
+                sd_json_variant **ret) {
+
+        bool use_io_uring = FLAGS_SET(features, VMSPAWN_QMP_FEATURE_IO_URING);
+        bool use_discard_no_unref = FLAGS_SET(flags, QMP_DRIVE_DISCARD_NO_UNREF);
+
+        assert(node_name);
+        assert(format);
+        assert(filename);
+        assert(file_driver);
+        assert(ret);
+
+        return sd_json_buildo(
+                        ret,
+                        SD_JSON_BUILD_PAIR_STRING("node-name", node_name),
+                        SD_JSON_BUILD_PAIR_STRING("driver", format),
+                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(flags, QMP_DRIVE_READ_ONLY), "read-only", SD_JSON_BUILD_BOOLEAN(true)),
+                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(flags, QMP_DRIVE_DISCARD), "discard", JSON_BUILD_CONST_STRING("unmap")),
+                        SD_JSON_BUILD_PAIR_CONDITION(use_discard_no_unref, "discard-no-unref", SD_JSON_BUILD_BOOLEAN(true)),
+                        SD_JSON_BUILD_PAIR("file", SD_JSON_BUILD_OBJECT(
+                                        SD_JSON_BUILD_PAIR_STRING("driver", file_driver),
+                                        SD_JSON_BUILD_PAIR_STRING("filename", filename),
+                                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(flags, QMP_DRIVE_READ_ONLY), "read-only", SD_JSON_BUILD_BOOLEAN(true)),
+                                        SD_JSON_BUILD_PAIR_CONDITION(use_io_uring, "aio", JSON_BUILD_CONST_STRING("io_uring")),
+                                        SD_JSON_BUILD_PAIR("cache", SD_JSON_BUILD_OBJECT(
+                                                        SD_JSON_BUILD_PAIR_BOOLEAN("direct", false),
+                                                        SD_JSON_BUILD_PAIR_BOOLEAN("no-flush", FLAGS_SET(flags, QMP_DRIVE_NO_FLUSH)))))));
+}
+
 /* Issue blockdev-add for a file node. */
 static int qmp_add_file_node(QmpClient *qmp, const QmpFileNodeParams *p) {
         _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
@@ -329,28 +421,27 @@ static int get_image_virtual_size(int fd, const char *format, bool is_block_devi
         return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Unsupported image format '%s'", format);
 }
 
+/* Forward declarations — on_ephemeral_create_concluded routes failures through
+ * the shared block-device add callbacks defined further below. */
+static int drive_info_add_fail(DriveInfo *d, int error, const char *error_desc);
+static int on_add_blockdev_stage(QmpClient *client, sd_json_variant *result,
+                                 const char *error_desc, int error, void *userdata);
+static int on_add_device_add_complete(QmpClient *client, sd_json_variant *result,
+                                      const char *error_desc, int error, void *userdata);
+
 /* Continuation state for on_ephemeral_create_concluded: overlay format + device_add. */
 typedef struct EphemeralDriveCtx {
-        char *qmp_node_name;       /* overlay format node name, "vmspawn-<N>-storage" */
-        char *qmp_device_id;       /* qdev id, "vmspawn-<N>-disk" */
+        DriveInfo *drive;          /* ref */
         char *overlay_file_node;
         char *base_fmt_node;
-        char *disk_driver;
-        char *serial;
-        char *pcie_port;           /* NULL on non-PCIe */
-        QmpDriveFlags flags;       /* subset forwarded to device_add */
 } EphemeralDriveCtx;
 
 static EphemeralDriveCtx* ephemeral_drive_ctx_free(EphemeralDriveCtx *ctx) {
         if (!ctx)
                 return NULL;
-        free(ctx->qmp_node_name);
-        free(ctx->qmp_device_id);
+        drive_info_unref(ctx->drive);
         free(ctx->overlay_file_node);
         free(ctx->base_fmt_node);
-        free(ctx->disk_driver);
-        free(ctx->serial);
-        free(ctx->pcie_port);
         return mfree(ctx);
 }
 
@@ -363,50 +454,48 @@ static void ephemeral_drive_ctx_free_void(void *p) {
 static int on_ephemeral_create_concluded(QmpClient *qmp, void *userdata) {
         _cleanup_(ephemeral_drive_ctx_freep) EphemeralDriveCtx *ctx = ASSERT_PTR(userdata);
         _cleanup_(sd_json_variant_unrefp) sd_json_variant *fmt_args = NULL, *device_args = NULL;
+        _cleanup_(drive_info_unrefp) DriveInfo *slot_ref = NULL;
+        DriveInfo *drive = ctx->drive;
         int r;
 
+        assert(qmp);
+
         /* Open formatted overlay as qcow2 with backing reference */
         QmpFormatNodeParams overlay_fmt_params = {
-                .node_name      = ctx->qmp_node_name,
+                .node_name      = drive->qmp_node_name,
                 .format         = "qcow2",
                 .file_node_name = ctx->overlay_file_node,
                 .backing        = ctx->base_fmt_node,
-                .flags          = ctx->flags & (QMP_DRIVE_DISCARD|QMP_DRIVE_DISCARD_NO_UNREF),
+                .flags          = drive->flags & (QMP_DRIVE_DISCARD|QMP_DRIVE_DISCARD_NO_UNREF),
         };
         r = qmp_build_blockdev_add_format(&overlay_fmt_params, &fmt_args);
         if (r < 0)
-                return log_error_errno(r, "Failed to build overlay format JSON for '%s': %m", ctx->qmp_node_name);
+                return drive_info_add_fail(drive, r, NULL);
 
-        r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(fmt_args), on_qmp_complete, (void*) "blockdev-add");
+        slot_ref = drive_info_ref(drive);
+        r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(fmt_args),
+                              on_add_blockdev_stage, slot_ref);
         if (r < 0)
-                return r;
+                return drive_info_add_fail(drive, r, NULL);
+        TAKE_PTR(slot_ref);
 
-        /* Temporary DriveInfo view so we can reuse qmp_build_device_add(). */
-        const DriveInfo tmp = {
-                .disk_driver    = ctx->disk_driver,
-                .serial         = ctx->serial,
-                .pcie_port      = ctx->pcie_port,
-                .flags          = ctx->flags & QMP_DRIVE_BOOT,
-                .fd             = -EBADF,
-                .overlay_fd     = -EBADF,
-                .qmp_node_name  = ctx->qmp_node_name,
-                .qmp_device_id  = ctx->qmp_device_id,
-        };
-        r = qmp_build_device_add(&tmp, &device_args);
+        r = qmp_build_device_add(drive, &device_args);
         if (r < 0)
-                return log_error_errno(r, "Failed to build device_add JSON for '%s': %m", ctx->qmp_device_id);
+                return drive_info_add_fail(drive, r, NULL);
 
-        r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_complete, (void*) "device_add");
+        slot_ref = drive_info_ref(drive);
+        r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(device_args),
+                              on_add_device_add_complete, slot_ref);
         if (r < 0)
-                return r;
+                return drive_info_add_fail(drive, r, NULL);
+        TAKE_PTR(slot_ref);
 
-        log_debug("Queued ephemeral drive completion for '%s'", ctx->qmp_device_id);
+        log_debug("Queued ephemeral drive completion for '%s'", drive->qmp_device_id);
         return 0;
 }
 
-/* Set up an ephemeral drive: base image (read-only) + anonymous qcow2 overlay (read-write).
- * The final steps (overlay format + device_add) are deferred to a job continuation that
- * fires when the blockdev-create job concludes. */
+/* Base image (read-only) + anonymous qcow2 overlay (read-write). Overlay format
+ * and device_add run from the blockdev-create continuation. */
 static int qmp_setup_ephemeral_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
         int r;
 
@@ -416,16 +505,24 @@ static int qmp_setup_ephemeral_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, D
         assert(drive->fd >= 0);
         assert(drive->overlay_fd >= 0);
 
-        uint64_t counter = bridge->next_block_counter++;
+        drive->bridge = bridge;
+        drive->counter = bridge->next_block_counter++;
 
         _cleanup_free_ char *base_file_node = NULL, *base_fmt_node = NULL, *overlay_file_node = NULL;
-        if (asprintf(&drive->qmp_node_name, "vmspawn-%" PRIu64 "-storage", counter) < 0 ||
-            asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", counter) < 0 ||
-            asprintf(&base_file_node, "vmspawn-%" PRIu64 "-base-file", counter) < 0 ||
-            asprintf(&base_fmt_node, "vmspawn-%" PRIu64 "-base-fmt", counter) < 0 ||
-            asprintf(&overlay_file_node, "vmspawn-%" PRIu64 "-overlay-file", counter) < 0)
+        if (asprintf(&drive->qmp_node_name, "vmspawn-%" PRIu64 "-storage", drive->counter) < 0 ||
+            asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", drive->counter) < 0 ||
+            asprintf(&base_file_node, "vmspawn-%" PRIu64 "-base-file", drive->counter) < 0 ||
+            asprintf(&base_fmt_node, "vmspawn-%" PRIu64 "-base-fmt", drive->counter) < 0 ||
+            asprintf(&overlay_file_node, "vmspawn-%" PRIu64 "-overlay-file", drive->counter) < 0)
                 return log_oom();
 
+        /* Auto-assigned user id reuses qmp_device_id (matching vmspawn_qmp_add_block_device). */
+        if (!drive->id) {
+                drive->id = strdup(drive->qmp_device_id);
+                if (!drive->id)
+                        return log_oom();
+        }
+
         /* Read virtual size before passing the fd to QEMU (TAKE_FD consumes it) */
         uint64_t virtual_size;
         r = get_image_virtual_size(drive->fd, drive->format, FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE), &virtual_size);
@@ -515,7 +612,7 @@ static int qmp_setup_ephemeral_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, D
                 return log_error_errno(r, "Failed to build blockdev-create options: %m");
 
         _cleanup_free_ char *job_id = NULL;
-        if (asprintf(&job_id, "vmspawn-%" PRIu64 "-overlay-create", counter) < 0)
+        if (asprintf(&job_id, "vmspawn-%" PRIu64 "-overlay-create", drive->counter) < 0)
                 return log_oom();
 
         _cleanup_(sd_json_variant_unrefp) sd_json_variant *cmd_args = NULL;
@@ -525,29 +622,23 @@ static int qmp_setup_ephemeral_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, D
         if (r < 0)
                 return log_error_errno(r, "Failed to build blockdev-create JSON: %m");
 
+        /* Fold DISCARD_NO_UNREF into drive->flags so the continuation's overlay format blockdev-add
+         * picks it up via drive->flags. */
+        if (FLAGS_SET(drive->flags, QMP_DRIVE_DISCARD) &&
+            FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_DISCARD_NO_UNREF))
+                drive->flags |= QMP_DRIVE_DISCARD_NO_UNREF;
+
         /* Register continuation: when the job concludes, fire overlay format + device_add */
         _cleanup_(ephemeral_drive_ctx_freep) EphemeralDriveCtx *ectx = new(EphemeralDriveCtx, 1);
         if (!ectx)
                 return log_oom();
 
-        QmpDriveFlags ectx_flags = drive->flags & (QMP_DRIVE_DISCARD|QMP_DRIVE_BOOT);
-        if (FLAGS_SET(drive->flags, QMP_DRIVE_DISCARD) &&
-            FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_DISCARD_NO_UNREF))
-                ectx_flags |= QMP_DRIVE_DISCARD_NO_UNREF;
-
         *ectx = (EphemeralDriveCtx) {
-                .qmp_node_name      = strdup(drive->qmp_node_name),
-                .qmp_device_id      = strdup(drive->qmp_device_id),
+                .drive              = drive_info_ref(drive),
                 .overlay_file_node  = strdup(overlay_file_node),
                 .base_fmt_node      = strdup(base_fmt_node),
-                .disk_driver        = strdup(drive->disk_driver),
-                .serial             = drive->serial ? strdup(drive->serial) : NULL,
-                .pcie_port          = drive->pcie_port ? strdup(drive->pcie_port) : NULL,
-                .flags              = ectx_flags,
         };
-        if (!ectx->qmp_node_name || !ectx->qmp_device_id || !ectx->overlay_file_node ||
-            !ectx->base_fmt_node || !ectx->disk_driver ||
-            (drive->serial && !ectx->serial) || (drive->pcie_port && !ectx->pcie_port))
+        if (!ectx->overlay_file_node || !ectx->base_fmt_node)
                 return log_oom();
 
         r = vmspawn_qmp_bridge_register_job(bridge, job_id,
@@ -559,91 +650,345 @@ static int qmp_setup_ephemeral_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, D
         TAKE_PTR(ectx);
 
         r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "blockdev-create", QMP_CLIENT_ARGS(cmd_args), on_qmp_complete, (void*) "blockdev-create");
-        if (r < 0)
+        if (r < 0) {
+                _unused_ _cleanup_(pending_job_freep) PendingJob *dead = hashmap_remove(bridge->pending_jobs, job_id);
                 return log_error_errno(r, "Failed to send blockdev-create for '%s': %m", drive->path);
+        }
 
         log_debug("Queued ephemeral drive setup for '%s' (job %s)", drive->path, job_id);
         return 0;
 }
 
-/* Set up a regular (non-ephemeral) drive: single file node + format node + device_add. */
-static int qmp_setup_regular_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
+static int reply_qmp_error(sd_varlink *link, const char *error_desc, int error) {
+        assert(link);
+
+        if (ERRNO_IS_DISCONNECT(error))
+                return sd_varlink_error(link, "io.systemd.MachineInstance.NotConnected", NULL);
+        if (error_desc)
+                log_warning("QMP error: %s", error_desc);
+        return sd_varlink_error_errno(link, error < 0 ? error : -EIO);
+}
+
+/* After the pipelined remove-fd at add time, QEMU auto-frees the fdset when
+ * raw_close (during blockdev-del) releases the last dup. So teardown only
+ * needs to fire blockdev-del. */
+static void vmspawn_qmp_block_device_teardown(QmpClient *client, const char *qmp_node_name, BlockDeviceStateFlags stages) {
+        assert(client);
+        assert(qmp_node_name);
+
+        if (!FLAGS_SET(stages, BLOCK_DEVICE_STATE_BLOCKDEV_ADDED))
+                return;
+
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+        if (sd_json_buildo(&args, SD_JSON_BUILD_PAIR_STRING("node-name", qmp_node_name)) >= 0)
+                (void) qmp_client_invoke(client, /* ret_slot= */ NULL, "blockdev-del", QMP_CLIENT_ARGS(args),
+                                         on_qmp_complete, (void*) "teardown blockdev-del");
+}
+
+/* Insert into the owning primary map and the non-owning qmp_device_id view. On
+ * secondary-put failure, roll back the primary so neither map carries a stale entry. */
+static int bridge_register_drive(VmspawnQmpBridge *b, DriveInfo *d) {
+        int r;
+
+        assert(b);
+        assert(d);
+        assert(d->id);
+        assert(d->qmp_device_id);
+
+        r = hashmap_ensure_put(&b->block_devices, &block_devices_hash_ops,
+                               d->id, drive_info_ref(d));
+        if (r < 0) {
+                drive_info_unref(d);
+                return r;
+        }
+
+        r = hashmap_ensure_put(&b->block_devices_by_qmp_id, &string_hash_ops,
+                               d->qmp_device_id, d);
+        if (r < 0) {
+                drive_info_unref(hashmap_remove(b->block_devices, d->id));
+                return r;
+        }
+
+        return 0;
+}
+
+/* Drop the drive from both maps; returns the pointer removed from the primary
+ * (NULL if it wasn't there) so the caller can decide whether to unref. */
+static DriveInfo* bridge_unregister_drive(VmspawnQmpBridge *b, DriveInfo *d) {
+        assert(b);
+        assert(d);
+
+        hashmap_remove_value(b->block_devices_by_qmp_id, d->qmp_device_id, d);
+        return hashmap_remove_value(b->block_devices, d->id, d);
+}
+
+/* First-error entry point: marks FAILED so cascading callbacks no-op, drops
+ * the registry slot, then replies on the link or exits the loop. */
+static int drive_info_add_fail(DriveInfo *d, int error, const char *error_desc) {
+        assert(d);
+
+        if (FLAGS_SET(d->state, BLOCK_DEVICE_STATE_ADD_FAILED))
+                return 0;
+
+        vmspawn_qmp_block_device_teardown(d->bridge->qmp, d->qmp_node_name, d->state);
+        d->state = BLOCK_DEVICE_STATE_ADD_FAILED;
+
+        if (bridge_unregister_drive(d->bridge, d))
+                drive_info_unref(d);
+
+        if (d->link) {
+                (void) reply_qmp_error(d->link, error_desc, error);
+                d->link = sd_varlink_unref(d->link);
+                return 0;
+        }
+
+        log_error_errno(error, "Block device '%s' setup failed: %s",
+                        strna(d->id), strna(error_desc));
+
+        /* Boot-time (link == NULL) is always fatal — even for late-arriving ephemeral replies. */
+        return sd_event_exit(qmp_client_get_event(d->bridge->qmp), error);
+}
+
+/* Rolls back the up-front registry insert on a sync error path. */
+static void drive_info_unregister_on_failurep(DriveInfo **dp) {
+        assert(dp);
+
+        DriveInfo *d = *dp;
+        if (!d)
+                return;
+        d->state |= BLOCK_DEVICE_STATE_ADD_FAILED;
+        if (bridge_unregister_drive(d->bridge, d))
+                drive_info_unref(d);
+}
+
+/* Shared by the intermediate stages that don't need to record a rollback bit
+ * (add-fd, remove-fd). Just forwards errors to drive_info_add_fail so cascades
+ * from earlier stage failures get suppressed via the FAILED sentinel. */
+static int on_add_observe_stage(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        _cleanup_(drive_info_unrefp) DriveInfo *d = ASSERT_PTR(userdata);
+        assert(client);
+
+        if (error < 0)
+                return drive_info_add_fail(d, error, error_desc);
+        return 0;
+}
+
+static int on_add_blockdev_stage(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        _cleanup_(drive_info_unrefp) DriveInfo *d = ASSERT_PTR(userdata);
+        assert(client);
+
+        if (error < 0)
+                return drive_info_add_fail(d, error, error_desc);
+
+        /* A sync error after blockdev-add was queued may have marked the chain FAILED.
+         * The blockdev node we just created is orphaned — tear it down retroactively
+         * and don't claim BLOCKDEV_ADDED on a drive that's already been unregistered. */
+        if (FLAGS_SET(d->state, BLOCK_DEVICE_STATE_ADD_FAILED)) {
+                vmspawn_qmp_block_device_teardown(d->bridge->qmp, d->qmp_node_name,
+                                                  BLOCK_DEVICE_STATE_BLOCKDEV_ADDED);
+                return 0;
+        }
+
+        d->state |= BLOCK_DEVICE_STATE_BLOCKDEV_ADDED;
+        return 0;
+}
+
+static int on_add_device_add_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        _cleanup_(drive_info_unrefp) DriveInfo *d = ASSERT_PTR(userdata);
+
+        assert(client);
+
+        if (error < 0)
+                return drive_info_add_fail(d, error, error_desc);
+
+        if (FLAGS_SET(d->state, BLOCK_DEVICE_STATE_ADD_FAILED))
+                return 0;
+
+        if (d->link) {
+                (void) sd_varlink_replybo(d->link, SD_JSON_BUILD_PAIR_STRING("id", d->id));
+                d->link = sd_varlink_unref(d->link);
+        }
+
+        log_info("Block device '%s' attached", d->id);
+        return 0;
+}
+
+static int on_scsi_controller_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        assert(client);
+
+        VmspawnQmpBridge *bridge = ASSERT_PTR(qmp_client_get_userdata(client));
+
+        if (error < 0) {
+                /* QEMU's device_add is transactional — on error it calls object_unparent()
+                 * before replying, so the "vmspawn_scsi" id is free for the next retry. */
+                vmspawn_qmp_bridge_release_pcie_port_by_idx(bridge, bridge->scsi_controller_port_idx);
+                bridge->scsi_controller_port_idx = -1;
+                bridge->scsi_controller_created = false;
+                log_warning("virtio-scsi-pci controller setup failed: %s", strna(error_desc));
+                if (!bridge->setup_done)
+                        return sd_event_exit(qmp_client_get_event(client), error);
+        }
+
+        return 0;
+}
+
+static int qmp_setup_scsi_controller(VmspawnQmpBridge *bridge, const char *pcie_port) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+        int r;
+
+        assert(bridge);
+
+        r = sd_json_buildo(
+                        &args,
+                        SD_JSON_BUILD_PAIR_STRING("driver", "virtio-scsi-pci"),
+                        SD_JSON_BUILD_PAIR_STRING("id", "vmspawn_scsi"),
+                        SD_JSON_BUILD_PAIR_CONDITION(!!pcie_port, "bus", SD_JSON_BUILD_STRING(pcie_port)));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build SCSI controller JSON: %m");
+
+        r = qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(args),
+                              on_scsi_controller_complete, NULL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to send SCSI controller device_add: %m");
+
+        log_debug("Queued virtio-scsi-pci controller setup");
+        return 0;
+}
+
+static int vmspawn_qmp_add_block_device(VmspawnQmpBridge *bridge, DriveInfo *drive) {
         int r;
 
         assert(bridge);
-        assert(qmp);
         assert(drive);
+        assert(drive->format);
+        assert(drive->disk_driver);
         assert(drive->fd >= 0);
 
-        uint64_t counter = bridge->next_block_counter++;
-        _cleanup_free_ char *file_node_name = NULL;
-        if (asprintf(&drive->qmp_node_name, "vmspawn-%" PRIu64 "-storage", counter) < 0 ||
-            asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", counter) < 0 ||
-            asprintf(&file_node_name, "vmspawn-%" PRIu64 "-file", counter) < 0)
+        _unused_ _cleanup_(drive_info_unrefp) DriveInfo *owned = drive;
+        _cleanup_(drive_info_unrefp) DriveInfo *slot_ref = NULL;
+        _cleanup_(drive_info_unregister_on_failurep) DriveInfo *registered = NULL;
+
+        drive->bridge = bridge;
+        drive->counter = bridge->next_block_counter++;
+        if (asprintf(&drive->qmp_node_name, "vmspawn-%" PRIu64 "-storage", drive->counter) < 0)
                 return log_oom();
+        if (asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", drive->counter) < 0)
+                return log_oom();
+        /* Auto-assigned user ids reuse qmp_device_id. */
+        if (!drive->id) {
+                drive->id = strdup(drive->qmp_device_id);
+                if (!drive->id)
+                        return log_oom();
+        }
 
-        _cleanup_free_ char *fdset_path = NULL;
-        uint64_t fdset_id;
-        r = qmp_fdset_add(qmp, TAKE_FD(drive->fd),
-                          on_qmp_complete, (void*) "add-fd", &fdset_path, &fdset_id);
+        /* Reserve the registry slot up-front so the device_add callback's commit can't fail. */
+        r = bridge_register_drive(bridge, drive);
         if (r < 0)
-                return log_error_errno(r, "Failed to send add-fd for '%s': %m", drive->path);
+                return r;
+        registered = drive;
+
+        /* First SCSI hotplug needs a virtio-scsi-pci controller to attach to. */
+        if (STR_IN_SET(drive->disk_driver, "scsi-hd", "scsi-cd") && !bridge->scsi_controller_created) {
+                _cleanup_free_ char *controller_port = NULL;
+                int controller_port_idx = -1;
+                if (ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS) {
+                        r = vmspawn_qmp_bridge_allocate_pcie_port(bridge, "vmspawn_scsi",
+                                                                  &controller_port, &controller_port_idx);
+                        if (r == -EBUSY)
+                                return log_error_errno(r, "No PCIe hotplug ports left for SCSI controller");
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to allocate PCIe hotplug port for SCSI controller: %m");
+                }
+
+                r = qmp_setup_scsi_controller(bridge, controller_port);
+                if (r < 0) {
+                        vmspawn_qmp_bridge_release_pcie_port_by_idx(bridge, controller_port_idx);
+                        return r;
+                }
 
-        QmpFileNodeParams file_params = {
-                .node_name = file_node_name,
-                .filename  = fdset_path,
-                .driver    = FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE) ? "host_device" : "file",
-                .flags     = drive->flags & (QMP_DRIVE_READ_ONLY|QMP_DRIVE_NO_FLUSH),
-        };
-        if (FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_IO_URING))
-                file_params.flags |= QMP_DRIVE_IO_URING;
-        r = qmp_add_file_node(qmp, &file_params);
+                /* Set before the reply so a second SCSI hotplug queued in the meantime
+                 * doesn't re-create the controller; reset in on_scsi_controller_complete on error. */
+                bridge->scsi_controller_port_idx = controller_port_idx;
+                bridge->scsi_controller_created = true;
+        }
+
+        uint64_t fdset_id;
+        slot_ref = drive_info_ref(drive);
+        r = qmp_fdset_add(bridge->qmp, TAKE_FD(drive->fd),
+                          on_add_observe_stage, slot_ref, &drive->fdset_path, &fdset_id);
         if (r < 0)
-                return log_error_errno(r, "Failed to send blockdev-add for '%s': %m", drive->path);
+                return r;
+        TAKE_PTR(slot_ref);
 
-        /* The file node now holds a dup of the fd; release the monitor's
-         * original so the fdset auto-frees when raw_close runs at teardown. */
-        r = qmp_fdset_remove(qmp, fdset_id, on_qmp_complete, (void*) "remove-fd");
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *blockdev_args = NULL;
+        r = qmp_build_blockdev_add_inline(
+                        drive->qmp_node_name, drive->format, drive->fdset_path,
+                        FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE) ? "host_device" : "file",
+                        drive->flags, bridge->features, &blockdev_args);
         if (r < 0)
-                return log_error_errno(r, "Failed to send remove-fd for '%s': %m", drive->path);
+                return r;
 
-        QmpFormatNodeParams fmt_params = {
-                .node_name      = drive->qmp_node_name,
-                .format         = drive->format,
-                .file_node_name = file_node_name,
-                .flags          = drive->flags & (QMP_DRIVE_READ_ONLY|QMP_DRIVE_DISCARD),
-        };
-        _cleanup_(sd_json_variant_unrefp) sd_json_variant *fmt_args = NULL;
-        r = qmp_build_blockdev_add_format(&fmt_params, &fmt_args);
+        slot_ref = drive_info_ref(drive);
+        r = qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(blockdev_args),
+                              on_add_blockdev_stage, slot_ref);
         if (r < 0)
                 return r;
+        TAKE_PTR(slot_ref);
 
-        r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(fmt_args), on_qmp_complete, (void*) "blockdev-add");
+        /* Release the monitor's original fd; the blockdev-add above took a dup. */
+        slot_ref = drive_info_ref(drive);
+        r = qmp_fdset_remove(bridge->qmp, fdset_id, on_add_observe_stage, slot_ref);
         if (r < 0)
-                return log_error_errno(r, "Failed to send blockdev-add format for '%s': %m", drive->path);
+                return r;
+        TAKE_PTR(slot_ref);
 
-        /* device_add: attach to virtual hardware */
         _cleanup_(sd_json_variant_unrefp) sd_json_variant *device_args = NULL;
         r = qmp_build_device_add(drive, &device_args);
         if (r < 0)
                 return r;
 
-        r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_complete, (void*) "device_add");
+        slot_ref = drive_info_ref(drive);
+        r = qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(device_args),
+                              on_add_device_add_complete, slot_ref);
         if (r < 0)
-                return log_error_errno(r, "Failed to send device_add for '%s': %m", drive->path);
+                return r;
+        TAKE_PTR(slot_ref);
 
-        log_debug("Queued drive setup for '%s'", drive->path);
+        TAKE_PTR(registered);
         return 0;
 }
 
-/* Configure a single drive via QMP. Dispatches to ephemeral or regular setup. */
-static int qmp_setup_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
+static int qmp_setup_regular_drive(VmspawnQmpBridge *bridge, DriveInfo *drive) {
+        assert(bridge);
         assert(drive);
+        assert(drive->fd >= 0);
+        assert(!drive->id);
 
-        if (drive->overlay_fd >= 0)
-                return qmp_setup_ephemeral_drive(bridge, qmp, drive);
-
-        return qmp_setup_regular_drive(bridge, qmp, drive);
+        return vmspawn_qmp_add_block_device(bridge, drive);
 }
 
 int vmspawn_qmp_setup_network(VmspawnQmpBridge *bridge, NetworkInfo *network) {
@@ -830,36 +1175,6 @@ int vmspawn_qmp_setup_vsock(VmspawnQmpBridge *bridge, VsockInfo *vsock) {
         return 0;
 }
 
-static bool drives_need_scsi_controller(DriveInfos *drives) {
-        assert(drives);
-
-        FOREACH_ARRAY(d, drives->drives, drives->n_drives)
-                if (STR_IN_SET((*d)->disk_driver, "scsi-hd", "scsi-cd"))
-                        return true;
-
-        return false;
-}
-
-static int qmp_setup_scsi_controller(QmpClient *qmp, const char *pcie_port) {
-        _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
-        int r;
-
-        r = sd_json_buildo(
-                        &args,
-                        SD_JSON_BUILD_PAIR_STRING("driver", "virtio-scsi-pci"),
-                        SD_JSON_BUILD_PAIR_STRING("id", "vmspawn_scsi"),
-                        SD_JSON_BUILD_PAIR_CONDITION(!!pcie_port, "bus", SD_JSON_BUILD_STRING(pcie_port)));
-        if (r < 0)
-                return log_error_errno(r, "Failed to build SCSI controller JSON: %m");
-
-        r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(args), on_qmp_complete, (void*) "device_add");
-        if (r < 0)
-                return log_error_errno(r, "Failed to send SCSI controller device_add: %m");
-
-        log_debug("Queued virtio-scsi-pci controller setup");
-        return 0;
-}
-
 int vmspawn_qmp_setup_drives(VmspawnQmpBridge *bridge, DriveInfos *drives) {
         int r;
 
@@ -869,16 +1184,15 @@ int vmspawn_qmp_setup_drives(VmspawnQmpBridge *bridge, DriveInfos *drives) {
         QmpClient *qmp = vmspawn_qmp_bridge_get_qmp(bridge);
 
         /* io_uring support was probed during vmspawn_qmp_init(). The cached result in
-         * bridge->features is passed to each file node setup call. */
-
-        if (drives_need_scsi_controller(drives)) {
-                r = qmp_setup_scsi_controller(qmp, drives->scsi_pcie_port);
-                if (r < 0)
-                        return r;
-        }
+         * bridge->features is passed to each file node setup call. SCSI controller
+         * creation is handled on-demand by vmspawn_qmp_add_block_device() for the first
+         * SCSI drive, using the hotplug-spares pool. */
 
         FOREACH_ARRAY(d, drives->drives, drives->n_drives) {
-                r = qmp_setup_drive(bridge, qmp, *d);
+                if ((*d)->overlay_fd >= 0)
+                        r = qmp_setup_ephemeral_drive(bridge, qmp, *d);
+                else
+                        r = qmp_setup_regular_drive(bridge, TAKE_PTR(*d));
                 if (r < 0)
                         return r;
         }
@@ -898,9 +1212,16 @@ VmspawnQmpBridge* vmspawn_qmp_bridge_free(VmspawnQmpBridge *b) {
         if (!b)
                 return NULL;
 
+        /* Unref first: pending QMP callbacks may release hotplug ports through the bridge. */
+        qmp_client_unref(b->qmp);
+
+        hashmap_free(b->block_devices_by_qmp_id);
+        hashmap_free(b->block_devices);
         hashmap_free(b->pending_jobs);
 
-        qmp_client_unref(b->qmp);
+        FOREACH_ELEMENT(owner, b->hotplug_port_owner)
+                free(*owner);
+
         return mfree(b);
 }
 
@@ -1067,6 +1388,8 @@ int vmspawn_qmp_init(VmspawnQmpBridge **ret, int fd, sd_event *event) {
         if (!bridge)
                 return log_oom();
 
+        bridge->scsi_controller_port_idx = -1;
+
         r = qmp_client_connect_fd(&bridge->qmp, fd);
         if (r < 0)
                 return log_error_errno(r, "Failed to create QMP client: %m");
index a58dfae3beb63f5298fadaad6137048dfd80960c..f1407c6f2f5b156d24df00ba7f68ca2656923038 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <net/ethernet.h>
 
+#include "machine-util.h"
 #include "shared-forward.h"
 
 #define VMSPAWN_PCIE_HOTPLUG_SPARES 10
@@ -28,10 +29,15 @@ typedef enum VmspawnQmpFeatureFlags {
 
 typedef struct VmspawnQmpBridge {
         QmpClient *qmp;
-        Hashmap *pending_jobs;        /* blockdev-create continuations */
-        uint64_t next_block_counter;  /* monotonic counter feeding internal QMP names (vmspawn-<N>-*) */
+        Hashmap *pending_jobs;         /* blockdev-create continuations */
+        Hashmap *block_devices;        /* user_id (char*) → DriveInfo* (owned ref) */
+        Hashmap *block_devices_by_qmp_id; /* qmp_device_id (char*) → DriveInfo* (non-owning view) */
+        char *hotplug_port_owner[VMSPAWN_PCIE_HOTPLUG_SPARES];  /* owner id per port; NULL = free */
+        int scsi_controller_port_idx;  /* hotplug port idx taken by virtio-scsi-pci, -1 if none */
+        uint64_t next_block_counter;   /* monotonic counter feeding internal QMP names (vmspawn-<N>-*) */
         VmspawnQmpFeatureFlags features;
         bool setup_done;
+        bool scsi_controller_created;  /* virtio-scsi-pci has been device_add'd */
 } VmspawnQmpBridge;
 
 VmspawnQmpBridge* vmspawn_qmp_bridge_free(VmspawnQmpBridge *b);
@@ -68,21 +74,39 @@ typedef enum QmpDriveFlags {
         QMP_DRIVE_DISCARD_NO_UNREF = 1u << 6,  /* qcow2 only */
 } QmpDriveFlags;
 
-/* Drive info for QMP-based drive setup. All string fields are owned.
- * Each DriveInfo is individually heap-allocated so it can be handed off
- * to the block device registry via TAKE_PTR. */
+typedef enum BlockDeviceStateFlags {
+        BLOCK_DEVICE_STATE_BLOCKDEV_ADDED = 1u << 0,
+        BLOCK_DEVICE_STATE_ADD_FAILED     = 1u << 1,  /* first error fired; suppress cascades */
+} BlockDeviceStateFlags;
+
+/* Ref-counted; each of the four add-stage QMP slots holds one ref.
+ *
+ * link == NULL → boot-time: failure calls sd_event_exit (if !setup_done).
+ * link != NULL → hotplug:    failure replies via the varlink link. */
 typedef struct DriveInfo {
         unsigned n_ref;
+
+        /* Config */
         char *path;                /* original path (for logging; not passed to QEMU) */
         char *format;              /* "raw" or "qcow2" */
         char *disk_driver;         /* "virtio-blk-pci", "scsi-hd", "scsi-cd", "nvme" */
         char *serial;
-        char *qmp_node_name;       /* "vmspawn-<N>-storage" */
-        char *qmp_device_id;       /* "vmspawn-<N>-disk" */
         char *pcie_port;           /* pcie-root-port id for device_add bus (NULL on non-PCIe) */
         int fd;                    /* pre-opened image fd (-EBADF if unused) */
         int overlay_fd;            /* pre-opened anonymous overlay fd for ephemeral (-EBADF if unused) */
         QmpDriveFlags flags;
+
+        /* Per-add-op state (populated by the add flow; zeroed at CLI-parse time) */
+        VmspawnQmpBridge *bridge;  /* weak */
+        char *id;                  /* varlink-visible id (caller-supplied, or falls back to qmp_device_id) */
+        DiskType disk_type;        /* for the ListBlockDevices `driver` field */
+        uint64_t counter;          /* internal N used in qmp_node_name / qmp_device_id */
+        char *qmp_node_name;       /* "vmspawn-<counter>-storage" */
+        char *qmp_device_id;       /* "vmspawn-<counter>-disk" */
+        char *fdset_path;          /* "/dev/fdset/N" */
+        int pcie_port_idx;         /* hotplug port idx held by this drive; -1 once committed or unused */
+        BlockDeviceStateFlags state;
+        sd_varlink *link;          /* ref'd iff hotplug */
 } DriveInfo;
 
 DriveInfo* drive_info_new(void);
@@ -92,7 +116,6 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(DriveInfo *, drive_info_unref);
 typedef struct DriveInfos {
         DriveInfo **drives;     /* array of individually heap-allocated entries */
         size_t n_drives;
-        char *scsi_pcie_port;  /* owned: pcie-root-port id for SCSI controller (NULL if no SCSI or non-PCIe) */
 } DriveInfos;
 
 void drive_infos_done(DriveInfos *infos);
index 3f99d0547cdb7f58f4c72223317766cf88f9ab84..1a349a1d634f34f39f3b45c1b5824ca62e905fc4 100644 (file)
@@ -2440,19 +2440,15 @@ static int assign_pcie_ports(MachineConfig *c) {
 
         size_t port = 0;
 
-        /* Drives: non-SCSI drives get individual ports, SCSI controller gets one port */
-        bool need_scsi = false;
+        /* Non-SCSI drives get individual ports. SCSI controllers (if any) allocate
+         * from the hotplug-spares pool on demand at device-add time. */
         FOREACH_ARRAY(d, drives->drives, drives->n_drives) {
-                if (STR_IN_SET((*d)->disk_driver, "scsi-hd", "scsi-cd")) {
-                        need_scsi = true;
+                DriveInfo *drive = *d;
+                if (STR_IN_SET(drive->disk_driver, "scsi-hd", "scsi-cd"))
                         continue;
-                }
-                if (asprintf(&(*d)->pcie_port, "vmspawn-pcieport-%zu", port++) < 0)
+                if (asprintf(&drive->pcie_port, "vmspawn-pcieport-%zu", port++) < 0)
                         return log_oom();
         }
-        if (need_scsi)
-                if (asprintf(&drives->scsi_pcie_port, "vmspawn-pcieport-%zu", port++) < 0)
-                        return log_oom();
 
         if (network->type)
                 if (asprintf(&network->pcie_port, "vmspawn-pcieport-%zu", port++) < 0)