#include "sd-event.h"
#include "sd-json.h"
+#include "sd-varlink.h"
#include "alloc-util.h"
#include "blockdev-util.h"
+#include "errno-util.h"
#include "ether-addr-util.h"
#include "fd-util.h"
#include "hashmap.h"
#include "string-util.h"
#include "strv.h"
#include "vmspawn-qmp.h"
+#include "vmspawn-util.h"
DEFINE_PRIVATE_HASH_OPS_FULL(
pending_job_hash_ops,
char, string_hash_func, string_compare_func, free,
PendingJob, pending_job_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+ block_devices_hash_ops,
+ char, string_hash_func, string_compare_func,
+ DriveInfo, drive_info_unref);
+
DriveInfo* drive_info_new(void) {
DriveInfo *d = new(DriveInfo, 1);
if (!d)
.n_ref = 1,
.fd = -EBADF,
.overlay_fd = -EBADF,
+ .pcie_port_idx = -1,
};
return d;
}
+static int vmspawn_qmp_bridge_allocate_pcie_port(
+ VmspawnQmpBridge *bridge,
+ const char *owner_id,
+ char **ret_name,
+ int *ret_idx) {
+
+ assert(bridge);
+ assert(owner_id);
+ assert(ret_name);
+ assert(ret_idx);
+
+ for (int i = 0; i < VMSPAWN_PCIE_HOTPLUG_SPARES; i++) {
+ if (bridge->hotplug_port_owner[i])
+ continue;
+
+ _cleanup_free_ char *owner = strdup(owner_id), *name = NULL;
+ if (!owner || asprintf(&name, "vmspawn-hotplug-pci-root-port-%d", i) < 0)
+ return -ENOMEM;
+
+ bridge->hotplug_port_owner[i] = TAKE_PTR(owner);
+ *ret_name = TAKE_PTR(name);
+ *ret_idx = i;
+ return 0;
+ }
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+ "No free PCIe hotplug port available for owner '%s'.",
+ owner_id);
+}
+
+static void vmspawn_qmp_bridge_release_pcie_port_by_idx(VmspawnQmpBridge *bridge, int idx) {
+ assert(bridge);
+
+ if (idx < 0)
+ return;
+
+ assert(idx < VMSPAWN_PCIE_HOTPLUG_SPARES);
+
+ bridge->hotplug_port_owner[idx] = mfree(bridge->hotplug_port_owner[idx]);
+}
+
static DriveInfo* drive_info_free(DriveInfo *d) {
assert(d);
+ if (d->bridge)
+ vmspawn_qmp_bridge_release_pcie_port_by_idx(d->bridge, d->pcie_port_idx);
+
free(d->path);
free(d->format);
free(d->disk_driver);
free(d->serial);
+ free(d->pcie_port);
+ free(d->id);
free(d->qmp_node_name);
free(d->qmp_device_id);
- free(d->pcie_port);
+ free(d->fdset_path);
+ sd_varlink_unref(d->link);
safe_close(d->fd);
safe_close(d->overlay_fd);
return mfree(d);
drive_info_unref(*d);
infos->drives = mfree(infos->drives);
infos->n_drives = 0;
- infos->scsi_pcie_port = mfree(infos->scsi_pcie_port);
}
void network_info_done(NetworkInfo *info) {
"bus", SD_JSON_BUILD_STRING(drive->pcie_port)));
}
+/* Inline form: one blockdev-add creates format+file; one blockdev-del tears
+ * down the whole tree. Used for regular boot drives and hotplug. */
+static int qmp_build_blockdev_add_inline(
+ const char *node_name,
+ const char *format,
+ const char *filename,
+ const char *file_driver,
+ QmpDriveFlags flags,
+ VmspawnQmpFeatureFlags features,
+ sd_json_variant **ret) {
+
+ bool use_io_uring = FLAGS_SET(features, VMSPAWN_QMP_FEATURE_IO_URING);
+ bool use_discard_no_unref = FLAGS_SET(flags, QMP_DRIVE_DISCARD_NO_UNREF);
+
+ assert(node_name);
+ assert(format);
+ assert(filename);
+ assert(file_driver);
+ assert(ret);
+
+ return sd_json_buildo(
+ ret,
+ SD_JSON_BUILD_PAIR_STRING("node-name", node_name),
+ SD_JSON_BUILD_PAIR_STRING("driver", format),
+ SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(flags, QMP_DRIVE_READ_ONLY), "read-only", SD_JSON_BUILD_BOOLEAN(true)),
+ SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(flags, QMP_DRIVE_DISCARD), "discard", JSON_BUILD_CONST_STRING("unmap")),
+ SD_JSON_BUILD_PAIR_CONDITION(use_discard_no_unref, "discard-no-unref", SD_JSON_BUILD_BOOLEAN(true)),
+ SD_JSON_BUILD_PAIR("file", SD_JSON_BUILD_OBJECT(
+ SD_JSON_BUILD_PAIR_STRING("driver", file_driver),
+ SD_JSON_BUILD_PAIR_STRING("filename", filename),
+ SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(flags, QMP_DRIVE_READ_ONLY), "read-only", SD_JSON_BUILD_BOOLEAN(true)),
+ SD_JSON_BUILD_PAIR_CONDITION(use_io_uring, "aio", JSON_BUILD_CONST_STRING("io_uring")),
+ SD_JSON_BUILD_PAIR("cache", SD_JSON_BUILD_OBJECT(
+ SD_JSON_BUILD_PAIR_BOOLEAN("direct", false),
+ SD_JSON_BUILD_PAIR_BOOLEAN("no-flush", FLAGS_SET(flags, QMP_DRIVE_NO_FLUSH)))))));
+}
+
/* Issue blockdev-add for a file node. */
static int qmp_add_file_node(QmpClient *qmp, const QmpFileNodeParams *p) {
_cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Unsupported image format '%s'", format);
}
+/* Forward declarations — on_ephemeral_create_concluded routes failures through
+ * the shared block-device add callbacks defined further below. */
+static int drive_info_add_fail(DriveInfo *d, int error, const char *error_desc);
+static int on_add_blockdev_stage(QmpClient *client, sd_json_variant *result,
+ const char *error_desc, int error, void *userdata);
+static int on_add_device_add_complete(QmpClient *client, sd_json_variant *result,
+ const char *error_desc, int error, void *userdata);
+
/* Continuation state for on_ephemeral_create_concluded: overlay format + device_add. */
typedef struct EphemeralDriveCtx {
- char *qmp_node_name; /* overlay format node name, "vmspawn-<N>-storage" */
- char *qmp_device_id; /* qdev id, "vmspawn-<N>-disk" */
+ DriveInfo *drive; /* ref */
char *overlay_file_node;
char *base_fmt_node;
- char *disk_driver;
- char *serial;
- char *pcie_port; /* NULL on non-PCIe */
- QmpDriveFlags flags; /* subset forwarded to device_add */
} EphemeralDriveCtx;
static EphemeralDriveCtx* ephemeral_drive_ctx_free(EphemeralDriveCtx *ctx) {
if (!ctx)
return NULL;
- free(ctx->qmp_node_name);
- free(ctx->qmp_device_id);
+ drive_info_unref(ctx->drive);
free(ctx->overlay_file_node);
free(ctx->base_fmt_node);
- free(ctx->disk_driver);
- free(ctx->serial);
- free(ctx->pcie_port);
return mfree(ctx);
}
static int on_ephemeral_create_concluded(QmpClient *qmp, void *userdata) {
_cleanup_(ephemeral_drive_ctx_freep) EphemeralDriveCtx *ctx = ASSERT_PTR(userdata);
_cleanup_(sd_json_variant_unrefp) sd_json_variant *fmt_args = NULL, *device_args = NULL;
+ _cleanup_(drive_info_unrefp) DriveInfo *slot_ref = NULL;
+ DriveInfo *drive = ctx->drive;
int r;
+ assert(qmp);
+
/* Open formatted overlay as qcow2 with backing reference */
QmpFormatNodeParams overlay_fmt_params = {
- .node_name = ctx->qmp_node_name,
+ .node_name = drive->qmp_node_name,
.format = "qcow2",
.file_node_name = ctx->overlay_file_node,
.backing = ctx->base_fmt_node,
- .flags = ctx->flags & (QMP_DRIVE_DISCARD|QMP_DRIVE_DISCARD_NO_UNREF),
+ .flags = drive->flags & (QMP_DRIVE_DISCARD|QMP_DRIVE_DISCARD_NO_UNREF),
};
r = qmp_build_blockdev_add_format(&overlay_fmt_params, &fmt_args);
if (r < 0)
- return log_error_errno(r, "Failed to build overlay format JSON for '%s': %m", ctx->qmp_node_name);
+ return drive_info_add_fail(drive, r, NULL);
- r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(fmt_args), on_qmp_complete, (void*) "blockdev-add");
+ slot_ref = drive_info_ref(drive);
+ r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(fmt_args),
+ on_add_blockdev_stage, slot_ref);
if (r < 0)
- return r;
+ return drive_info_add_fail(drive, r, NULL);
+ TAKE_PTR(slot_ref);
- /* Temporary DriveInfo view so we can reuse qmp_build_device_add(). */
- const DriveInfo tmp = {
- .disk_driver = ctx->disk_driver,
- .serial = ctx->serial,
- .pcie_port = ctx->pcie_port,
- .flags = ctx->flags & QMP_DRIVE_BOOT,
- .fd = -EBADF,
- .overlay_fd = -EBADF,
- .qmp_node_name = ctx->qmp_node_name,
- .qmp_device_id = ctx->qmp_device_id,
- };
- r = qmp_build_device_add(&tmp, &device_args);
+ r = qmp_build_device_add(drive, &device_args);
if (r < 0)
- return log_error_errno(r, "Failed to build device_add JSON for '%s': %m", ctx->qmp_device_id);
+ return drive_info_add_fail(drive, r, NULL);
- r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_complete, (void*) "device_add");
+ slot_ref = drive_info_ref(drive);
+ r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(device_args),
+ on_add_device_add_complete, slot_ref);
if (r < 0)
- return r;
+ return drive_info_add_fail(drive, r, NULL);
+ TAKE_PTR(slot_ref);
- log_debug("Queued ephemeral drive completion for '%s'", ctx->qmp_device_id);
+ log_debug("Queued ephemeral drive completion for '%s'", drive->qmp_device_id);
return 0;
}
-/* Set up an ephemeral drive: base image (read-only) + anonymous qcow2 overlay (read-write).
- * The final steps (overlay format + device_add) are deferred to a job continuation that
- * fires when the blockdev-create job concludes. */
+/* Base image (read-only) + anonymous qcow2 overlay (read-write). Overlay format
+ * and device_add run from the blockdev-create continuation. */
static int qmp_setup_ephemeral_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
int r;
assert(drive->fd >= 0);
assert(drive->overlay_fd >= 0);
- uint64_t counter = bridge->next_block_counter++;
+ drive->bridge = bridge;
+ drive->counter = bridge->next_block_counter++;
_cleanup_free_ char *base_file_node = NULL, *base_fmt_node = NULL, *overlay_file_node = NULL;
- if (asprintf(&drive->qmp_node_name, "vmspawn-%" PRIu64 "-storage", counter) < 0 ||
- asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", counter) < 0 ||
- asprintf(&base_file_node, "vmspawn-%" PRIu64 "-base-file", counter) < 0 ||
- asprintf(&base_fmt_node, "vmspawn-%" PRIu64 "-base-fmt", counter) < 0 ||
- asprintf(&overlay_file_node, "vmspawn-%" PRIu64 "-overlay-file", counter) < 0)
+ if (asprintf(&drive->qmp_node_name, "vmspawn-%" PRIu64 "-storage", drive->counter) < 0 ||
+ asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", drive->counter) < 0 ||
+ asprintf(&base_file_node, "vmspawn-%" PRIu64 "-base-file", drive->counter) < 0 ||
+ asprintf(&base_fmt_node, "vmspawn-%" PRIu64 "-base-fmt", drive->counter) < 0 ||
+ asprintf(&overlay_file_node, "vmspawn-%" PRIu64 "-overlay-file", drive->counter) < 0)
return log_oom();
+ /* Auto-assigned user id reuses qmp_device_id (matching vmspawn_qmp_add_block_device). */
+ if (!drive->id) {
+ drive->id = strdup(drive->qmp_device_id);
+ if (!drive->id)
+ return log_oom();
+ }
+
/* Read virtual size before passing the fd to QEMU (TAKE_FD consumes it) */
uint64_t virtual_size;
r = get_image_virtual_size(drive->fd, drive->format, FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE), &virtual_size);
return log_error_errno(r, "Failed to build blockdev-create options: %m");
_cleanup_free_ char *job_id = NULL;
- if (asprintf(&job_id, "vmspawn-%" PRIu64 "-overlay-create", counter) < 0)
+ if (asprintf(&job_id, "vmspawn-%" PRIu64 "-overlay-create", drive->counter) < 0)
return log_oom();
_cleanup_(sd_json_variant_unrefp) sd_json_variant *cmd_args = NULL;
if (r < 0)
return log_error_errno(r, "Failed to build blockdev-create JSON: %m");
+ /* Fold DISCARD_NO_UNREF into drive->flags so the continuation's overlay format blockdev-add
+ * picks it up via drive->flags. */
+ if (FLAGS_SET(drive->flags, QMP_DRIVE_DISCARD) &&
+ FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_DISCARD_NO_UNREF))
+ drive->flags |= QMP_DRIVE_DISCARD_NO_UNREF;
+
/* Register continuation: when the job concludes, fire overlay format + device_add */
_cleanup_(ephemeral_drive_ctx_freep) EphemeralDriveCtx *ectx = new(EphemeralDriveCtx, 1);
if (!ectx)
return log_oom();
- QmpDriveFlags ectx_flags = drive->flags & (QMP_DRIVE_DISCARD|QMP_DRIVE_BOOT);
- if (FLAGS_SET(drive->flags, QMP_DRIVE_DISCARD) &&
- FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_DISCARD_NO_UNREF))
- ectx_flags |= QMP_DRIVE_DISCARD_NO_UNREF;
-
*ectx = (EphemeralDriveCtx) {
- .qmp_node_name = strdup(drive->qmp_node_name),
- .qmp_device_id = strdup(drive->qmp_device_id),
+ .drive = drive_info_ref(drive),
.overlay_file_node = strdup(overlay_file_node),
.base_fmt_node = strdup(base_fmt_node),
- .disk_driver = strdup(drive->disk_driver),
- .serial = drive->serial ? strdup(drive->serial) : NULL,
- .pcie_port = drive->pcie_port ? strdup(drive->pcie_port) : NULL,
- .flags = ectx_flags,
};
- if (!ectx->qmp_node_name || !ectx->qmp_device_id || !ectx->overlay_file_node ||
- !ectx->base_fmt_node || !ectx->disk_driver ||
- (drive->serial && !ectx->serial) || (drive->pcie_port && !ectx->pcie_port))
+ if (!ectx->overlay_file_node || !ectx->base_fmt_node)
return log_oom();
r = vmspawn_qmp_bridge_register_job(bridge, job_id,
TAKE_PTR(ectx);
r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "blockdev-create", QMP_CLIENT_ARGS(cmd_args), on_qmp_complete, (void*) "blockdev-create");
- if (r < 0)
+ if (r < 0) {
+ _unused_ _cleanup_(pending_job_freep) PendingJob *dead = hashmap_remove(bridge->pending_jobs, job_id);
return log_error_errno(r, "Failed to send blockdev-create for '%s': %m", drive->path);
+ }
log_debug("Queued ephemeral drive setup for '%s' (job %s)", drive->path, job_id);
return 0;
}
-/* Set up a regular (non-ephemeral) drive: single file node + format node + device_add. */
-static int qmp_setup_regular_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
+static int reply_qmp_error(sd_varlink *link, const char *error_desc, int error) {
+ assert(link);
+
+ if (ERRNO_IS_DISCONNECT(error))
+ return sd_varlink_error(link, "io.systemd.MachineInstance.NotConnected", NULL);
+ if (error_desc)
+ log_warning("QMP error: %s", error_desc);
+ return sd_varlink_error_errno(link, error < 0 ? error : -EIO);
+}
+
+/* After the pipelined remove-fd at add time, QEMU auto-frees the fdset when
+ * raw_close (during blockdev-del) releases the last dup. So teardown only
+ * needs to fire blockdev-del. */
+static void vmspawn_qmp_block_device_teardown(QmpClient *client, const char *qmp_node_name, BlockDeviceStateFlags stages) {
+ assert(client);
+ assert(qmp_node_name);
+
+ if (!FLAGS_SET(stages, BLOCK_DEVICE_STATE_BLOCKDEV_ADDED))
+ return;
+
+ _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+ if (sd_json_buildo(&args, SD_JSON_BUILD_PAIR_STRING("node-name", qmp_node_name)) >= 0)
+ (void) qmp_client_invoke(client, /* ret_slot= */ NULL, "blockdev-del", QMP_CLIENT_ARGS(args),
+ on_qmp_complete, (void*) "teardown blockdev-del");
+}
+
+/* Insert into the owning primary map and the non-owning qmp_device_id view. On
+ * secondary-put failure, roll back the primary so neither map carries a stale entry. */
+static int bridge_register_drive(VmspawnQmpBridge *b, DriveInfo *d) {
+ int r;
+
+ assert(b);
+ assert(d);
+ assert(d->id);
+ assert(d->qmp_device_id);
+
+ r = hashmap_ensure_put(&b->block_devices, &block_devices_hash_ops,
+ d->id, drive_info_ref(d));
+ if (r < 0) {
+ drive_info_unref(d);
+ return r;
+ }
+
+ r = hashmap_ensure_put(&b->block_devices_by_qmp_id, &string_hash_ops,
+ d->qmp_device_id, d);
+ if (r < 0) {
+ drive_info_unref(hashmap_remove(b->block_devices, d->id));
+ return r;
+ }
+
+ return 0;
+}
+
+/* Drop the drive from both maps; returns the pointer removed from the primary
+ * (NULL if it wasn't there) so the caller can decide whether to unref. */
+static DriveInfo* bridge_unregister_drive(VmspawnQmpBridge *b, DriveInfo *d) {
+ assert(b);
+ assert(d);
+
+ hashmap_remove_value(b->block_devices_by_qmp_id, d->qmp_device_id, d);
+ return hashmap_remove_value(b->block_devices, d->id, d);
+}
+
+/* First-error entry point: marks FAILED so cascading callbacks no-op, drops
+ * the registry slot, then replies on the link or exits the loop. */
+static int drive_info_add_fail(DriveInfo *d, int error, const char *error_desc) {
+ assert(d);
+
+ if (FLAGS_SET(d->state, BLOCK_DEVICE_STATE_ADD_FAILED))
+ return 0;
+
+ vmspawn_qmp_block_device_teardown(d->bridge->qmp, d->qmp_node_name, d->state);
+ d->state = BLOCK_DEVICE_STATE_ADD_FAILED;
+
+ if (bridge_unregister_drive(d->bridge, d))
+ drive_info_unref(d);
+
+ if (d->link) {
+ (void) reply_qmp_error(d->link, error_desc, error);
+ d->link = sd_varlink_unref(d->link);
+ return 0;
+ }
+
+ log_error_errno(error, "Block device '%s' setup failed: %s",
+ strna(d->id), strna(error_desc));
+
+ /* Boot-time (link == NULL) is always fatal — even for late-arriving ephemeral replies. */
+ return sd_event_exit(qmp_client_get_event(d->bridge->qmp), error);
+}
+
+/* Rolls back the up-front registry insert on a sync error path. */
+static void drive_info_unregister_on_failurep(DriveInfo **dp) {
+ assert(dp);
+
+ DriveInfo *d = *dp;
+ if (!d)
+ return;
+ d->state |= BLOCK_DEVICE_STATE_ADD_FAILED;
+ if (bridge_unregister_drive(d->bridge, d))
+ drive_info_unref(d);
+}
+
+/* Shared by the intermediate stages that don't need to record a rollback bit
+ * (add-fd, remove-fd). Just forwards errors to drive_info_add_fail so cascades
+ * from earlier stage failures get suppressed via the FAILED sentinel. */
+static int on_add_observe_stage(
+ QmpClient *client,
+ sd_json_variant *result,
+ const char *error_desc,
+ int error,
+ void *userdata) {
+
+ _cleanup_(drive_info_unrefp) DriveInfo *d = ASSERT_PTR(userdata);
+ assert(client);
+
+ if (error < 0)
+ return drive_info_add_fail(d, error, error_desc);
+ return 0;
+}
+
+static int on_add_blockdev_stage(
+ QmpClient *client,
+ sd_json_variant *result,
+ const char *error_desc,
+ int error,
+ void *userdata) {
+
+ _cleanup_(drive_info_unrefp) DriveInfo *d = ASSERT_PTR(userdata);
+ assert(client);
+
+ if (error < 0)
+ return drive_info_add_fail(d, error, error_desc);
+
+ /* A sync error after blockdev-add was queued may have marked the chain FAILED.
+ * The blockdev node we just created is orphaned — tear it down retroactively
+ * and don't claim BLOCKDEV_ADDED on a drive that's already been unregistered. */
+ if (FLAGS_SET(d->state, BLOCK_DEVICE_STATE_ADD_FAILED)) {
+ vmspawn_qmp_block_device_teardown(d->bridge->qmp, d->qmp_node_name,
+ BLOCK_DEVICE_STATE_BLOCKDEV_ADDED);
+ return 0;
+ }
+
+ d->state |= BLOCK_DEVICE_STATE_BLOCKDEV_ADDED;
+ return 0;
+}
+
+static int on_add_device_add_complete(
+ QmpClient *client,
+ sd_json_variant *result,
+ const char *error_desc,
+ int error,
+ void *userdata) {
+
+ _cleanup_(drive_info_unrefp) DriveInfo *d = ASSERT_PTR(userdata);
+
+ assert(client);
+
+ if (error < 0)
+ return drive_info_add_fail(d, error, error_desc);
+
+ if (FLAGS_SET(d->state, BLOCK_DEVICE_STATE_ADD_FAILED))
+ return 0;
+
+ if (d->link) {
+ (void) sd_varlink_replybo(d->link, SD_JSON_BUILD_PAIR_STRING("id", d->id));
+ d->link = sd_varlink_unref(d->link);
+ }
+
+ log_info("Block device '%s' attached", d->id);
+ return 0;
+}
+
+static int on_scsi_controller_complete(
+ QmpClient *client,
+ sd_json_variant *result,
+ const char *error_desc,
+ int error,
+ void *userdata) {
+
+ assert(client);
+
+ VmspawnQmpBridge *bridge = ASSERT_PTR(qmp_client_get_userdata(client));
+
+ if (error < 0) {
+ /* QEMU's device_add is transactional — on error it calls object_unparent()
+ * before replying, so the "vmspawn_scsi" id is free for the next retry. */
+ vmspawn_qmp_bridge_release_pcie_port_by_idx(bridge, bridge->scsi_controller_port_idx);
+ bridge->scsi_controller_port_idx = -1;
+ bridge->scsi_controller_created = false;
+ log_warning("virtio-scsi-pci controller setup failed: %s", strna(error_desc));
+ if (!bridge->setup_done)
+ return sd_event_exit(qmp_client_get_event(client), error);
+ }
+
+ return 0;
+}
+
+static int qmp_setup_scsi_controller(VmspawnQmpBridge *bridge, const char *pcie_port) {
+ _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+ int r;
+
+ assert(bridge);
+
+ r = sd_json_buildo(
+ &args,
+ SD_JSON_BUILD_PAIR_STRING("driver", "virtio-scsi-pci"),
+ SD_JSON_BUILD_PAIR_STRING("id", "vmspawn_scsi"),
+ SD_JSON_BUILD_PAIR_CONDITION(!!pcie_port, "bus", SD_JSON_BUILD_STRING(pcie_port)));
+ if (r < 0)
+ return log_error_errno(r, "Failed to build SCSI controller JSON: %m");
+
+ r = qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(args),
+ on_scsi_controller_complete, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send SCSI controller device_add: %m");
+
+ log_debug("Queued virtio-scsi-pci controller setup");
+ return 0;
+}
+
+static int vmspawn_qmp_add_block_device(VmspawnQmpBridge *bridge, DriveInfo *drive) {
int r;
assert(bridge);
- assert(qmp);
assert(drive);
+ assert(drive->format);
+ assert(drive->disk_driver);
assert(drive->fd >= 0);
- uint64_t counter = bridge->next_block_counter++;
- _cleanup_free_ char *file_node_name = NULL;
- if (asprintf(&drive->qmp_node_name, "vmspawn-%" PRIu64 "-storage", counter) < 0 ||
- asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", counter) < 0 ||
- asprintf(&file_node_name, "vmspawn-%" PRIu64 "-file", counter) < 0)
+ _unused_ _cleanup_(drive_info_unrefp) DriveInfo *owned = drive;
+ _cleanup_(drive_info_unrefp) DriveInfo *slot_ref = NULL;
+ _cleanup_(drive_info_unregister_on_failurep) DriveInfo *registered = NULL;
+
+ drive->bridge = bridge;
+ drive->counter = bridge->next_block_counter++;
+ if (asprintf(&drive->qmp_node_name, "vmspawn-%" PRIu64 "-storage", drive->counter) < 0)
return log_oom();
+ if (asprintf(&drive->qmp_device_id, "vmspawn-%" PRIu64 "-disk", drive->counter) < 0)
+ return log_oom();
+ /* Auto-assigned user ids reuse qmp_device_id. */
+ if (!drive->id) {
+ drive->id = strdup(drive->qmp_device_id);
+ if (!drive->id)
+ return log_oom();
+ }
- _cleanup_free_ char *fdset_path = NULL;
- uint64_t fdset_id;
- r = qmp_fdset_add(qmp, TAKE_FD(drive->fd),
- on_qmp_complete, (void*) "add-fd", &fdset_path, &fdset_id);
+ /* Reserve the registry slot up-front so the device_add callback's commit can't fail. */
+ r = bridge_register_drive(bridge, drive);
if (r < 0)
- return log_error_errno(r, "Failed to send add-fd for '%s': %m", drive->path);
+ return r;
+ registered = drive;
+
+ /* First SCSI hotplug needs a virtio-scsi-pci controller to attach to. */
+ if (STR_IN_SET(drive->disk_driver, "scsi-hd", "scsi-cd") && !bridge->scsi_controller_created) {
+ _cleanup_free_ char *controller_port = NULL;
+ int controller_port_idx = -1;
+ if (ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS) {
+ r = vmspawn_qmp_bridge_allocate_pcie_port(bridge, "vmspawn_scsi",
+ &controller_port, &controller_port_idx);
+ if (r == -EBUSY)
+ return log_error_errno(r, "No PCIe hotplug ports left for SCSI controller");
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate PCIe hotplug port for SCSI controller: %m");
+ }
+
+ r = qmp_setup_scsi_controller(bridge, controller_port);
+ if (r < 0) {
+ vmspawn_qmp_bridge_release_pcie_port_by_idx(bridge, controller_port_idx);
+ return r;
+ }
- QmpFileNodeParams file_params = {
- .node_name = file_node_name,
- .filename = fdset_path,
- .driver = FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE) ? "host_device" : "file",
- .flags = drive->flags & (QMP_DRIVE_READ_ONLY|QMP_DRIVE_NO_FLUSH),
- };
- if (FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_IO_URING))
- file_params.flags |= QMP_DRIVE_IO_URING;
- r = qmp_add_file_node(qmp, &file_params);
+ /* Set before the reply so a second SCSI hotplug queued in the meantime
+ * doesn't re-create the controller; reset in on_scsi_controller_complete on error. */
+ bridge->scsi_controller_port_idx = controller_port_idx;
+ bridge->scsi_controller_created = true;
+ }
+
+ uint64_t fdset_id;
+ slot_ref = drive_info_ref(drive);
+ r = qmp_fdset_add(bridge->qmp, TAKE_FD(drive->fd),
+ on_add_observe_stage, slot_ref, &drive->fdset_path, &fdset_id);
if (r < 0)
- return log_error_errno(r, "Failed to send blockdev-add for '%s': %m", drive->path);
+ return r;
+ TAKE_PTR(slot_ref);
- /* The file node now holds a dup of the fd; release the monitor's
- * original so the fdset auto-frees when raw_close runs at teardown. */
- r = qmp_fdset_remove(qmp, fdset_id, on_qmp_complete, (void*) "remove-fd");
+ _cleanup_(sd_json_variant_unrefp) sd_json_variant *blockdev_args = NULL;
+ r = qmp_build_blockdev_add_inline(
+ drive->qmp_node_name, drive->format, drive->fdset_path,
+ FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE) ? "host_device" : "file",
+ drive->flags, bridge->features, &blockdev_args);
if (r < 0)
- return log_error_errno(r, "Failed to send remove-fd for '%s': %m", drive->path);
+ return r;
- QmpFormatNodeParams fmt_params = {
- .node_name = drive->qmp_node_name,
- .format = drive->format,
- .file_node_name = file_node_name,
- .flags = drive->flags & (QMP_DRIVE_READ_ONLY|QMP_DRIVE_DISCARD),
- };
- _cleanup_(sd_json_variant_unrefp) sd_json_variant *fmt_args = NULL;
- r = qmp_build_blockdev_add_format(&fmt_params, &fmt_args);
+ slot_ref = drive_info_ref(drive);
+ r = qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(blockdev_args),
+ on_add_blockdev_stage, slot_ref);
if (r < 0)
return r;
+ TAKE_PTR(slot_ref);
- r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "blockdev-add", QMP_CLIENT_ARGS(fmt_args), on_qmp_complete, (void*) "blockdev-add");
+ /* Release the monitor's original fd; the blockdev-add above took a dup. */
+ slot_ref = drive_info_ref(drive);
+ r = qmp_fdset_remove(bridge->qmp, fdset_id, on_add_observe_stage, slot_ref);
if (r < 0)
- return log_error_errno(r, "Failed to send blockdev-add format for '%s': %m", drive->path);
+ return r;
+ TAKE_PTR(slot_ref);
- /* device_add: attach to virtual hardware */
_cleanup_(sd_json_variant_unrefp) sd_json_variant *device_args = NULL;
r = qmp_build_device_add(drive, &device_args);
if (r < 0)
return r;
- r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_complete, (void*) "device_add");
+ slot_ref = drive_info_ref(drive);
+ r = qmp_client_invoke(bridge->qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(device_args),
+ on_add_device_add_complete, slot_ref);
if (r < 0)
- return log_error_errno(r, "Failed to send device_add for '%s': %m", drive->path);
+ return r;
+ TAKE_PTR(slot_ref);
- log_debug("Queued drive setup for '%s'", drive->path);
+ TAKE_PTR(registered);
return 0;
}
-/* Configure a single drive via QMP. Dispatches to ephemeral or regular setup. */
-static int qmp_setup_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
+static int qmp_setup_regular_drive(VmspawnQmpBridge *bridge, DriveInfo *drive) {
+ assert(bridge);
assert(drive);
+ assert(drive->fd >= 0);
+ assert(!drive->id);
- if (drive->overlay_fd >= 0)
- return qmp_setup_ephemeral_drive(bridge, qmp, drive);
-
- return qmp_setup_regular_drive(bridge, qmp, drive);
+ return vmspawn_qmp_add_block_device(bridge, drive);
}
int vmspawn_qmp_setup_network(VmspawnQmpBridge *bridge, NetworkInfo *network) {
return 0;
}
-static bool drives_need_scsi_controller(DriveInfos *drives) {
- assert(drives);
-
- FOREACH_ARRAY(d, drives->drives, drives->n_drives)
- if (STR_IN_SET((*d)->disk_driver, "scsi-hd", "scsi-cd"))
- return true;
-
- return false;
-}
-
-static int qmp_setup_scsi_controller(QmpClient *qmp, const char *pcie_port) {
- _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
- int r;
-
- r = sd_json_buildo(
- &args,
- SD_JSON_BUILD_PAIR_STRING("driver", "virtio-scsi-pci"),
- SD_JSON_BUILD_PAIR_STRING("id", "vmspawn_scsi"),
- SD_JSON_BUILD_PAIR_CONDITION(!!pcie_port, "bus", SD_JSON_BUILD_STRING(pcie_port)));
- if (r < 0)
- return log_error_errno(r, "Failed to build SCSI controller JSON: %m");
-
- r = qmp_client_invoke(qmp, /* ret_slot= */ NULL, "device_add", QMP_CLIENT_ARGS(args), on_qmp_complete, (void*) "device_add");
- if (r < 0)
- return log_error_errno(r, "Failed to send SCSI controller device_add: %m");
-
- log_debug("Queued virtio-scsi-pci controller setup");
- return 0;
-}
-
int vmspawn_qmp_setup_drives(VmspawnQmpBridge *bridge, DriveInfos *drives) {
int r;
QmpClient *qmp = vmspawn_qmp_bridge_get_qmp(bridge);
/* io_uring support was probed during vmspawn_qmp_init(). The cached result in
- * bridge->features is passed to each file node setup call. */
-
- if (drives_need_scsi_controller(drives)) {
- r = qmp_setup_scsi_controller(qmp, drives->scsi_pcie_port);
- if (r < 0)
- return r;
- }
+ * bridge->features is passed to each file node setup call. SCSI controller
+ * creation is handled on-demand by vmspawn_qmp_add_block_device() for the first
+ * SCSI drive, using the hotplug-spares pool. */
FOREACH_ARRAY(d, drives->drives, drives->n_drives) {
- r = qmp_setup_drive(bridge, qmp, *d);
+ if ((*d)->overlay_fd >= 0)
+ r = qmp_setup_ephemeral_drive(bridge, qmp, *d);
+ else
+ r = qmp_setup_regular_drive(bridge, TAKE_PTR(*d));
if (r < 0)
return r;
}
if (!b)
return NULL;
+ /* Unref first: pending QMP callbacks may release hotplug ports through the bridge. */
+ qmp_client_unref(b->qmp);
+
+ hashmap_free(b->block_devices_by_qmp_id);
+ hashmap_free(b->block_devices);
hashmap_free(b->pending_jobs);
- qmp_client_unref(b->qmp);
+ FOREACH_ELEMENT(owner, b->hotplug_port_owner)
+ free(*owner);
+
return mfree(b);
}
if (!bridge)
return log_oom();
+ bridge->scsi_controller_port_idx = -1;
+
r = qmp_client_connect_fd(&bridge->qmp, fd);
if (r < 0)
return log_error_errno(r, "Failed to create QMP client: %m");