]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
vmspawn: set up varlink bridge infrastructure
authorChristian Brauner <brauner@kernel.org>
Mon, 30 Mar 2026 12:08:40 +0000 (14:08 +0200)
committerChristian Brauner <brauner@kernel.org>
Wed, 15 Apr 2026 08:14:47 +0000 (10:14 +0200)
Create the QMP-to-varlink bridge layer (vmspawn-qmp.{c,h}) and the
varlink server layer (vmspawn-varlink.{c,h}).

The QMP bridge (VmspawnQmpBridge) owns the QmpClient connection and
manages pending background jobs (e.g. blockdev-create continuations).
vmspawn_qmp_init() creates the client and attaches it to the event
loop. vmspawn_qmp_probe_features() drives io_uring and qcow2
discard-no-unref probes synchronously via a qmp_client_process() +
qmp_client_wait() loop — the QMP handshake completes transparently on
the first invoke. vmspawn_qmp_start() resumes vCPUs via an async
"cont" command.

The varlink server (VmspawnVarlinkContext) exposes three interfaces:

- io.systemd.MachineInstance: generic machine control (Terminate,
  PowerOff, Reboot, Pause, Resume, Describe, SubscribeEvents).
  Method handlers forward to QMP commands asynchronously — the
  varlink reply is deferred until the QMP response arrives.
- io.systemd.VirtualMachineInstance: VM-specific (placeholder for
  future snapshot/migration methods).
- io.systemd.QemuMachineInstance: QEMU-specific (AcquireQMP stub).

The server listens on <runtime_dir>/control with mode 0600.

Event streaming follows the importd Pull pattern: SubscribeEvents
sends an initial {ready:true} notification, then fans out QMP events
to all subscribers. The disconnect handler only unrefs subscriber
links (matching resolved's vl_on_notification_disconnect pattern).

Introduce the MachineConfig aggregate in vmspawn-qmp.h grouping the
per-device info structures (DriveInfos, NetworkInfo, VirtiofsInfos,
VsockInfo) together with machine_config_done() that chains the
individual done helpers. Callers populate it field-by-field and rely
on the _cleanup_ attribute for orderly teardown regardless of which
device types the invocation ends up using.

Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
src/vmspawn/meson.build
src/vmspawn/vmspawn-qmp.c [new file with mode: 0644]
src/vmspawn/vmspawn-qmp.h [new file with mode: 0644]
src/vmspawn/vmspawn-varlink.c [new file with mode: 0644]
src/vmspawn/vmspawn-varlink.h [new file with mode: 0644]

index 99bad2d61897351a83bff993927ca99529f93391..6d08755fedf8b77a2473679df71a4a2c95e468e9 100644 (file)
@@ -7,6 +7,8 @@ endif
 vmspawn_sources = files(
         'vmspawn.c',
         'vmspawn-qemu-config.c',
+        'vmspawn-qmp.c',
+        'vmspawn-varlink.c',
         'vmspawn-settings.c',
         'vmspawn-scope.c',
         'vmspawn-mount.c',
diff --git a/src/vmspawn/vmspawn-qmp.c b/src/vmspawn/vmspawn-qmp.c
new file mode 100644 (file)
index 0000000..4171772
--- /dev/null
@@ -0,0 +1,1059 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+#include "sd-json.h"
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "ether-addr-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "json-util.h"
+#include "log.h"
+#include "qmp-client.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "vmspawn-qmp.h"
+
+DEFINE_PRIVATE_HASH_OPS_FULL(
+                pending_job_hash_ops,
+                char, string_hash_func, string_compare_func, free,
+                PendingJob, pending_job_free);
+
+void drive_info_done(DriveInfo *info) {
+        assert(info);
+        info->serial = mfree(info->serial);
+        info->node_name = mfree(info->node_name);
+        info->pcie_port = mfree(info->pcie_port);
+        info->fd = safe_close(info->fd);
+        info->overlay_fd = safe_close(info->overlay_fd);
+}
+
+void drive_infos_done(DriveInfos *infos) {
+        assert(infos);
+        FOREACH_ARRAY(d, infos->drives, infos->n_drives)
+                drive_info_done(d);
+        infos->drives = mfree(infos->drives);
+        infos->n_drives = 0;
+        infos->scsi_pcie_port = mfree(infos->scsi_pcie_port);
+}
+
+void network_info_done(NetworkInfo *info) {
+        assert(info);
+        info->ifname = mfree(info->ifname);
+        info->pcie_port = mfree(info->pcie_port);
+        info->fd = safe_close(info->fd);
+}
+
+void virtiofs_info_done(VirtiofsInfo *info) {
+        assert(info);
+        info->id = mfree(info->id);
+        info->socket_path = mfree(info->socket_path);
+        info->tag = mfree(info->tag);
+        info->pcie_port = mfree(info->pcie_port);
+}
+
+void virtiofs_infos_done(VirtiofsInfos *infos) {
+        assert(infos);
+        FOREACH_ARRAY(e, infos->entries, infos->n_entries)
+                virtiofs_info_done(e);
+        infos->entries = mfree(infos->entries);
+        infos->n_entries = 0;
+}
+
+void vsock_info_done(VsockInfo *info) {
+        assert(info);
+        info->pcie_port = mfree(info->pcie_port);
+        info->fd = safe_close(info->fd);
+}
+
+void machine_config_done(MachineConfig *c) {
+        if (!c)
+                return;
+
+        drive_infos_done(&c->drives);
+        network_info_done(&c->network);
+        virtiofs_infos_done(&c->virtiofs);
+        vsock_info_done(&c->vsock);
+}
+
+/* Generic async QMP setup-completion callback. The userdata argument carries the
+ * command name (as a string literal) for logging. On failure, request a clean
+ * event loop exit so vmspawn shuts down instead of running a VM with missing devices. */
+static int on_qmp_setup_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        const char *label = ASSERT_PTR(userdata);
+
+        assert(client);
+
+        if (error < 0) {
+                log_error_errno(error, "%s failed: %s", label, strna(error_desc));
+                return sd_event_exit(qmp_client_get_event(client), error);
+        }
+
+        return 0;
+}
+
+/* Send add-fd via SCM_RIGHTS; return /dev/fdset/N. Allocations run before invoke so a late
+ * OOM cannot orphan an fdset on QEMU's side; *ret_path is only written on full success. */
+static int qmp_fdset_add(QmpClient *qmp, int fd_consume, char **ret_path) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+        _cleanup_close_ int fd = fd_consume;
+        _cleanup_free_ char *path = NULL;
+        unsigned id;
+        int r;
+
+        assert(qmp);
+        assert(fd_consume >= 0);
+        assert(ret_path);
+
+        id = qmp_client_next_fdset_id(qmp);
+
+        r = sd_json_buildo(&args, SD_JSON_BUILD_PAIR_UNSIGNED("fdset-id", id));
+        if (r < 0)
+                return r;
+
+        if (asprintf(&path, "/dev/fdset/%u", id) < 0)
+                return -ENOMEM;
+
+        r = qmp_client_invoke(qmp, "add-fd", QMP_CLIENT_ARGS_FD(args, TAKE_FD(fd)),
+                              on_qmp_setup_complete, (void*) "add-fd");
+        if (r < 0)
+                return r;
+
+        *ret_path = TAKE_PTR(path);
+        return 0;
+}
+
+typedef struct QmpFileNodeParams {
+        const char *node_name;
+        const char *filename;
+        const char *driver;     /* "file" or "host_device" */
+        QmpDriveFlags flags;
+} QmpFileNodeParams;
+
+/* Build blockdev-add JSON for the protocol-level (file) node */
+static int qmp_build_blockdev_add_file(const QmpFileNodeParams *p, sd_json_variant **ret) {
+        assert(p);
+        assert(p->node_name);
+        assert(p->filename);
+        assert(p->driver);
+        assert(ret);
+
+        /* cache.direct=false uses the page cache (QEMU default). cache.no-flush suppresses host
+         * flush on guest fsync — only safe for ephemeral/extra drives where data loss is acceptable. */
+        return sd_json_buildo(
+                        ret,
+                        SD_JSON_BUILD_PAIR_STRING("node-name", p->node_name),
+                        SD_JSON_BUILD_PAIR_STRING("driver", p->driver),
+                        SD_JSON_BUILD_PAIR_STRING("filename", p->filename),
+                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(p->flags, QMP_DRIVE_READ_ONLY), "read-only", SD_JSON_BUILD_BOOLEAN(true)),
+                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(p->flags, QMP_DRIVE_IO_URING), "aio", JSON_BUILD_CONST_STRING("io_uring")),
+                        SD_JSON_BUILD_PAIR("cache", SD_JSON_BUILD_OBJECT(
+                                        SD_JSON_BUILD_PAIR_BOOLEAN("direct", false),
+                                        SD_JSON_BUILD_PAIR_BOOLEAN("no-flush", FLAGS_SET(p->flags, QMP_DRIVE_NO_FLUSH)))));
+}
+
+typedef struct QmpFormatNodeParams {
+        const char *node_name;
+        const char *format;          /* "raw", "qcow2", etc. */
+        const char *file_node_name;  /* reference to the underlying file node */
+        const char *backing;         /* reference to a backing format node (NULL if none) */
+        QmpDriveFlags flags;
+} QmpFormatNodeParams;
+
+/* Build blockdev-add JSON for the format-level node */
+static int qmp_build_blockdev_add_format(const QmpFormatNodeParams *p, sd_json_variant **ret) {
+        assert(p);
+        assert(p->node_name);
+        assert(p->format);
+        assert(p->file_node_name);
+        assert(ret);
+
+        /* When "file" is a string (not an object), QEMU interprets it as a reference to an
+         * existing node-name. The "backing" field likewise references a format-level node. */
+        return sd_json_buildo(
+                        ret,
+                        SD_JSON_BUILD_PAIR_STRING("node-name", p->node_name),
+                        SD_JSON_BUILD_PAIR_STRING("driver", p->format),
+                        SD_JSON_BUILD_PAIR_STRING("file", p->file_node_name),
+                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(p->flags, QMP_DRIVE_READ_ONLY), "read-only", SD_JSON_BUILD_BOOLEAN(true)),
+                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(p->flags, QMP_DRIVE_DISCARD), "discard", JSON_BUILD_CONST_STRING("unmap")),
+                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(p->flags, QMP_DRIVE_DISCARD_NO_UNREF), "discard-no-unref", SD_JSON_BUILD_BOOLEAN(true)),
+                        SD_JSON_BUILD_PAIR_CONDITION(!!p->backing, "backing", SD_JSON_BUILD_STRING(p->backing)));
+}
+
+/* Build device_add JSON arguments for a drive */
+static int qmp_build_device_add(const DriveInfo *drive, sd_json_variant **ret) {
+        assert(drive);
+        assert(ret);
+
+        return sd_json_buildo(
+                        ret,
+                        SD_JSON_BUILD_PAIR_STRING("driver", drive->disk_driver),
+                        SD_JSON_BUILD_PAIR_STRING("drive", drive->node_name),
+                        SD_JSON_BUILD_PAIR_STRING("id", drive->node_name),
+                        SD_JSON_BUILD_PAIR_CONDITION(FLAGS_SET(drive->flags, QMP_DRIVE_BOOT), "bootindex", SD_JSON_BUILD_INTEGER(1)),
+                        SD_JSON_BUILD_PAIR_CONDITION(!!drive->serial, "serial", SD_JSON_BUILD_STRING(drive->serial)),
+                        SD_JSON_BUILD_PAIR_CONDITION(STR_IN_SET(drive->disk_driver, "scsi-hd", "scsi-cd"),
+                                                    "bus", JSON_BUILD_CONST_STRING("vmspawn_scsi.0")),
+                        SD_JSON_BUILD_PAIR_CONDITION(
+                                        !STR_IN_SET(drive->disk_driver, "scsi-hd", "scsi-cd") && !!drive->pcie_port,
+                                        "bus", SD_JSON_BUILD_STRING(drive->pcie_port)));
+}
+
+/* Issue blockdev-add for a file node. */
+static int qmp_add_file_node(QmpClient *qmp, const QmpFileNodeParams *p) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+        int r;
+
+        r = qmp_build_blockdev_add_file(p, &args);
+        if (r < 0)
+                return r;
+
+        return qmp_client_invoke(qmp, "blockdev-add", QMP_CLIENT_ARGS(args), on_qmp_setup_complete, (void*) "blockdev-add");
+}
+
+/* Get the virtual size of an image from the fd directly. For raw images the virtual size
+ * equals the file/device size. For qcow2 the virtual size is a big-endian uint64 at header
+ * offset 24 (the "size" field in the qcow2 header). */
+static int get_image_virtual_size(int fd, const char *format, bool is_block_device, uint64_t *ret) {
+        int r;
+
+        assert(fd >= 0);
+        assert(format);
+        assert(ret);
+
+        if (streq(format, "raw")) {
+                if (is_block_device)
+                        return blockdev_get_device_size(fd, ret);
+
+                struct stat st;
+                if (fstat(fd, &st) < 0)
+                        return log_error_errno(errno, "Failed to stat image: %m");
+
+                r = stat_verify_regular(&st);
+                if (r < 0)
+                        return log_error_errno(r, "Raw device is neither a regular file nor a block device");
+
+                *ret = st.st_size;
+                return 0;
+        }
+
+        if (streq(format, "qcow2")) {
+                uint32_t magic = 0;
+                ssize_t n = pread(fd, &magic, sizeof(magic), 0);
+                if (n < 0)
+                        return log_error_errno(errno, "Failed to read qcow2 magic: %m");
+                if (n != sizeof(magic) || be32toh(magic) != UINT32_C(0x514649fb))
+                        return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Not a valid qcow2 image (bad magic)");
+
+                uint64_t size_be = 0;
+                n = pread(fd, &size_be, sizeof(size_be), 24);
+                if (n < 0)
+                        return log_error_errno(errno, "Failed to read qcow2 header: %m");
+                if (n != sizeof(size_be))
+                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read on qcow2 header");
+
+                *ret = be64toh(size_be);
+                return 0;
+        }
+
+        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Unsupported image format '%s'", format);
+}
+
+/* Ephemeral drive continuation — fired when the blockdev-create job concludes.
+ * Completes the drive setup by adding the overlay format node and the device. */
+typedef struct EphemeralDriveCtx {
+        char *node_name;           /* overlay format node name (= drive node name) */
+        char *overlay_file_node;
+        char *base_fmt_node;
+        /* Fields for device_add */
+        char *disk_driver;
+        char *serial;              /* NULL if unset */
+        char *pcie_port;           /* pcie-root-port bus for device_add (NULL on non-PCIe) */
+        QmpDriveFlags flags;       /* subset: QMP_DRIVE_DISCARD, QMP_DRIVE_DISCARD_NO_UNREF, QMP_DRIVE_BOOT */
+} EphemeralDriveCtx;
+
+static EphemeralDriveCtx* ephemeral_drive_ctx_free(EphemeralDriveCtx *ctx) {
+        if (!ctx)
+                return NULL;
+        free(ctx->node_name);
+        free(ctx->overlay_file_node);
+        free(ctx->base_fmt_node);
+        free(ctx->disk_driver);
+        free(ctx->serial);
+        free(ctx->pcie_port);
+        return mfree(ctx);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(EphemeralDriveCtx *, ephemeral_drive_ctx_free);
+
+static void ephemeral_drive_ctx_free_void(void *p) {
+        ephemeral_drive_ctx_free(p);
+}
+
+static int on_ephemeral_create_concluded(QmpClient *qmp, void *userdata) {
+        _cleanup_(ephemeral_drive_ctx_freep) EphemeralDriveCtx *ctx = ASSERT_PTR(userdata);
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *fmt_args = NULL, *device_args = NULL;
+        int r;
+
+        /* Open formatted overlay as qcow2 with backing reference */
+        QmpFormatNodeParams overlay_fmt_params = {
+                .node_name      = ctx->node_name,
+                .format         = "qcow2",
+                .file_node_name = ctx->overlay_file_node,
+                .backing        = ctx->base_fmt_node,
+                .flags          = ctx->flags & (QMP_DRIVE_DISCARD|QMP_DRIVE_DISCARD_NO_UNREF),
+        };
+        r = qmp_build_blockdev_add_format(&overlay_fmt_params, &fmt_args);
+        if (r < 0)
+                return log_error_errno(r, "Failed to build overlay format JSON for '%s': %m", ctx->node_name);
+
+        r = qmp_client_invoke(qmp, "blockdev-add", QMP_CLIENT_ARGS(fmt_args), on_qmp_setup_complete, (void*) "blockdev-add");
+        if (r < 0)
+                return r;
+
+        /* device_add: attach to virtual hardware. Build a temporary DriveInfo as a
+         * read-only view into the continuation context to reuse qmp_build_device_add(). */
+        const DriveInfo tmp = {
+                .disk_driver = ctx->disk_driver,
+                .node_name   = ctx->node_name,
+                .serial      = ctx->serial,
+                .pcie_port   = ctx->pcie_port,
+                .flags       = ctx->flags & QMP_DRIVE_BOOT,
+                .fd          = -EBADF,
+                .overlay_fd  = -EBADF,
+        };
+        r = qmp_build_device_add(&tmp, &device_args);
+        if (r < 0)
+                return log_error_errno(r, "Failed to build device_add JSON for '%s': %m", ctx->node_name);
+
+        r = qmp_client_invoke(qmp, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_setup_complete, (void*) "device_add");
+        if (r < 0)
+                return r;
+
+        log_debug("Queued ephemeral drive completion for '%s'", ctx->node_name);
+        return 0;
+}
+
+/* Set up an ephemeral drive: base image (read-only) + anonymous qcow2 overlay (read-write).
+ * The final steps (overlay format + device_add) are deferred to a job continuation that
+ * fires when the blockdev-create job concludes. */
+static int qmp_setup_ephemeral_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
+        int r;
+
+        assert(bridge);
+        assert(qmp);
+        assert(drive);
+        assert(drive->fd >= 0);
+        assert(drive->overlay_fd >= 0);
+
+        /* Node names: <name>-base-file, <name>-base-fmt, <name>-overlay-file, <name> */
+        _cleanup_free_ char *base_file_node = strjoin(drive->node_name, "-base-file");
+        _cleanup_free_ char *base_fmt_node = strjoin(drive->node_name, "-base-fmt");
+        _cleanup_free_ char *overlay_file_node = strjoin(drive->node_name, "-overlay-file");
+        if (!base_file_node || !base_fmt_node || !overlay_file_node)
+                return log_oom();
+
+        /* Read virtual size before passing the fd to QEMU (TAKE_FD consumes it) */
+        uint64_t virtual_size;
+        r = get_image_virtual_size(drive->fd, drive->format, FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE), &virtual_size);
+        if (r < 0)
+                return r;
+
+        /* Step 1-2: Pass both fds to QEMU */
+        _cleanup_free_ char *base_path = NULL;
+        r = qmp_fdset_add(qmp, TAKE_FD(drive->fd), &base_path);
+        if (r < 0)
+                return log_error_errno(r, "Failed to send add-fd for base image '%s': %m", drive->path);
+
+        _cleanup_free_ char *overlay_path = NULL;
+        r = qmp_fdset_add(qmp, TAKE_FD(drive->overlay_fd), &overlay_path);
+        if (r < 0)
+                return log_error_errno(r, "Failed to send add-fd for overlay of '%s': %m", drive->path);
+
+        /* Step 3: Base image file node (read-only) */
+        QmpFileNodeParams base_file_params = {
+                .node_name = base_file_node,
+                .filename  = base_path,
+                .driver    = FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE) ? "host_device" : "file",
+                .flags     = QMP_DRIVE_READ_ONLY | (drive->flags & QMP_DRIVE_NO_FLUSH),
+        };
+        if (FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_IO_URING))
+                base_file_params.flags |= QMP_DRIVE_IO_URING;
+        r = qmp_add_file_node(qmp, &base_file_params);
+        if (r < 0)
+                return log_error_errno(r, "Failed to send blockdev-add for base file '%s': %m", drive->path);
+
+        /* Step 4: Base image format node (read-only) */
+        QmpFormatNodeParams base_fmt_params = {
+                .node_name      = base_fmt_node,
+                .format         = drive->format,
+                .file_node_name = base_file_node,
+                .flags          = QMP_DRIVE_READ_ONLY,
+        };
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *base_fmt_args = NULL;
+        r = qmp_build_blockdev_add_format(&base_fmt_params, &base_fmt_args);
+        if (r < 0)
+                return r;
+
+        r = qmp_client_invoke(qmp, "blockdev-add", QMP_CLIENT_ARGS(base_fmt_args), on_qmp_setup_complete, (void*) "blockdev-add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send blockdev-add for base format '%s': %m", drive->path);
+
+        /* Step 5: Overlay file node (read-write, no io_uring for anon overlay) */
+        QmpFileNodeParams overlay_file_params = {
+                .node_name = overlay_file_node,
+                .filename  = overlay_path,
+                .driver    = "file",
+                .flags     = QMP_DRIVE_NO_FLUSH,
+        };
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *overlay_file_args = NULL;
+        r = qmp_build_blockdev_add_file(&overlay_file_params, &overlay_file_args);
+        if (r < 0)
+                return r;
+
+        r = qmp_client_invoke(qmp, "blockdev-add", QMP_CLIENT_ARGS(overlay_file_args), on_qmp_setup_complete, (void*) "blockdev-add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send blockdev-add for overlay file '%s': %m", drive->path);
+
+        /* Step 6: Fire blockdev-create to format the overlay */
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *create_options = NULL;
+        r = sd_json_buildo(&create_options,
+                        SD_JSON_BUILD_PAIR_STRING("driver", "qcow2"),
+                        SD_JSON_BUILD_PAIR_STRING("file", overlay_file_node),
+                        SD_JSON_BUILD_PAIR_UNSIGNED("size", virtual_size),
+                        SD_JSON_BUILD_PAIR_STRING("backing-file", base_fmt_node),
+                        SD_JSON_BUILD_PAIR_STRING("backing-fmt", drive->format));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build blockdev-create options: %m");
+
+        _cleanup_free_ char *job_id = strjoin("create-", drive->node_name);
+        if (!job_id)
+                return log_oom();
+
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *cmd_args = NULL;
+        r = sd_json_buildo(&cmd_args,
+                        SD_JSON_BUILD_PAIR_STRING("job-id", job_id),
+                        SD_JSON_BUILD_PAIR_VARIANT("options", create_options));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build blockdev-create JSON: %m");
+
+        /* Register continuation: when the job concludes, fire overlay format + device_add */
+        _cleanup_(ephemeral_drive_ctx_freep) EphemeralDriveCtx *ectx = new(EphemeralDriveCtx, 1);
+        if (!ectx)
+                return log_oom();
+
+        QmpDriveFlags ectx_flags = drive->flags & (QMP_DRIVE_DISCARD|QMP_DRIVE_BOOT);
+        if (FLAGS_SET(drive->flags, QMP_DRIVE_DISCARD) &&
+            FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_DISCARD_NO_UNREF))
+                ectx_flags |= QMP_DRIVE_DISCARD_NO_UNREF;
+
+        *ectx = (EphemeralDriveCtx) {
+                .node_name          = strdup(drive->node_name),
+                .overlay_file_node  = strdup(overlay_file_node),
+                .base_fmt_node      = strdup(base_fmt_node),
+                .disk_driver        = strdup(drive->disk_driver),
+                .serial             = drive->serial ? strdup(drive->serial) : NULL,
+                .pcie_port          = drive->pcie_port ? strdup(drive->pcie_port) : NULL,
+                .flags              = ectx_flags,
+        };
+        if (!ectx->node_name || !ectx->overlay_file_node || !ectx->base_fmt_node ||
+            !ectx->disk_driver || (drive->serial && !ectx->serial) ||
+            (drive->pcie_port && !ectx->pcie_port))
+                return log_oom();
+
+        r = vmspawn_qmp_bridge_register_job(bridge, job_id,
+                                            on_ephemeral_create_concluded, ectx,
+                                            ephemeral_drive_ctx_free_void);
+        if (r < 0)
+                return log_error_errno(r, "Failed to register job continuation: %m");
+
+        TAKE_PTR(ectx);
+
+        r = qmp_client_invoke(qmp, "blockdev-create", QMP_CLIENT_ARGS(cmd_args), on_qmp_setup_complete, (void*) "blockdev-create");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send blockdev-create for '%s': %m", drive->path);
+
+        log_debug("Queued ephemeral drive setup for '%s' (job %s)", drive->path, job_id);
+        return 0;
+}
+
+/* Set up a regular (non-ephemeral) drive: single file node + format node + device_add. */
+static int qmp_setup_regular_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
+        int r;
+
+        assert(bridge);
+        assert(qmp);
+        assert(drive);
+        assert(drive->fd >= 0);
+
+        /* Node names: <name>-file, <name> */
+        _cleanup_free_ char *file_node_name = strjoin(drive->node_name, "-file");
+        if (!file_node_name)
+                return log_oom();
+
+        _cleanup_free_ char *fdset_path = NULL;
+        r = qmp_fdset_add(qmp, TAKE_FD(drive->fd), &fdset_path);
+        if (r < 0)
+                return log_error_errno(r, "Failed to send add-fd for '%s': %m", drive->path);
+
+        QmpFileNodeParams file_params = {
+                .node_name = file_node_name,
+                .filename  = fdset_path,
+                .driver    = FLAGS_SET(drive->flags, QMP_DRIVE_BLOCK_DEVICE) ? "host_device" : "file",
+                .flags     = drive->flags & (QMP_DRIVE_READ_ONLY|QMP_DRIVE_NO_FLUSH),
+        };
+        if (FLAGS_SET(bridge->features, VMSPAWN_QMP_FEATURE_IO_URING))
+                file_params.flags |= QMP_DRIVE_IO_URING;
+        r = qmp_add_file_node(qmp, &file_params);
+        if (r < 0)
+                return log_error_errno(r, "Failed to send blockdev-add for '%s': %m", drive->path);
+
+        QmpFormatNodeParams fmt_params = {
+                .node_name      = drive->node_name,
+                .format         = drive->format,
+                .file_node_name = file_node_name,
+                .flags          = drive->flags & (QMP_DRIVE_READ_ONLY|QMP_DRIVE_DISCARD),
+        };
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *fmt_args = NULL;
+        r = qmp_build_blockdev_add_format(&fmt_params, &fmt_args);
+        if (r < 0)
+                return r;
+
+        r = qmp_client_invoke(qmp, "blockdev-add", QMP_CLIENT_ARGS(fmt_args), on_qmp_setup_complete, (void*) "blockdev-add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send blockdev-add format for '%s': %m", drive->path);
+
+        /* device_add: attach to virtual hardware */
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *device_args = NULL;
+        r = qmp_build_device_add(drive, &device_args);
+        if (r < 0)
+                return r;
+
+        r = qmp_client_invoke(qmp, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_setup_complete, (void*) "device_add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send device_add for '%s': %m", drive->path);
+
+        log_debug("Queued drive setup for '%s'", drive->path);
+        return 0;
+}
+
+/* Configure a single drive via QMP. Dispatches to ephemeral or regular setup. */
+static int qmp_setup_drive(VmspawnQmpBridge *bridge, QmpClient *qmp, DriveInfo *drive) {
+        assert(drive);
+
+        if (drive->overlay_fd >= 0)
+                return qmp_setup_ephemeral_drive(bridge, qmp, drive);
+
+        return qmp_setup_regular_drive(bridge, qmp, drive);
+}
+
+int vmspawn_qmp_setup_network(VmspawnQmpBridge *bridge, NetworkInfo *network) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *netdev_args = NULL, *device_args = NULL;
+        bool tap_by_fd;
+        int r;
+
+        assert(bridge);
+
+        QmpClient *qmp = vmspawn_qmp_bridge_get_qmp(bridge);
+        assert(network);
+        assert(network->type);
+
+        tap_by_fd = streq(network->type, "tap") && network->fd >= 0;
+
+        /* For TAP-by-fd: pass the TAP fd to QEMU via getfd + SCM_RIGHTS, then reference it by name
+         * in netdev_add. QEMU stores the received fd under the given fdname and closes it on removal. */
+        if (tap_by_fd) {
+                _cleanup_(sd_json_variant_unrefp) sd_json_variant *getfd_args = NULL;
+
+                r = sd_json_buildo(
+                                &getfd_args,
+                                SD_JSON_BUILD_PAIR_STRING("fdname", "vmspawn_tap"));
+                if (r < 0)
+                        return log_error_errno(r, "Failed to build getfd JSON: %m");
+
+                r = qmp_client_invoke(qmp, "getfd", QMP_CLIENT_ARGS_FD(getfd_args, TAKE_FD(network->fd)),
+                                      on_qmp_setup_complete, (void*) "getfd");
+                if (r < 0)
+                        return log_error_errno(r, "Failed to send getfd for TAP fd: %m");
+        }
+
+        /* netdev_add: create the network backend */
+        r = sd_json_buildo(
+                        &netdev_args,
+                        SD_JSON_BUILD_PAIR_STRING("type", network->type),
+                        SD_JSON_BUILD_PAIR_STRING("id", "net0"),
+                        SD_JSON_BUILD_PAIR_CONDITION(tap_by_fd,
+                                                     "fd", JSON_BUILD_CONST_STRING("vmspawn_tap")),
+                        SD_JSON_BUILD_PAIR_CONDITION(!tap_by_fd && !!network->ifname,
+                                                     "ifname", SD_JSON_BUILD_STRING(network->ifname)),
+                        SD_JSON_BUILD_PAIR_CONDITION(!tap_by_fd && streq(network->type, "tap"),
+                                                     "script", JSON_BUILD_CONST_STRING("no")),
+                        SD_JSON_BUILD_PAIR_CONDITION(!tap_by_fd && streq(network->type, "tap"),
+                                                     "downscript", JSON_BUILD_CONST_STRING("no")));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build netdev_add JSON: %m");
+
+        r = qmp_client_invoke(qmp, "netdev_add", QMP_CLIENT_ARGS(netdev_args), on_qmp_setup_complete, (void*) "netdev_add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send netdev_add: %m");
+
+        /* device_add: attach NIC frontend */
+        r = sd_json_buildo(
+                        &device_args,
+                        SD_JSON_BUILD_PAIR_STRING("driver", "virtio-net-pci"),
+                        SD_JSON_BUILD_PAIR_STRING("netdev", "net0"),
+                        SD_JSON_BUILD_PAIR_STRING("id", "nic0"),
+                        SD_JSON_BUILD_PAIR_CONDITION(network->mac_set,
+                                                     "mac", SD_JSON_BUILD_STRING(network->mac_set ? ETHER_ADDR_TO_STR(&network->mac) : NULL)),
+                        SD_JSON_BUILD_PAIR_CONDITION(!!network->pcie_port,
+                                                     "bus", SD_JSON_BUILD_STRING(network->pcie_port)));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build NIC device_add JSON: %m");
+
+        r = qmp_client_invoke(qmp, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_setup_complete, (void*) "device_add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send NIC device_add: %m");
+
+        log_debug("Queued %s network setup%s", network->type, tap_by_fd ? " (fd via getfd)" : "");
+        return 0;
+}
+
+static int vmspawn_qmp_setup_one_virtiofs(QmpClient *qmp, const VirtiofsInfo *vfs) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *chardev_args = NULL, *device_args = NULL;
+        int r;
+
+        assert(qmp);
+        assert(vfs);
+        assert(vfs->id);
+        assert(vfs->socket_path);
+        assert(vfs->tag);
+
+        /* chardev-add: connect to virtiofsd socket.
+         * ChardevBackend and SocketAddressLegacy are QAPI legacy unions with explicit "data"
+         * wrapper objects at each level — the nesting is mandatory on the wire. */
+        r = sd_json_buildo(
+                        &chardev_args,
+                        SD_JSON_BUILD_PAIR_STRING("id", vfs->id),
+                        SD_JSON_BUILD_PAIR("backend", SD_JSON_BUILD_OBJECT(
+                                        SD_JSON_BUILD_PAIR_STRING("type", "socket"),
+                                        SD_JSON_BUILD_PAIR("data", SD_JSON_BUILD_OBJECT(
+                                                        SD_JSON_BUILD_PAIR("addr", SD_JSON_BUILD_OBJECT(
+                                                                        SD_JSON_BUILD_PAIR_STRING("type", "unix"),
+                                                                        SD_JSON_BUILD_PAIR("data", SD_JSON_BUILD_OBJECT(
+                                                                                        SD_JSON_BUILD_PAIR_STRING("path", vfs->socket_path))))),
+                                                        SD_JSON_BUILD_PAIR_BOOLEAN("server", false))))));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build chardev-add JSON for '%s': %m", vfs->id);
+
+        r = qmp_client_invoke(qmp, "chardev-add", QMP_CLIENT_ARGS(chardev_args), on_qmp_setup_complete, (void*) "chardev-add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send chardev-add '%s': %m", vfs->id);
+
+        /* device_add: create vhost-user-fs-pci device */
+        r = sd_json_buildo(
+                        &device_args,
+                        SD_JSON_BUILD_PAIR_STRING("driver", "vhost-user-fs-pci"),
+                        SD_JSON_BUILD_PAIR_STRING("id", vfs->id),
+                        SD_JSON_BUILD_PAIR_STRING("chardev", vfs->id),
+                        SD_JSON_BUILD_PAIR_STRING("tag", vfs->tag),
+                        SD_JSON_BUILD_PAIR_UNSIGNED("queue-size", 1024),
+                        SD_JSON_BUILD_PAIR_CONDITION(!!vfs->pcie_port,
+                                                     "bus", SD_JSON_BUILD_STRING(vfs->pcie_port)));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build virtiofs device_add JSON for '%s': %m", vfs->id);
+
+        r = qmp_client_invoke(qmp, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_setup_complete, (void*) "device_add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send virtiofs device_add '%s': %m", vfs->id);
+
+        log_debug("Queued virtiofs device '%s' (tag=%s)", vfs->id, vfs->tag);
+        return 0;
+}
+
+int vmspawn_qmp_setup_virtiofs(VmspawnQmpBridge *bridge, const VirtiofsInfos *virtiofs) {
+        int r;
+
+        assert(bridge);
+
+        QmpClient *qmp = vmspawn_qmp_bridge_get_qmp(bridge);
+        assert(virtiofs);
+
+        FOREACH_ARRAY(e, virtiofs->entries, virtiofs->n_entries) {
+                r = vmspawn_qmp_setup_one_virtiofs(qmp, e);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+int vmspawn_qmp_setup_vsock(VmspawnQmpBridge *bridge, VsockInfo *vsock) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *getfd_args = NULL, *device_args = NULL;
+        int r;
+
+        assert(bridge);
+        assert(vsock);
+
+        if (vsock->fd < 0)
+                return 0;
+
+        QmpClient *qmp = vmspawn_qmp_bridge_get_qmp(bridge);
+
+        /* getfd: pass the vhost-vsock fd to QEMU via SCM_RIGHTS */
+        r = sd_json_buildo(
+                        &getfd_args,
+                        SD_JSON_BUILD_PAIR_STRING("fdname", "vmspawn_vsock"));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build getfd JSON for VSOCK: %m");
+
+        r = qmp_client_invoke(qmp, "getfd", QMP_CLIENT_ARGS_FD(getfd_args, TAKE_FD(vsock->fd)),
+                              on_qmp_setup_complete, (void*) "getfd");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send getfd for VSOCK fd: %m");
+
+        /* device_add: create vhost-vsock-pci device referencing the named fd */
+        r = sd_json_buildo(
+                        &device_args,
+                        SD_JSON_BUILD_PAIR_STRING("driver", "vhost-vsock-pci"),
+                        SD_JSON_BUILD_PAIR_STRING("id", "vsock0"),
+                        SD_JSON_BUILD_PAIR_UNSIGNED("guest-cid", vsock->cid),
+                        SD_JSON_BUILD_PAIR_STRING("vhostfd", "vmspawn_vsock"),
+                        SD_JSON_BUILD_PAIR_CONDITION(!!vsock->pcie_port,
+                                                     "bus", SD_JSON_BUILD_STRING(vsock->pcie_port)));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build VSOCK device_add JSON: %m");
+
+        r = qmp_client_invoke(qmp, "device_add", QMP_CLIENT_ARGS(device_args), on_qmp_setup_complete, (void*) "device_add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send VSOCK device_add: %m");
+
+        log_debug("Queued vhost-vsock-pci device setup (cid=%u)", vsock->cid);
+        return 0;
+}
+
+static bool drives_need_scsi_controller(DriveInfos *drives) {
+        assert(drives);
+
+        FOREACH_ARRAY(d, drives->drives, drives->n_drives)
+                if (STR_IN_SET(d->disk_driver, "scsi-hd", "scsi-cd"))
+                        return true;
+
+        return false;
+}
+
+static int qmp_setup_scsi_controller(QmpClient *qmp, const char *pcie_port) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+        int r;
+
+        r = sd_json_buildo(
+                        &args,
+                        SD_JSON_BUILD_PAIR_STRING("driver", "virtio-scsi-pci"),
+                        SD_JSON_BUILD_PAIR_STRING("id", "vmspawn_scsi"),
+                        SD_JSON_BUILD_PAIR_CONDITION(!!pcie_port, "bus", SD_JSON_BUILD_STRING(pcie_port)));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build SCSI controller JSON: %m");
+
+        r = qmp_client_invoke(qmp, "device_add", QMP_CLIENT_ARGS(args), on_qmp_setup_complete, (void*) "device_add");
+        if (r < 0)
+                return log_error_errno(r, "Failed to send SCSI controller device_add: %m");
+
+        log_debug("Queued virtio-scsi-pci controller setup");
+        return 0;
+}
+
+int vmspawn_qmp_setup_drives(VmspawnQmpBridge *bridge, DriveInfos *drives) {
+        int r;
+
+        assert(bridge);
+        assert(drives);
+
+        QmpClient *qmp = vmspawn_qmp_bridge_get_qmp(bridge);
+
+        /* io_uring support was probed during vmspawn_qmp_init(). The cached result in
+         * bridge->features is passed to each file node setup call. */
+
+        if (drives_need_scsi_controller(drives)) {
+                r = qmp_setup_scsi_controller(qmp, drives->scsi_pcie_port);
+                if (r < 0)
+                        return r;
+        }
+
+        FOREACH_ARRAY(d, drives->drives, drives->n_drives) {
+                r = qmp_setup_drive(bridge, qmp, d);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+PendingJob* pending_job_free(PendingJob *j) {
+        if (!j)
+                return NULL;
+        if (j->free_userdata)
+                j->free_userdata(j->userdata);
+        return mfree(j);
+}
+
+VmspawnQmpBridge* vmspawn_qmp_bridge_free(VmspawnQmpBridge *b) {
+        if (!b)
+                return NULL;
+
+        hashmap_free(b->pending_jobs);
+
+        qmp_client_unref(b->qmp);
+        return mfree(b);
+}
+
+int vmspawn_qmp_bridge_register_job(
+                VmspawnQmpBridge *b,
+                const char *job_id,
+                pending_job_callback_t on_concluded,
+                void *userdata,
+                pending_job_free_t free_userdata) {
+
+        _cleanup_free_ PendingJob *job = NULL;
+        _cleanup_free_ char *id = NULL;
+        int r;
+
+        assert(b);
+        assert(job_id);
+
+        id = strdup(job_id);
+        if (!id)
+                return -ENOMEM;
+
+        job = new(PendingJob, 1);
+        if (!job)
+                return -ENOMEM;
+
+        *job = (PendingJob) {
+                .on_concluded  = on_concluded,
+                .free_userdata = free_userdata,
+                .userdata      = userdata,
+        };
+
+        r = hashmap_ensure_put(&b->pending_jobs, &pending_job_hash_ops, id, job);
+        if (r < 0)
+                return r;
+
+        TAKE_PTR(id);
+        TAKE_PTR(job);
+        return 0;
+}
+
+QmpClient* vmspawn_qmp_bridge_get_qmp(VmspawnQmpBridge *b) {
+        assert(b);
+        return b->qmp;
+}
+
+/* Probe-reply convention: ignore -EIO (QMP rejection = "feature absent", log at debug
+ * and leave the feature flag clear) and transport errors (caught by the post-loop
+ * qmp_client_is_disconnected() check in vmspawn_qmp_probe_features()). Cleanup calls
+ * are best-effort — failing to delete a private probe node leaves a harmless /dev/null
+ * blockdev in QEMU until it exits. */
+
+static int on_io_uring_probe_del_reply(
+                QmpClient *c,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        assert(c);
+
+        if (error_desc)
+                log_debug("Failed to remove io_uring probe node: %s", error_desc);
+        return 0;
+}
+
+static int on_io_uring_probe_add_reply(
+                QmpClient *c,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        VmspawnQmpBridge *bridge = ASSERT_PTR(userdata);
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *del_args = NULL;
+        int r;
+
+        assert(c);
+
+        if (error < 0 && !error_desc)
+                return log_debug_errno(error, "io_uring probe did not execute: %m");
+        if (error_desc) {
+                log_debug("QEMU does not support aio=io_uring: %s", error_desc);
+                return 0;
+        }
+
+        bridge->features |= VMSPAWN_QMP_FEATURE_IO_URING;
+        log_debug("QEMU supports aio=io_uring");
+
+        /* Best-effort cleanup; the chained reply keeps the pump busy via the slots set. */
+        r = sd_json_buildo(&del_args,
+                        SD_JSON_BUILD_PAIR_STRING("node-name", "__io_uring_probe"));
+        if (r < 0)
+                return r;
+
+        return qmp_client_invoke(c, "blockdev-del", QMP_CLIENT_ARGS(del_args),
+                        on_io_uring_probe_del_reply, bridge);
+}
+
+static int probe_io_uring(QmpClient *c, VmspawnQmpBridge *bridge) {
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL;
+        int r;
+
+        assert(c);
+        assert(bridge);
+
+        r = sd_json_buildo(
+                        &args,
+                        SD_JSON_BUILD_PAIR_STRING("node-name", "__io_uring_probe"),
+                        SD_JSON_BUILD_PAIR_STRING("driver", "file"),
+                        SD_JSON_BUILD_PAIR_STRING("filename", "/dev/null"),
+                        SD_JSON_BUILD_PAIR_BOOLEAN("read-only", true),
+                        SD_JSON_BUILD_PAIR_STRING("aio", "io_uring"));
+        if (r < 0)
+                return r;
+
+        return qmp_client_invoke(c, "blockdev-add", QMP_CLIENT_ARGS(args),
+                        on_io_uring_probe_add_reply, bridge);
+}
+
+static int on_probe_schema_reply(
+                QmpClient *c,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        VmspawnQmpBridge *bridge = ASSERT_PTR(userdata);
+
+        assert(c);
+
+        if (error < 0 && !error_desc)
+                return log_debug_errno(error, "query-qmp-schema probe did not execute: %m");
+        if (error_desc) {
+                log_debug("query-qmp-schema rejected: %s", error_desc);
+                return 0;
+        }
+
+        if (qmp_schema_has_member(result, "discard-no-unref")) {
+                bridge->features |= VMSPAWN_QMP_FEATURE_DISCARD_NO_UNREF;
+                log_debug("QEMU supports qcow2 discard-no-unref");
+        } else
+                log_debug("QEMU does not support qcow2 discard-no-unref");
+
+        return 0;
+}
+
+static int probe_schema(QmpClient *c, VmspawnQmpBridge *bridge) {
+        assert(c);
+        assert(bridge);
+
+        return qmp_client_invoke(c, "query-qmp-schema", QMP_CLIENT_ARGS(NULL),
+                        on_probe_schema_reply, bridge);
+}
+
+int vmspawn_qmp_init(VmspawnQmpBridge **ret, int fd, sd_event *event) {
+        _cleanup_(vmspawn_qmp_bridge_freep) VmspawnQmpBridge *bridge = NULL;
+        int r;
+
+        assert(ret);
+        assert(fd >= 0);
+        assert(event);
+
+        bridge = new0(VmspawnQmpBridge, 1);
+        if (!bridge)
+                return log_oom();
+
+        r = qmp_client_connect_fd(&bridge->qmp, fd);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create QMP client: %m");
+
+        r = qmp_client_set_description(bridge->qmp, "vmspawn-qmp-client");
+        if (r < 0)
+                return log_error_errno(r, "Failed to set QMP client description: %m");
+
+        r = qmp_client_attach_event(bridge->qmp, event, SD_EVENT_PRIORITY_NORMAL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to attach QMP client to event loop: %m");
+
+        *ret = TAKE_PTR(bridge);
+        return 0;
+}
+
+int vmspawn_qmp_probe_features(VmspawnQmpBridge *bridge) {
+        int r;
+
+        assert(bridge);
+
+        /* probe_io_uring() and probe_schema() both call qmp_client_invoke(), which internally
+         * drives the handshake to RUNNING via qmp_client_ensure_running() on its first call. */
+        r = probe_io_uring(bridge->qmp, bridge);
+        if (r < 0)
+                return log_error_errno(r, "Failed to issue io_uring probe: %m");
+
+        r = probe_schema(bridge->qmp, bridge);
+        if (r < 0)
+                return log_error_errno(r, "Failed to issue schema probe: %m");
+
+        /* Canonical sync-on-async pump, matching varlink_call_internal(). The QMP client tracks
+         * outstanding replies in its own slots set; drain until it's idle. */
+        while (!qmp_client_is_idle(bridge->qmp)) {
+                r = qmp_client_process(bridge->qmp);
+                if (r < 0)
+                        return log_error_errno(r, "QMP probe pump failed: %m");
+                if (r > 0)
+                        continue;
+
+                r = qmp_client_wait(bridge->qmp, USEC_INFINITY);
+                if (r < 0)
+                        return log_error_errno(r, "QMP probe wait failed: %m");
+        }
+
+        /* If fail_pending() drained the slots (transport dropped mid-probe), features can't be
+         * trusted and we have no QMP channel for device setup anyway. */
+        if (qmp_client_is_disconnected(bridge->qmp))
+                return log_error_errno(SYNTHETIC_ERRNO(ECONNRESET),
+                                       "QMP connection dropped during feature probing");
+
+        return 0;
+}
+
+static int on_cont_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        assert(client);
+
+        if (error < 0) {
+                log_error_errno(error, "Failed to resume QEMU execution: %s", strna(error_desc));
+                return sd_event_exit(qmp_client_get_event(client), error);
+        }
+
+        return 0;
+}
+
+int vmspawn_qmp_start(VmspawnQmpBridge *bridge) {
+        assert(bridge);
+
+        return qmp_client_invoke(bridge->qmp, "cont", /* args= */ NULL, on_cont_complete, /* userdata= */ NULL);
+}
diff --git a/src/vmspawn/vmspawn-qmp.h b/src/vmspawn/vmspawn-qmp.h
new file mode 100644 (file)
index 0000000..8f8c26f
--- /dev/null
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <net/ethernet.h>
+
+#include "shared-forward.h"
+
+/* Pending job continuation — called when a QMP background job reaches "concluded" state.
+ * Used by blockdev-create to chain remaining drive setup after the job completes. */
+typedef int (*pending_job_callback_t)(QmpClient *qmp, void *userdata);
+typedef void (*pending_job_free_t)(void *userdata);
+
+typedef struct PendingJob {
+        pending_job_callback_t on_concluded;
+        pending_job_free_t free_userdata;
+        void *userdata;
+} PendingJob;
+
+PendingJob* pending_job_free(PendingJob *j);
+DEFINE_TRIVIAL_CLEANUP_FUNC(PendingJob *, pending_job_free);
+
+typedef enum VmspawnQmpFeatureFlags {
+        VMSPAWN_QMP_FEATURE_IO_URING         = 1u << 0,
+        VMSPAWN_QMP_FEATURE_DISCARD_NO_UNREF = 1u << 1,
+} VmspawnQmpFeatureFlags;
+
+typedef struct VmspawnQmpBridge {
+        QmpClient *qmp;
+        Hashmap *pending_jobs;  /* job_id (string, owned) -> PendingJob* */
+        VmspawnQmpFeatureFlags features;
+} VmspawnQmpBridge;
+
+VmspawnQmpBridge* vmspawn_qmp_bridge_free(VmspawnQmpBridge *b);
+DEFINE_TRIVIAL_CLEANUP_FUNC(VmspawnQmpBridge *, vmspawn_qmp_bridge_free);
+
+QmpClient* vmspawn_qmp_bridge_get_qmp(VmspawnQmpBridge *b);
+
+/* Phase 1: Connect to VMM backend. Returns an opaque bridge ready for device setup. */
+int vmspawn_qmp_init(VmspawnQmpBridge **ret, int fd, sd_event *event);
+
+/* Phase 1b: Feature probing. Fires one-shot QMP commands and drives the client
+ * synchronously until every reply has been delivered. Populates bridge->features.
+ * Must run before the device-setup phase; both io_uring and discard-no-unref flags
+ * are consumed by vmspawn_qmp_setup_drives(). */
+int vmspawn_qmp_probe_features(VmspawnQmpBridge *bridge);
+
+/* Phase 3: Resume vCPUs. All commands are async — responses arrive during sd_event_loop(). */
+int vmspawn_qmp_start(VmspawnQmpBridge *bridge);
+
+int vmspawn_qmp_bridge_register_job(
+                VmspawnQmpBridge *b,
+                const char *job_id,
+                pending_job_callback_t on_concluded,
+                void *userdata,
+                pending_job_free_t free_userdata);
+
+typedef enum QmpDriveFlags {
+        QMP_DRIVE_BLOCK_DEVICE     = 1u << 0,
+        QMP_DRIVE_READ_ONLY        = 1u << 1,
+        QMP_DRIVE_DISCARD          = 1u << 2,
+        QMP_DRIVE_NO_FLUSH         = 1u << 3,
+        QMP_DRIVE_BOOT             = 1u << 4,
+        QMP_DRIVE_IO_URING         = 1u << 5,
+        QMP_DRIVE_DISCARD_NO_UNREF = 1u << 6,  /* qcow2 only */
+} QmpDriveFlags;
+
+/* Drive info for QMP-based drive setup */
+typedef struct DriveInfo {
+        const char *path;          /* kept for logging only — not passed to QEMU */
+        const char *format;        /* "raw" or "qcow2" */
+        const char *disk_driver;   /* "virtio-blk-pci", "scsi-hd", "scsi-cd", "nvme" */
+        char *serial;              /* owned */
+        char *node_name;           /* owned */
+        char *pcie_port;           /* owned: pcie-root-port id for device_add bus (NULL on non-PCIe) */
+        int fd;                    /* pre-opened image fd (owned, -EBADF if unused) */
+        int overlay_fd;            /* pre-opened anonymous overlay fd for ephemeral (owned, -EBADF if unused) */
+        QmpDriveFlags flags;
+} DriveInfo;
+
+void drive_info_done(DriveInfo *info);
+
+typedef struct DriveInfos {
+        DriveInfo *drives;
+        size_t n_drives;
+        char *scsi_pcie_port;  /* owned: pcie-root-port id for SCSI controller (NULL if no SCSI or non-PCIe) */
+} DriveInfos;
+
+void drive_infos_done(DriveInfos *infos);
+
+/* Network info for QMP-based network setup. Covers privileged TAP (by name),
+ * nsresourced TAP (by FD via getfd), and user-mode networking. The no-network
+ * case (-nic none) stays on the QEMU command line. */
+typedef struct NetworkInfo {
+        const char *type;                  /* "tap" or "user" — points to a string literal */
+        char *ifname;                      /* owned: TAP interface name (tap by name only, NULL if unset) */
+        struct ether_addr mac;             /* VM-side MAC address (tap only, valid iff mac_set) */
+        bool mac_set;
+        char *pcie_port;                   /* owned: pcie-root-port id for device_add bus (NULL on non-PCIe) */
+        int fd;                            /* TAP fd to pass via getfd (tap by fd only, -EBADF if unused) */
+} NetworkInfo;
+
+void network_info_done(NetworkInfo *info);
+
+/* Virtiofs device info for QMP-based chardev + device setup */
+typedef struct VirtiofsInfo {
+        char *id;              /* owned: chardev and device id (e.g. "rootdir", "mnt0") */
+        char *socket_path;     /* owned: virtiofsd listen socket path */
+        char *tag;             /* owned: virtiofs mount tag visible to guest */
+        char *pcie_port;       /* owned: pcie-root-port id for device_add bus (NULL on non-PCIe) */
+} VirtiofsInfo;
+
+void virtiofs_info_done(VirtiofsInfo *info);
+
+typedef struct VirtiofsInfos {
+        VirtiofsInfo *entries;
+        size_t n_entries;
+} VirtiofsInfos;
+
+void virtiofs_infos_done(VirtiofsInfos *infos);
+
+/* VSOCK device info for QMP-based setup via getfd + device_add */
+typedef struct VsockInfo {
+        int fd;                 /* vhost-vsock fd to pass via getfd (-EBADF if unused) */
+        unsigned cid;           /* guest CID */
+        char *pcie_port;        /* owned: pcie-root-port id for device_add bus (NULL on non-PCIe) */
+} VsockInfo;
+
+void vsock_info_done(VsockInfo *info);
+
+/* Aggregate of the per-device info structures populated before the bridge-based
+ * device setup phase. Keeps lifetime and cleanup of all device state in one place. */
+typedef struct MachineConfig {
+        DriveInfos drives;
+        NetworkInfo network;
+        VirtiofsInfos virtiofs;
+        VsockInfo vsock;
+} MachineConfig;
+
+void machine_config_done(MachineConfig *c);
+
+/* Phase 2: Device setup — call any subset in any order before vmspawn_qmp_start(). */
+int vmspawn_qmp_setup_drives(VmspawnQmpBridge *bridge, DriveInfos *drives);
+int vmspawn_qmp_setup_network(VmspawnQmpBridge *bridge, NetworkInfo *network);
+int vmspawn_qmp_setup_virtiofs(VmspawnQmpBridge *bridge, const VirtiofsInfos *virtiofs);
+int vmspawn_qmp_setup_vsock(VmspawnQmpBridge *bridge, VsockInfo *vsock);
diff --git a/src/vmspawn/vmspawn-varlink.c b/src/vmspawn/vmspawn-varlink.c
new file mode 100644 (file)
index 0000000..c73372e
--- /dev/null
@@ -0,0 +1,410 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "path-util.h"
+#include "qmp-client.h"
+#include "string-util.h"
+#include "strv.h"
+#include "varlink-io.systemd.MachineInstance.h"
+#include "varlink-io.systemd.QemuMachineInstance.h"
+#include "varlink-io.systemd.VirtualMachineInstance.h"
+#include "varlink-util.h"
+#include "vmspawn-varlink.h"
+
+DEFINE_PRIVATE_HASH_OPS_FULL(
+                varlink_subscriber_hash_ops,
+                void, trivial_hash_func, trivial_compare_func, sd_varlink_close_unref,
+                char*, strv_free);
+
+struct VmspawnVarlinkContext {
+        sd_varlink_server *varlink_server;
+        VmspawnQmpBridge *bridge;
+        /* Key: sd_varlink* (ref'd), Value: strv filter (NULL = all events).
+         * varlink_subscriber_hash_ops handles cleanup of both on removal. */
+        Hashmap *subscribed;
+};
+
+/* Translate a QMP async completion into a varlink error reply */
+static int qmp_error_to_varlink(sd_varlink *link, const char *error_desc, int error) {
+        assert(link);
+
+        if (ERRNO_IS_DISCONNECT(error))
+                return sd_varlink_error(link, "io.systemd.MachineInstance.NotConnected", NULL);
+        if (error == -EIO)
+                log_warning("QMP command failed: %s", strna(error_desc));
+        return sd_varlink_error_errno(link, error);
+}
+
+/* Shared async completion for simple QMP commands that return no data.
+ * Errors are translated to varlink replies, not propagated through sd_event. */
+static int on_qmp_simple_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        sd_varlink *link = ASSERT_PTR(userdata);
+
+        assert(client);
+
+        if (error == 0)
+                (void) sd_varlink_reply(link, NULL);
+        else
+                (void) qmp_error_to_varlink(link, error_desc, error);
+
+        sd_varlink_unref(link);
+        return 0;
+}
+
+static int qmp_execute_varlink_async(
+                VmspawnVarlinkContext *ctx,
+                sd_varlink *link,
+                const char *command,
+                sd_json_variant *arguments,
+                qmp_command_callback_t callback) {
+
+        int r;
+
+        sd_varlink_ref(link);
+
+        r = qmp_client_invoke(ctx->bridge->qmp, command, QMP_CLIENT_ARGS(arguments), callback, link);
+        if (r < 0)
+                sd_varlink_unref(link);
+
+        return r;
+}
+
+static int qmp_execute_simple_async(sd_varlink *link, VmspawnVarlinkContext *ctx, const char *qmp_command) {
+        assert(link);
+        assert(ctx);
+        assert(qmp_command);
+
+        return qmp_execute_varlink_async(ctx, link, qmp_command, /* arguments= */ NULL, on_qmp_simple_complete);
+}
+
+static int vl_method_terminate(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return qmp_execute_simple_async(link, ASSERT_PTR(userdata), "quit");
+}
+
+static int vl_method_pause(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return qmp_execute_simple_async(link, ASSERT_PTR(userdata), "stop");
+}
+
+static int vl_method_resume(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return qmp_execute_simple_async(link, ASSERT_PTR(userdata), "cont");
+}
+
+static int vl_method_power_off(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return qmp_execute_simple_async(link, ASSERT_PTR(userdata), "system_powerdown");
+}
+
+static int vl_method_reboot(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return qmp_execute_simple_async(link, ASSERT_PTR(userdata), "system_reset");
+}
+
+/* Async completion for query-status: extract running/status from QMP result */
+static int on_qmp_describe_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        _cleanup_(sd_varlink_unrefp) sd_varlink *link = ASSERT_PTR(userdata);
+
+        assert(client);
+
+        if (error != 0) {
+                (void) qmp_error_to_varlink(link, error_desc, error);
+                return 0;
+        }
+
+        sd_json_variant *running = sd_json_variant_by_key(result, "running");
+        sd_json_variant *status = sd_json_variant_by_key(result, "status");
+
+        (void) sd_varlink_replybo(
+                        link,
+                        SD_JSON_BUILD_PAIR_BOOLEAN("running", running ? sd_json_variant_boolean(running) : false),
+                        SD_JSON_BUILD_PAIR_STRING("status", status && sd_json_variant_is_string(status) ? sd_json_variant_string(status) : "unknown"));
+
+        return 0;
+}
+
+static int vl_method_describe(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata);
+
+        return qmp_execute_varlink_async(ctx, link, "query-status", /* arguments= */ NULL, on_qmp_describe_complete);
+}
+
+static int vl_method_subscribe_events(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata);
+        _cleanup_strv_free_ char **filter = NULL;
+        int r;
+
+        /* SD_VARLINK_REQUIRES_MORE in the IDL rejects non-streaming callers before we get here */
+
+        r = sd_varlink_dispatch(link, parameters, (const sd_json_dispatch_field[]) {
+                { "filter", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_strv, 0, SD_JSON_NULLABLE },
+                {},
+        }, &filter);
+        if (r != 0)
+                return r;
+
+        sd_varlink_ref(link);
+
+        r = hashmap_ensure_put(&ctx->subscribed, &varlink_subscriber_hash_ops, link, filter);
+        if (r < 0) {
+                sd_varlink_unref(link);
+                return r;
+        }
+
+        TAKE_PTR(filter);
+
+        r = sd_varlink_notifybo(link, SD_JSON_BUILD_PAIR_STRING("event", "READY"));
+        if (r < 0) {
+                strv_free(hashmap_remove(ctx->subscribed, link));
+                sd_varlink_close_unref(link);
+                return r;
+        }
+
+        return 0;
+}
+
+static int vl_method_acquire_qmp(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return sd_varlink_error_errno(link, -EOPNOTSUPP);
+}
+
+static void vl_disconnect(sd_varlink_server *server, sd_varlink *link, void *userdata) {
+        VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata);
+
+        assert(server);
+        assert(link);
+
+        /* Only subscribers hold an extra ref on the link (taken in vl_method_subscribe_events).
+         * Non-subscriber connections (one-shot commands like Pause, Describe) must not be unref'd
+         * here — their extra ref is consumed by the async completion callback. Only unref, never
+         * close — the server handles close after this callback returns (matching resolved's
+         * vl_on_notification_disconnect pattern).
+         *
+         * Use hashmap_remove2() so the returned key (non-NULL iff the entry was present)
+         * disambiguates "no filter subscriber" (value=NULL) from "not a subscriber". */
+        void *removed_key = NULL;
+        strv_free(hashmap_remove2(ctx->subscribed, link, &removed_key));
+        if (!removed_key)
+                return;
+
+        sd_varlink_unref(link);
+}
+
+static int on_job_dismiss_complete(
+                QmpClient *client,
+                sd_json_variant *result,
+                const char *error_desc,
+                int error,
+                void *userdata) {
+
+        if (error < 0)
+                log_debug_errno(error, "job-dismiss failed: %s", strna(error_desc));
+
+        return 0;
+}
+
+static int dispatch_pending_job(VmspawnQmpBridge *bridge, sd_json_variant *data) {
+        const char *job_id, *status;
+        int r;
+
+        assert(bridge);
+
+        if (!data)
+                return 0;
+
+        job_id = sd_json_variant_string(sd_json_variant_by_key(data, "id"));
+        status = sd_json_variant_string(sd_json_variant_by_key(data, "status"));
+
+        if (!job_id || !streq_ptr(status, "concluded"))
+                return 0;
+
+        _cleanup_free_ char *key = NULL;
+        _cleanup_(pending_job_freep) PendingJob *job = hashmap_remove2(bridge->pending_jobs, job_id, (void**) &key);
+        if (!job)
+                return 0;
+
+        log_debug("QMP job '%s' concluded, firing continuation", job_id);
+
+        /* Dismiss the concluded job before running the continuation */
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *dismiss_args = NULL;
+        r = sd_json_buildo(&dismiss_args, SD_JSON_BUILD_PAIR_STRING("id", job_id));
+        if (r < 0)
+                return sd_event_exit(qmp_client_get_event(bridge->qmp), r);
+
+        r = qmp_client_invoke(bridge->qmp, "job-dismiss", QMP_CLIENT_ARGS(dismiss_args),
+                              on_job_dismiss_complete, /* userdata= */ NULL);
+        if (r < 0)
+                return sd_event_exit(qmp_client_get_event(bridge->qmp), r);
+
+        if (!job->on_concluded)
+                return 1;
+
+        r = job->on_concluded(bridge->qmp, TAKE_PTR(job->userdata));
+        if (r < 0) {
+                log_error_errno(r, "Job continuation failed: %m");
+                return sd_event_exit(qmp_client_get_event(bridge->qmp), r);
+        }
+
+        return 1;
+}
+
+static int on_qmp_event(
+                QmpClient *client,
+                const char *event,
+                sd_json_variant *data,
+                void *userdata) {
+
+        VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata);
+        _cleanup_(sd_json_variant_unrefp) sd_json_variant *notification = NULL;
+        sd_varlink *link;
+        char **filter;
+        int r;
+
+        assert(client);
+        assert(event);
+
+        /* Dispatch job status changes to pending continuations (e.g. blockdev-create) */
+        if (streq(event, "JOB_STATUS_CHANGE"))
+                return dispatch_pending_job(ctx->bridge, data);
+
+        if (hashmap_isempty(ctx->subscribed))
+                return 0;
+
+        r = sd_json_buildo(
+                        &notification,
+                        SD_JSON_BUILD_PAIR_STRING("event", event),
+                        SD_JSON_BUILD_PAIR_CONDITION(!!data, "data", SD_JSON_BUILD_VARIANT(data)));
+        if (r < 0) {
+                log_warning_errno(r, "Failed to build event notification, ignoring: %m");
+                return 0;
+        }
+
+        HASHMAP_FOREACH_KEY(filter, link, ctx->subscribed) {
+                if (filter && !strv_contains(filter, event))
+                        continue;
+
+                r = sd_varlink_notify(link, notification);
+                if (r < 0)
+                        log_warning_errno(r, "Failed to notify event subscriber, ignoring: %m");
+        }
+
+        return 0;
+}
+
+/* Free all subscriber entries — varlink_subscriber_hash_ops handles
+ * close + unref for each key and strv_free for each value. */
+static void drain_event_subscribers(Hashmap **subscribed) {
+        assert(subscribed);
+        *subscribed = hashmap_free(*subscribed);
+}
+
+static void on_qmp_disconnect(QmpClient *client, void *userdata) {
+        VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata);
+
+        assert(client);
+
+        log_debug("Backend connection lost");
+
+        /* Propagate connection loss by closing all subscriber connections */
+        drain_event_subscribers(&ctx->subscribed);
+}
+
+int vmspawn_varlink_setup(
+                VmspawnVarlinkContext **ret,
+                VmspawnQmpBridge *bridge,
+                const char *runtime_dir,
+                char **ret_control_address) {
+
+        _cleanup_(vmspawn_varlink_context_freep) VmspawnVarlinkContext *ctx = NULL;
+        _cleanup_free_ char *listen_address = NULL;
+        int r;
+
+        assert(ret);
+        assert(bridge);
+        assert(runtime_dir);
+
+        sd_event *event = qmp_client_get_event(bridge->qmp);
+        assert(event);
+
+        ctx = new0(VmspawnVarlinkContext, 1);
+        if (!ctx)
+                return log_oom();
+
+        /* Create varlink server for VM control */
+        r = varlink_server_new(&ctx->varlink_server,
+                               SD_VARLINK_SERVER_INHERIT_USERDATA,
+                               ctx);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create varlink server: %m");
+
+        r = sd_varlink_server_add_interface_many(
+                        ctx->varlink_server,
+                        &vl_interface_io_systemd_MachineInstance,
+                        &vl_interface_io_systemd_VirtualMachineInstance,
+                        &vl_interface_io_systemd_QemuMachineInstance);
+        if (r < 0)
+                return log_error_errno(r, "Failed to add varlink interfaces: %m");
+
+        r = sd_varlink_server_bind_method_many(
+                        ctx->varlink_server,
+                        "io.systemd.MachineInstance.Terminate",         vl_method_terminate,
+                        "io.systemd.MachineInstance.PowerOff",          vl_method_power_off,
+                        "io.systemd.MachineInstance.Pause",             vl_method_pause,
+                        "io.systemd.MachineInstance.Resume",            vl_method_resume,
+                        "io.systemd.MachineInstance.Reboot",            vl_method_reboot,
+                        "io.systemd.MachineInstance.Describe",          vl_method_describe,
+                        "io.systemd.MachineInstance.SubscribeEvents",   vl_method_subscribe_events,
+                        "io.systemd.QemuMachineInstance.AcquireQMP",    vl_method_acquire_qmp);
+        if (r < 0)
+                return log_error_errno(r, "Failed to bind varlink methods: %m");
+
+        r = sd_varlink_server_bind_disconnect(ctx->varlink_server, vl_disconnect);
+        if (r < 0)
+                return log_error_errno(r, "Failed to bind disconnect handler: %m");
+
+        listen_address = path_join(runtime_dir, "control");
+        if (!listen_address)
+                return log_oom();
+
+        r = sd_varlink_server_listen_address(ctx->varlink_server, listen_address, 0600);
+        if (r < 0)
+                return log_error_errno(r, "Failed to listen on %s: %m", listen_address);
+
+        r = sd_varlink_server_attach_event(ctx->varlink_server, event, SD_EVENT_PRIORITY_NORMAL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to attach varlink server to event loop: %m");
+
+        ctx->bridge = bridge;
+        qmp_client_bind_event(ctx->bridge->qmp, on_qmp_event, ctx);
+        qmp_client_bind_disconnect(ctx->bridge->qmp, on_qmp_disconnect, ctx);
+
+        log_debug("Varlink control server listening on %s", listen_address);
+
+        if (ret_control_address)
+                *ret_control_address = TAKE_PTR(listen_address);
+
+        *ret = TAKE_PTR(ctx);
+        return 0;
+}
+
+VmspawnVarlinkContext* vmspawn_varlink_context_free(VmspawnVarlinkContext *ctx) {
+        if (!ctx)
+                return NULL;
+
+        sd_varlink_server_unref(ctx->varlink_server);
+        vmspawn_qmp_bridge_free(ctx->bridge);
+
+        drain_event_subscribers(&ctx->subscribed);
+
+        return mfree(ctx);
+}
diff --git a/src/vmspawn/vmspawn-varlink.h b/src/vmspawn/vmspawn-varlink.h
new file mode 100644 (file)
index 0000000..1833416
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "cleanup-util.h"
+#include "shared-forward.h"
+#include "vmspawn-qmp.h"
+
+typedef struct VmspawnVarlinkContext VmspawnVarlinkContext;
+
+/* Varlink server for VM control on top of an established bridge connection */
+int vmspawn_varlink_setup(
+                VmspawnVarlinkContext **ret,
+                VmspawnQmpBridge *bridge,
+                const char *runtime_dir,
+                char **ret_control_address);
+
+VmspawnVarlinkContext* vmspawn_varlink_context_free(VmspawnVarlinkContext *ctx);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(VmspawnVarlinkContext *, vmspawn_varlink_context_free);