From: Daan De Meyer
Date: Fri, 13 Oct 2023 11:24:44 +0000 (+0200)
Subject: Open qemu device nodes before unsharing user namespace
X-Git-Tag: v19~79^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c9a89ece4b891e64f1c6f57259d8c496ebd08d7d;p=thirdparty%2Fmkosi.git

Open qemu device nodes before unsharing user namespace

Where possible, we should open the qemu device nodes before we unshare
the user namespace as this might not be possible anymore after unsharing
the user namespace because we might lose access to the kvm group.

Currently this is only possible for /dev/vhost-vsock. I've opened
https://gitlab.com/qemu-project/qemu/-/issues/1936 to hopefully make it
work for /dev/kvm as well.
---

diff --git a/mkosi/__init__.py b/mkosi/__init__.py
index f6bba675f..d922e5533 100644
--- a/mkosi/__init__.py
+++ b/mkosi/__init__.py
@@ -46,7 +46,7 @@ from mkosi.manifest import Manifest
 from mkosi.mounts import mount, mount_overlay, mount_passwd, mount_usr
 from mkosi.pager import page
 from mkosi.partition import Partition, finalize_root, finalize_roothash
-from mkosi.qemu import copy_ephemeral, run_qemu, run_ssh
+from mkosi.qemu import QemuDeviceNode, copy_ephemeral, run_qemu, run_ssh
 from mkosi.run import (
     become_root,
     bwrap,
@@ -2493,6 +2493,17 @@ def run_verb(args: MkosiArgs, presets: Sequence[MkosiConfig]) -> None:
     for config in presets:
         try_import(f"mkosi.distributions.{config.distribution}")
 
+    # After we unshare the user namespace, we might not have access to /dev/kvm or related device nodes anymore as
+    # access to these might be gated behind the kvm group and we won't be part of the kvm group anymore after unsharing
+    # the user namespace. To get around this, open all those device nodes now while we still can so we can pass them as
+    # file descriptors to qemu later. Note that we can't pass the kvm file descriptor to qemu until
+    # https://gitlab.com/qemu-project/qemu/-/issues/1936 is resolved.
+    qemu_device_fds = {
+        d: os.open(f"/dev/{d}", os.O_RDWR|os.O_CLOEXEC|os.O_NONBLOCK)
+        for d in QemuDeviceNode
+        if os.access(f"/dev/{d}", os.F_OK|os.R_OK|os.W_OK)
+    }
+
     # Get the user UID/GID either on the host or in the user namespace running the build
     become_root()
     init_mount_namespace()
@@ -2570,7 +2581,7 @@ def run_verb(args: MkosiArgs, presets: Sequence[MkosiConfig]) -> None:
             run_shell(args, last)
 
         if args.verb == Verb.qemu:
-            run_qemu(args, last)
+            run_qemu(args, last, qemu_device_fds)
 
         if args.verb == Verb.ssh:
             run_ssh(args, last)
diff --git a/mkosi/qemu.py b/mkosi/qemu.py
index 1e90466ae..03650c314 100644
--- a/mkosi/qemu.py
+++ b/mkosi/qemu.py
@@ -3,6 +3,7 @@
 import asyncio
 import base64
 import contextlib
+import enum
 import hashlib
 import logging
 import os
@@ -12,7 +13,7 @@ import subprocess
 import sys
 import tempfile
 import uuid
-from collections.abc import Iterator
+from collections.abc import Iterator, Mapping
 from pathlib import Path
 
 from mkosi.architecture import Architecture
@@ -29,7 +30,16 @@ from mkosi.partition import finalize_root, find_partitions
 from mkosi.run import MkosiAsyncioThread, run, spawn
 from mkosi.tree import copy_tree, rmtree
 from mkosi.types import PathString
-from mkosi.util import InvokingUser, qemu_check_kvm_support, qemu_check_vsock_support
+from mkosi.util import (
+    InvokingUser,
+    StrEnum,
+    qemu_check_kvm_support,
+    qemu_check_vsock_support,
+)
+
+
+class QemuDeviceNode(StrEnum):
+    vhost_vsock = enum.auto()
 
 
 def machine_cid(config: MkosiConfig) -> int:
@@ -293,7 +303,7 @@ def copy_ephemeral(config: MkosiConfig, src: Path) -> Iterator[Path]:
         rmtree(tmp)
 
 
-def run_qemu(args: MkosiArgs, config: MkosiConfig) -> None:
+def run_qemu(args: MkosiArgs, config: MkosiConfig, qemu_device_fds: Mapping[QemuDeviceNode, int]) -> None:
     if config.output_format not in (OutputFormat.disk, OutputFormat.cpio, OutputFormat.uki, OutputFormat.directory):
         die(f"{config.output_format} images cannot be booted in qemu")
 
@@ -352,7 +362,10 @@ def run_qemu(args: MkosiArgs, config: MkosiConfig) -> None:
     use_vsock = (config.qemu_vsock == ConfigFeature.enabled or
                 (config.qemu_vsock == ConfigFeature.auto and qemu_check_vsock_support(log=True)))
     if use_vsock:
-        cmdline += ["-device", f"vhost-vsock-pci,guest-cid={machine_cid(config)}"]
+        cmdline += [
+            "-device",
+            f"vhost-vsock-pci,guest-cid={machine_cid(config)},vhostfd={qemu_device_fds[QemuDeviceNode.vhost_vsock]}"
+        ]
 
     cmdline += ["-cpu", "max"]
 
@@ -525,7 +538,7 @@ def run_qemu(args: MkosiArgs, config: MkosiConfig) -> None:
         cmdline += config.qemu_args
         cmdline += args.cmdline
 
-        run(
+        with spawn(
             cmdline,
             # On Debian/Ubuntu, only users in the kvm group can access /dev/kvm. The invoking user might be part of the
             # kvm group, but the user namespace fake root user will definitely not be. Thus, we have to run qemu as the
@@ -535,9 +548,15 @@ def run_qemu(args: MkosiArgs, config: MkosiConfig) -> None:
             group=InvokingUser.gid if not InvokingUser.invoked_as_root else None,
             stdin=sys.stdin,
             stdout=sys.stdout,
+            pass_fds=qemu_device_fds.values(),
             env=os.environ,
             log=False,
-        )
+        ) as qemu:
+            # We have to close these before we wait for qemu otherwise we'll deadlock as qemu will never exit.
+            for fd in qemu_device_fds.values():
+                os.close(fd)
+
+            qemu.wait()
 
     if status := int(notifications.get("EXIT_STATUS", 0)):
         raise subprocess.CalledProcessError(status, cmdline)