From: Daan De Meyer Date: Tue, 11 Feb 2025 14:48:29 +0000 (+0100) Subject: mkosi-sandbox: Implement --pack-fds X-Git-Tag: v26~395^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c785d0b57b0492948a39bd89c6569cafde4bbf76;p=thirdparty%2Fmkosi.git mkosi-sandbox: Implement --pack-fds This allows us to move the file descriptor packing logic from spawn() to mkosi-sandbox. The main advantage here is that we can pass file descriptors now without necessarily packing them together, which we now only do for systemd-journal-remote which requires it. --- diff --git a/mkosi/qemu.py b/mkosi/qemu.py index 6dd9f38a3..254a30d85 100644 --- a/mkosi/qemu.py +++ b/mkosi/qemu.py @@ -44,10 +44,19 @@ from mkosi.config import ( ) from mkosi.log import ARG_DEBUG, die from mkosi.partition import finalize_root, find_partitions -from mkosi.run import SD_LISTEN_FDS_START, AsyncioThread, find_binary, fork_and_wait, run, spawn, workdir +from mkosi.run import AsyncioThread, find_binary, fork_and_wait, run, spawn, workdir from mkosi.tree import copy_tree, rmtree from mkosi.user import INVOKING_USER, become_root_in_subuid_range, become_root_in_subuid_range_cmd -from mkosi.util import PathString, StrEnum, current_home_dir, flock, flock_or_die, groupby, round_up, try_or +from mkosi.util import ( + PathString, + StrEnum, + current_home_dir, + flock, + flock_or_die, + groupby, + round_up, + try_or, +) from mkosi.versioncomp import GenericVersion QEMU_KVM_DEVICE_VERSION = GenericVersion("9.0") @@ -297,7 +306,7 @@ def start_swtpm(config: Config) -> Iterator[Path]: sock.bind(os.fspath(path)) sock.listen() - cmdline += ["--ctrl", f"type=unixio,fd={SD_LISTEN_FDS_START}"] + cmdline += ["--ctrl", f"type=unixio,fd={sock.fileno()}"] with spawn( cmdline, @@ -396,7 +405,7 @@ def start_virtiofsd( # Make sure virtiofsd can connect to the socket. os.chown(path, st.st_uid, st.st_gid) - cmdline += ["--fd", str(SD_LISTEN_FDS_START)] + cmdline += ["--fd", str(sock.fileno())] # We want RuntimeBuildSources= and RuntimeTrees= to do the right thing even when running mkosi vm # as root without the source directories necessarily being owned by root. We achieve this by running @@ -423,7 +432,7 @@ def start_virtiofsd( # our own function to become root in the subuid range. # TODO: Drop this as soon as we drop CentOS Stream 9 support and can rely on newer unshare # features. - preexec_fn=become_root_in_subuid_range if not scope and not uidmap else None, + preexec=become_root_in_subuid_range if not scope and not uidmap else None, sandbox=config.sandbox( options=[ "--bind", directory, workdir(directory), @@ -563,6 +572,7 @@ def start_journal_remote(config: Config, sockfd: int) -> Iterator[None]: options=[ "--bind", config.forward_journal.parent, workdir(config.forward_journal.parent), "--ro-bind", f.name, "/etc/systemd/journal-remote.conf", + "--pack-fds", ], setup=scope, ), @@ -1233,8 +1243,7 @@ def run_qemu(args: Args, config: Config) -> None: if config.kvm != ConfigFeature.disabled and have_kvm and config.architecture.can_kvm(): accel = "kvm" if qemu_version(config, qemu) >= QEMU_KVM_DEVICE_VERSION: - index = list(qemu_device_fds.keys()).index(QemuDeviceNode.kvm) - cmdline += ["--add-fd", f"fd={SD_LISTEN_FDS_START + index},set=1,opaque=/dev/kvm"] + cmdline += ["--add-fd", f"fd={qemu_device_fds[QemuDeviceNode.kvm]},set=1,opaque=/dev/kvm"] accel += ",device=/dev/fdset/1" cmdline += ["-cpu", "host"] else: @@ -1259,8 +1268,9 @@ def run_qemu(args: Args, config: Config) -> None: "find a free vsock connection ID", ) - index = list(qemu_device_fds.keys()).index(QemuDeviceNode.vhost_vsock) - cmdline += ["-device", f"vhost-vsock-pci,guest-cid={cid},vhostfd={SD_LISTEN_FDS_START + index}"] + cmdline += [ + "-device", f"vhost-vsock-pci,guest-cid={cid},vhostfd={qemu_device_fds[QemuDeviceNode.vhost_vsock]}", # noqa: E501 + ] # fmt: skip if config.console == ConsoleMode.gui: if config.architecture.is_arm_variant(): diff --git a/mkosi/resources/man/mkosi-sandbox.1.md b/mkosi/resources/man/mkosi-sandbox.1.md index 230999bed..a5cbd428b 100644 --- a/mkosi/resources/man/mkosi-sandbox.1.md +++ b/mkosi/resources/man/mkosi-sandbox.1.md @@ -117,6 +117,11 @@ host system. This is useful to wait until all setup logic has completed before continuing execution in the parent process invoking `mkosi-sandbox` by using `waitid()` with the `WNOWAIT` AND `WSTOPPED` flags. +`--pack-fds` +: Pack inherited file descriptors together starting at file descriptor number 3 and set + `$LISTEN_FDS` to the number of packed file descriptors and `$LISTEN_PID` to the current process + pid. + `--version` : Show package version. diff --git a/mkosi/run.py b/mkosi/run.py index 94d98a92e..3c4fd1d9d 100644 --- a/mkosi/run.py +++ b/mkosi/run.py @@ -2,8 +2,6 @@ import contextlib import errno -import fcntl -import functools import logging import os import queue @@ -24,9 +22,6 @@ from mkosi.log import ARG_DEBUG, ARG_DEBUG_SANDBOX, ARG_DEBUG_SHELL, die from mkosi.sandbox import acquire_privileges, joinpath, umask from mkosi.util import _FILE, PathString, current_home_dir, flatten, one_zero, resource_path, unique -SD_LISTEN_FDS_START = 3 - - # These types are only generic during type checking and not at runtime, leading # to a TypeError during compilation. # Let's be as strict as we can with the description for the usage we have. @@ -169,47 +164,6 @@ def run( return CompletedProcess(cmdline, process.returncode, out, err) -def fd_move_above(fd: int, above: int) -> int: - dup = fcntl.fcntl(fd, fcntl.F_DUPFD, above) - os.close(fd) - return dup - - -def preexec( - *, - preexec_fn: Optional[Callable[[], None]], - pass_fds: Collection[int], -) -> None: - if preexec_fn: - preexec_fn() - - if not pass_fds: - return - - # The systemd socket activation interface requires any passed file descriptors to start at '3' and - # incrementally increase from there. The file descriptors we got from the caller might be arbitrary, - # so we need to move them around to make sure they start at '3' and incrementally increase. - for i, fd in enumerate(pass_fds): - # Don't do anything if the file descriptor is already what we need it to be. - if fd == SD_LISTEN_FDS_START + i: - continue - - # Close any existing file descriptor that occupies the id that we want to move to. This is safe - # because using pass_fds implies using close_fds as well, except that file descriptors are closed - # by python after running the preexec function, so we have to close a few of those manually here - # to make room if needed. - try: - os.close(SD_LISTEN_FDS_START + i) - except OSError as e: - if e.errno != errno.EBADF: - raise - - nfd = fcntl.fcntl(fd, fcntl.F_DUPFD, SD_LISTEN_FDS_START + i) - # fcntl.F_DUPFD uses the closest available file descriptor ID, so make sure it actually picked - # the ID we expect it to pick. - assert nfd == SD_LISTEN_FDS_START + i - - @contextlib.contextmanager def spawn( cmdline: Sequence[PathString], @@ -222,12 +176,10 @@ def spawn( pass_fds: Collection[int] = (), env: Mapping[str, str] = {}, log: bool = True, - preexec_fn: Optional[Callable[[], None]] = None, + preexec: Optional[Callable[[], None]] = None, success_exit_status: Sequence[int] = (0,), sandbox: AbstractContextManager[Sequence[PathString]] = contextlib.nullcontext([]), ) -> Iterator[Popen]: - assert sorted(set(pass_fds)) == list(pass_fds) - cmd = [os.fspath(x) for x in cmdline] if ARG_DEBUG.get(): @@ -258,10 +210,6 @@ def spawn( if "HOME" not in env: env["HOME"] = "/" - # sandbox.py takes care of setting $LISTEN_PID - if pass_fds: - env["LISTEN_FDS"] = str(len(pass_fds)) - with sandbox as sbx: prefix = [os.fspath(x) for x in sbx] @@ -274,12 +222,9 @@ def spawn( text=True, user=user, group=group, - # pass_fds only comes into effect after python has invoked the preexec function, so we make - # sure that pass_fds contains the file descriptors to keep open after we've done our - # transformation in preexec(). - pass_fds=[SD_LISTEN_FDS_START + i for i in range(len(pass_fds))], + pass_fds=pass_fds, env=env, - preexec_fn=functools.partial(preexec, preexec_fn=preexec_fn, pass_fds=pass_fds), + preexec_fn=preexec, ) as proc: def failed() -> bool: @@ -310,7 +255,7 @@ def spawn( user=user, group=group, env=env, - preexec_fn=functools.partial(preexec, preexec_fn=preexec_fn, pass_fds=tuple()), + preexec_fn=preexec, ) raise subprocess.CalledProcessError(returncode, cmdline) except FileNotFoundError as e: diff --git a/mkosi/sandbox.py b/mkosi/sandbox.py index 268079454..d4d08fd34 100755 --- a/mkosi/sandbox.py +++ b/mkosi/sandbox.py @@ -28,9 +28,12 @@ CLONE_NEWIPC = 0x08000000 CLONE_NEWNET = 0x40000000 CLONE_NEWNS = 0x00020000 CLONE_NEWUSER = 0x10000000 +EBADF = 9 EPERM = 1 ENOENT = 2 ENOSYS = 38 +F_DUPFD = 0 +F_GETFD = 1 LINUX_CAPABILITY_U32S_3 = 2 LINUX_CAPABILITY_VERSION_3 = 0x20080522 MNT_DETACH = 2 @@ -56,6 +59,7 @@ PR_CAP_AMBIENT_RAISE = 2 # These definitions are taken from the libseccomp headers SCMP_ACT_ALLOW = 0x7FFF0000 SCMP_ACT_ERRNO = 0x00050000 +SD_LISTEN_FDS_START = 3 SIGSTOP = 19 @@ -96,6 +100,7 @@ libc.pivot_root.argtypes = (ctypes.c_char_p, ctypes.c_char_p) libc.umount2.argtypes = (ctypes.c_char_p, ctypes.c_int) libc.capget.argtypes = (ctypes.c_void_p, ctypes.c_void_p) libc.capset.argtypes = (ctypes.c_void_p, ctypes.c_void_p) +libc.fcntl.argtypes = (ctypes.c_int, ctypes.c_int, ctypes.c_int) def terminal_is_dumb() -> bool: @@ -438,6 +443,67 @@ def is_relative_to(one: str, two: str) -> bool: return os.path.commonpath((one, two)) == two +def pack_file_descriptors() -> int: + fds = [] + + with os.scandir("/proc/self/fd") as it: + for e in it: + if not e.is_symlink() and (e.is_file() or e.is_dir()): + continue + + try: + fd = int(e.name) + except ValueError: + continue + + if fd < SD_LISTEN_FDS_START: + continue + + fds.append(fd) + + # os.scandir() either opens a file descriptor to the given path or dups the given file descriptor. Either + # way, there will be an extra file descriptor in the fds array that's not valid anymore now, so find out + # which one and drop it. + fds = sorted(fd for fd in fds if libc.fcntl(fd, F_GETFD, 0) >= 0) + + # The following is a reimplementation of pack_fds() in systemd. + + if len(fds) == 0: + return 0 + + start = 0 + while True: + restart_from = -1 + + for i in range(start, len(fds)): + if fds[i] == SD_LISTEN_FDS_START + i: + continue + + nfd = libc.fcntl(fds[i], F_DUPFD, SD_LISTEN_FDS_START + i) + if nfd < 0: + oserror("fnctl") + + try: + os.close(fds[i]) + except OSError as e: + if e.errno != EBADF: + raise + + fds[i] = nfd + + if nfd != (SD_LISTEN_FDS_START + i) and restart_from < 0: + restart_from = i + + if restart_from < 0: + break + + start = restart_from + + assert fds[0] == SD_LISTEN_FDS_START + + return len(fds) + + class FSOperation: def __init__(self, dst: str) -> None: self.dst = dst @@ -752,7 +818,7 @@ def main() -> None: upperdir = "" workdir = "" chdir = None - become_root = suppress_chown = unshare_net = unshare_ipc = suspend = False + become_root = suppress_chown = unshare_net = unshare_ipc = suspend = pack_fds = False ttyname = os.ttyname(2) if os.isatty(2) else "" @@ -813,6 +879,8 @@ def main() -> None: unshare_ipc = True elif arg == "--suspend": suspend = True + elif arg == "--pack-fds": + pack_fds = True elif arg.startswith("-"): raise ValueError(f"Unrecognized option {arg}") else: @@ -837,9 +905,11 @@ def main() -> None: if e in os.environ: del os.environ[e] - # If $LISTEN_FDS is in the environment, let's automatically set $LISTEN_PID to the correct pid as well. - if "LISTEN_FDS" in os.environ: - os.environ["LISTEN_PID"] = str(os.getpid()) + if pack_fds: + nfds = pack_file_descriptors() + if nfds > 0: + os.environ["LISTEN_FDS"] = str(nfds) + os.environ["LISTEN_PID"] = str(os.getpid()) namespaces = CLONE_NEWNS if unshare_net and have_effective_cap(CAP_NET_ADMIN):