)
from mkosi.log import ARG_DEBUG, die
from mkosi.partition import finalize_root, find_partitions
-from mkosi.run import SD_LISTEN_FDS_START, AsyncioThread, find_binary, fork_and_wait, run, spawn, workdir
+from mkosi.run import AsyncioThread, find_binary, fork_and_wait, run, spawn, workdir
from mkosi.tree import copy_tree, rmtree
from mkosi.user import INVOKING_USER, become_root_in_subuid_range, become_root_in_subuid_range_cmd
-from mkosi.util import PathString, StrEnum, current_home_dir, flock, flock_or_die, groupby, round_up, try_or
+from mkosi.util import (
+ PathString,
+ StrEnum,
+ current_home_dir,
+ flock,
+ flock_or_die,
+ groupby,
+ round_up,
+ try_or,
+)
from mkosi.versioncomp import GenericVersion
QEMU_KVM_DEVICE_VERSION = GenericVersion("9.0")
sock.bind(os.fspath(path))
sock.listen()
- cmdline += ["--ctrl", f"type=unixio,fd={SD_LISTEN_FDS_START}"]
+ cmdline += ["--ctrl", f"type=unixio,fd={sock.fileno()}"]
with spawn(
cmdline,
# Make sure virtiofsd can connect to the socket.
os.chown(path, st.st_uid, st.st_gid)
- cmdline += ["--fd", str(SD_LISTEN_FDS_START)]
+ cmdline += ["--fd", str(sock.fileno())]
# We want RuntimeBuildSources= and RuntimeTrees= to do the right thing even when running mkosi vm
# as root without the source directories necessarily being owned by root. We achieve this by running
# our own function to become root in the subuid range.
# TODO: Drop this as soon as we drop CentOS Stream 9 support and can rely on newer unshare
# features.
- preexec_fn=become_root_in_subuid_range if not scope and not uidmap else None,
+ preexec=become_root_in_subuid_range if not scope and not uidmap else None,
sandbox=config.sandbox(
options=[
"--bind", directory, workdir(directory),
options=[
"--bind", config.forward_journal.parent, workdir(config.forward_journal.parent),
"--ro-bind", f.name, "/etc/systemd/journal-remote.conf",
+ "--pack-fds",
],
setup=scope,
),
if config.kvm != ConfigFeature.disabled and have_kvm and config.architecture.can_kvm():
accel = "kvm"
if qemu_version(config, qemu) >= QEMU_KVM_DEVICE_VERSION:
- index = list(qemu_device_fds.keys()).index(QemuDeviceNode.kvm)
- cmdline += ["--add-fd", f"fd={SD_LISTEN_FDS_START + index},set=1,opaque=/dev/kvm"]
+ cmdline += ["--add-fd", f"fd={qemu_device_fds[QemuDeviceNode.kvm]},set=1,opaque=/dev/kvm"]
accel += ",device=/dev/fdset/1"
cmdline += ["-cpu", "host"]
else:
"find a free vsock connection ID",
)
- index = list(qemu_device_fds.keys()).index(QemuDeviceNode.vhost_vsock)
- cmdline += ["-device", f"vhost-vsock-pci,guest-cid={cid},vhostfd={SD_LISTEN_FDS_START + index}"]
+ cmdline += [
+ "-device", f"vhost-vsock-pci,guest-cid={cid},vhostfd={qemu_device_fds[QemuDeviceNode.vhost_vsock]}", # noqa: E501
+ ] # fmt: skip
if config.console == ConsoleMode.gui:
if config.architecture.is_arm_variant():
This is useful to wait until all setup logic has completed before continuing execution in the parent
process invoking `mkosi-sandbox` by using `waitid()` with the `WNOWAIT` AND `WSTOPPED` flags.
+`--pack-fds`
+: Pack inherited file descriptors together starting at file descriptor number 3 and set
+ `$LISTEN_FDS` to the number of packed file descriptors and `$LISTEN_PID` to the current process
+ pid.
+
`--version`
: Show package version.
import contextlib
import errno
-import fcntl
-import functools
import logging
import os
import queue
from mkosi.sandbox import acquire_privileges, joinpath, umask
from mkosi.util import _FILE, PathString, current_home_dir, flatten, one_zero, resource_path, unique
-SD_LISTEN_FDS_START = 3
-
-
# These types are only generic during type checking and not at runtime, leading
# to a TypeError during compilation.
# Let's be as strict as we can with the description for the usage we have.
return CompletedProcess(cmdline, process.returncode, out, err)
-def fd_move_above(fd: int, above: int) -> int:
- dup = fcntl.fcntl(fd, fcntl.F_DUPFD, above)
- os.close(fd)
- return dup
-
-
-def preexec(
- *,
- preexec_fn: Optional[Callable[[], None]],
- pass_fds: Collection[int],
-) -> None:
- if preexec_fn:
- preexec_fn()
-
- if not pass_fds:
- return
-
- # The systemd socket activation interface requires any passed file descriptors to start at '3' and
- # incrementally increase from there. The file descriptors we got from the caller might be arbitrary,
- # so we need to move them around to make sure they start at '3' and incrementally increase.
- for i, fd in enumerate(pass_fds):
- # Don't do anything if the file descriptor is already what we need it to be.
- if fd == SD_LISTEN_FDS_START + i:
- continue
-
- # Close any existing file descriptor that occupies the id that we want to move to. This is safe
- # because using pass_fds implies using close_fds as well, except that file descriptors are closed
- # by python after running the preexec function, so we have to close a few of those manually here
- # to make room if needed.
- try:
- os.close(SD_LISTEN_FDS_START + i)
- except OSError as e:
- if e.errno != errno.EBADF:
- raise
-
- nfd = fcntl.fcntl(fd, fcntl.F_DUPFD, SD_LISTEN_FDS_START + i)
- # fcntl.F_DUPFD uses the closest available file descriptor ID, so make sure it actually picked
- # the ID we expect it to pick.
- assert nfd == SD_LISTEN_FDS_START + i
-
-
@contextlib.contextmanager
def spawn(
cmdline: Sequence[PathString],
pass_fds: Collection[int] = (),
env: Mapping[str, str] = {},
log: bool = True,
- preexec_fn: Optional[Callable[[], None]] = None,
+ preexec: Optional[Callable[[], None]] = None,
success_exit_status: Sequence[int] = (0,),
sandbox: AbstractContextManager[Sequence[PathString]] = contextlib.nullcontext([]),
) -> Iterator[Popen]:
- assert sorted(set(pass_fds)) == list(pass_fds)
-
cmd = [os.fspath(x) for x in cmdline]
if ARG_DEBUG.get():
if "HOME" not in env:
env["HOME"] = "/"
- # sandbox.py takes care of setting $LISTEN_PID
- if pass_fds:
- env["LISTEN_FDS"] = str(len(pass_fds))
-
with sandbox as sbx:
prefix = [os.fspath(x) for x in sbx]
text=True,
user=user,
group=group,
- # pass_fds only comes into effect after python has invoked the preexec function, so we make
- # sure that pass_fds contains the file descriptors to keep open after we've done our
- # transformation in preexec().
- pass_fds=[SD_LISTEN_FDS_START + i for i in range(len(pass_fds))],
+ pass_fds=pass_fds,
env=env,
- preexec_fn=functools.partial(preexec, preexec_fn=preexec_fn, pass_fds=pass_fds),
+ preexec_fn=preexec,
) as proc:
def failed() -> bool:
user=user,
group=group,
env=env,
- preexec_fn=functools.partial(preexec, preexec_fn=preexec_fn, pass_fds=tuple()),
+ preexec_fn=preexec,
)
raise subprocess.CalledProcessError(returncode, cmdline)
except FileNotFoundError as e:
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x00020000
CLONE_NEWUSER = 0x10000000
+EBADF = 9
EPERM = 1
ENOENT = 2
ENOSYS = 38
+F_DUPFD = 0
+F_GETFD = 1
LINUX_CAPABILITY_U32S_3 = 2
LINUX_CAPABILITY_VERSION_3 = 0x20080522
MNT_DETACH = 2
# These definitions are taken from the libseccomp headers
SCMP_ACT_ALLOW = 0x7FFF0000
SCMP_ACT_ERRNO = 0x00050000
+SD_LISTEN_FDS_START = 3
SIGSTOP = 19
libc.umount2.argtypes = (ctypes.c_char_p, ctypes.c_int)
libc.capget.argtypes = (ctypes.c_void_p, ctypes.c_void_p)
libc.capset.argtypes = (ctypes.c_void_p, ctypes.c_void_p)
+libc.fcntl.argtypes = (ctypes.c_int, ctypes.c_int, ctypes.c_int)
def terminal_is_dumb() -> bool:
return os.path.commonpath((one, two)) == two
+def pack_file_descriptors() -> int:
+ fds = []
+
+ with os.scandir("/proc/self/fd") as it:
+ for e in it:
+ if not e.is_symlink() and (e.is_file() or e.is_dir()):
+ continue
+
+ try:
+ fd = int(e.name)
+ except ValueError:
+ continue
+
+ if fd < SD_LISTEN_FDS_START:
+ continue
+
+ fds.append(fd)
+
+ # os.scandir() either opens a file descriptor to the given path or dups the given file descriptor. Either
+ # way, there will be an extra file descriptor in the fds array that's not valid anymore now, so find out
+ # which one and drop it.
+ fds = sorted(fd for fd in fds if libc.fcntl(fd, F_GETFD, 0) >= 0)
+
+ # The following is a reimplementation of pack_fds() in systemd.
+
+ if len(fds) == 0:
+ return 0
+
+ start = 0
+ while True:
+ restart_from = -1
+
+ for i in range(start, len(fds)):
+ if fds[i] == SD_LISTEN_FDS_START + i:
+ continue
+
+ nfd = libc.fcntl(fds[i], F_DUPFD, SD_LISTEN_FDS_START + i)
+ if nfd < 0:
+ oserror("fnctl")
+
+ try:
+ os.close(fds[i])
+ except OSError as e:
+ if e.errno != EBADF:
+ raise
+
+ fds[i] = nfd
+
+ if nfd != (SD_LISTEN_FDS_START + i) and restart_from < 0:
+ restart_from = i
+
+ if restart_from < 0:
+ break
+
+ start = restart_from
+
+ assert fds[0] == SD_LISTEN_FDS_START
+
+ return len(fds)
+
+
class FSOperation:
def __init__(self, dst: str) -> None:
self.dst = dst
upperdir = ""
workdir = ""
chdir = None
- become_root = suppress_chown = unshare_net = unshare_ipc = suspend = False
+ become_root = suppress_chown = unshare_net = unshare_ipc = suspend = pack_fds = False
ttyname = os.ttyname(2) if os.isatty(2) else ""
unshare_ipc = True
elif arg == "--suspend":
suspend = True
+ elif arg == "--pack-fds":
+ pack_fds = True
elif arg.startswith("-"):
raise ValueError(f"Unrecognized option {arg}")
else:
if e in os.environ:
del os.environ[e]
- # If $LISTEN_FDS is in the environment, let's automatically set $LISTEN_PID to the correct pid as well.
- if "LISTEN_FDS" in os.environ:
- os.environ["LISTEN_PID"] = str(os.getpid())
+ if pack_fds:
+ nfds = pack_file_descriptors()
+ if nfds > 0:
+ os.environ["LISTEN_FDS"] = str(nfds)
+ os.environ["LISTEN_PID"] = str(os.getpid())
namespaces = CLONE_NEWNS
if unshare_net and have_effective_cap(CAP_NET_ADMIN):