test: write file from systemd service in transient unit

author Tommy Unger <tommyu@fb.com>

Sat, 19 Apr 2025 01:57:26 +0000 (18:57 -0700)

committer Daan De Meyer <daan.j.demeyer@gmail.com>

Thu, 15 May 2025 09:33:23 +0000 (11:33 +0200)
author Tommy Unger <tommyu@fb.com>
Sat, 19 Apr 2025 01:57:26 +0000 (18:57 -0700)
committer Daan De Meyer <daan.j.demeyer@gmail.com>
Thu, 15 May 2025 09:33:23 +0000 (11:33 +0200)
diff --git a/test/units/TEST-07-PID1.transient-unit-container.sh b/test/units/TEST-07-PID1.transient-unit-container.sh

new file mode 100755 (executable)

index 0000000..f18e9ff
--- /dev/null
+++ b/test/units/TEST-07-PID1.transient-unit-container.sh
@@ -0,0 +1,194 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# shellcheck disable=SC2016
+set -Eeuo pipefail
+set -x
+
+# -----------------------------------------------------------------------------
+#
+# Test: PID-1 Transient Unit Container
+#
+# Verifies that a minimal systemd PID 1 inside a tmpfs root can:
+#   • Boot
+#   • Bind mount the host's /usr directory read-only
+#   • Bind mount a shared writable directory with the host
+#   • Run a one-shot service in the container to create and
+#     write to a host file in that directory
+#   • Exit cleanly with systemd-run --wait propagating status
+#
+# -----------------------------------------------------------------------------
+
+# Helpers
+# shellcheck source=test/units/test-control.sh
+. "$(dirname "$0")"/test-control.sh
+# shellcheck source=test/units/util.sh
+. "$(dirname "$0")"/util.sh
+
+# Teardown bookkeeping, consumed by file_write_cleanup: mount points to
+# unmount and paths to delete.
+declare -a CLEANUP_MOUNTS=()
+declare -a CLEANUP_PATHS=()
+
+# Constants shared by the whole test
+readonly TEST_NAME="TEST-07-PID1.transient-unit-container"
+readonly OUTPUT_FILE="test-service-output"
+readonly EXPECTED_OUTPUT="Test service is running"
+
+# Host-side directories. "mktemp -u" is a dry run: it only picks names that
+# are unlikely to collide, and make_mounts creates the directories later.
+CONTAINER_ROOT_FS=$(mktemp -u -d --tmpdir "${TEST_NAME}-root-XXXX")
+HOST_OUT_DIR=$(mktemp -u -d --tmpdir test-dir-XXXX)
+declare -r CONTAINER_ROOT_FS HOST_OUT_DIR
+
+# Path inside the container at which HOST_OUT_DIR gets bind mounted (see the
+# BindPaths= property passed to systemd-run)
+readonly CONTAINER_MOUNT_DIR="/${TEST_NAME}"
+
+# Mount a dummy procfs on the host (never passed to the container) and register
+# it for cleanup. This works around a permissions error when mounting /proc
+# inside the container, which looks like a systemd bug.
+temporary_mount_hack() {
+    # IMPORTANT: This is modeled after a workaround in
+    # TEST-07-PID1.private-pids.sh with a key difference. There, at least one
+    # unmasked procfs mount must exist on the host for /proc/ to be mountable
+    # by an UNPRIVILEGED user within the container namespace. Note the host
+    # mount is not actually passed through to the container.
+    #
+    # Here, systemd-run is NOT launched with --user: it is a PRIVILEGED
+    # environment and should not hit a permissions error when mounting /proc.
+    # Unfortunately, that is exactly what happens if the container is launched
+    # without first mounting a dummy unmasked /proc on the host.
+    #
+    # This may indicate a significant bug: a change masking the host's /proc
+    # could prevent the startup of privileged containers. If that were
+    # addressed, this function could be removed.
+
+    # Assign separately: "local x=$(cmd)" would hide a mktemp failure from set -e.
+    local helper_proc
+    helper_proc=$(mktemp -d --tmpdir helper-proc-XXXX)
+    CLEANUP_PATHS+=("$helper_proc")
+
+    mount -t proc proc "$helper_proc"
+    CLEANUP_MOUNTS+=("$helper_proc")
+}
+
+# Mount 1) a writable directory for output; 2) a dummy procfs as a workaround so
+# the container can mount /proc; 3) a tmpfs to serve as the container's root
+# FS; 4) the host's /usr directory read only.
+make_mounts() {
+    # Host bind mount for the output file. Systemd will make the container's version.
+    mkdir -p "$HOST_OUT_DIR"
+    CLEANUP_PATHS+=("$HOST_OUT_DIR")
+
+    temporary_mount_hack
+
+    # Container root tmpfs mount
+    mkdir -p "$CONTAINER_ROOT_FS"
+    CLEANUP_PATHS+=("$CONTAINER_ROOT_FS")
+
+    mount -t tmpfs tmpfs "$CONTAINER_ROOT_FS"
+    CLEANUP_MOUNTS+=("$CONTAINER_ROOT_FS")
+
+    # Container's /usr will be a read-only bind mount of the host's /usr. Tried
+    # using -p BindReadOnlyPaths=/usr instead of this, but that didn't work.
+    # Debugging that got hairy, so I'm going with this for now.
+    mkdir -p "${CONTAINER_ROOT_FS}/usr"
+    mount --bind /usr "${CONTAINER_ROOT_FS}/usr"
+    # Register the still-writable bind mount before the remount can fail, so
+    # cleanup unmounts it instead of "rm -rf" reaching the host's /usr. Prepended
+    # to unmount /root/usr before /root; /root's removal covers the directory.
+    CLEANUP_MOUNTS=( "${CONTAINER_ROOT_FS}/usr" "${CLEANUP_MOUNTS[@]}" )
+    mount -o remount,bind,ro "${CONTAINER_ROOT_FS}/usr"
+}
+
+# Install test-service.service into the container root: give the container a
+# machine-id, write the unit that produces the output file, and enable it.
+config_container_service() {
+    local -r unit_dir="${CONTAINER_ROOT_FS}/etc/systemd/system"
+    local -r unit_file="${unit_dir}/test-service.service"
+    local -r output_in_container="${CONTAINER_MOUNT_DIR}/${OUTPUT_FILE}"
+
+    mkdir -p "$unit_dir"
+    # Phony random machine-id: a random UUID, dashes stripped and lowercased
+    uuidgen -r | tr -d '-' | tr '[:upper:]' '[:lower:]' > "${CONTAINER_ROOT_FS}/etc/machine-id"
+
+    # Unquoted delimiter: the variables below are expanded now, by this shell
+    cat >"$unit_file" <<EOF
+[Unit]
+Description=Test Service for Internal Systemd
+After=basic.target
+
+[Service]
+Type=oneshot
+ExecStart=/bin/sh -c 'echo "$EXPECTED_OUTPUT"  > "$output_in_container"'
+ExecStartPost=/usr/bin/systemctl --no-block exit 0
+TimeoutStopSec=15s
+
+[Install]
+WantedBy=multi-user.target
+EOF
+    systemctl --root="$CONTAINER_ROOT_FS" enable test-service.service
+}
+
+# The testcase. Configs cleanup trap, makes mounts, configs internal service
+# unit, kicks off container as a transient unit, waits for it to finish and
+# checks output.
+testcase_transient_unit_container_file_write() {
+    # Test cases seem to run in a subshell and this file has one testcase, so
+    # cleanup is tied to this subshell's EXIT, which "set -e" aborts reach too.
+    # INT/TERM exit instead of calling cleanup directly: a handler that just
+    # returns (or an ERR one that runs "set +e") lets the test resume after it.
+    trap file_write_cleanup EXIT
+    trap 'exit 130' INT
+    trap 'exit 143' TERM
+
+    make_mounts
+    config_container_service
+
+    # Run the container as a transient unit and wait for it to finish
+    local -r bind_mount_arg="${HOST_OUT_DIR}:${CONTAINER_MOUNT_DIR}"
+    local -r service_unit_name="${TEST_NAME}.service"
+
+    SYSTEMD_LOG_LEVEL=debug SYSTEMD_LOG_TARGET=console \
+    systemd-run \
+    --unit "$service_unit_name" \
+    --wait \
+    -p RootDirectory="$CONTAINER_ROOT_FS" \
+    -p PrivatePIDs=yes \
+    -p PrivateUsersEx=full \
+    -p ProtectHostnameEx=private \
+    -p ProtectControlGroupsEx=private \
+    -p PrivateMounts=yes \
+    -p PrivateNetwork=yes \
+    -p PrivateDevices=yes \
+    -p PrivateIPC=yes \
+    -p BindLogSockets=no \
+    -p "Environment=container=transient-unit" \
+    -p "CapabilityBoundingSet=~CAP_SYS_TIME CAP_SYS_BOOT CAP_AUDIT_READ" \
+    -p Type=exec \
+    -p Delegate=true \
+    -p DelegateSubgroup=init.scope \
+    -p DelegateNamespaces=yes \
+    -p BindPaths="$bind_mount_arg" \
+    /usr/lib/systemd/systemd multi-user.target
+
+    # If our service ran, we should be able to read its output here
+    local -r host_output="${HOST_OUT_DIR}/${OUTPUT_FILE}"
+    assert_eq "$(cat "${host_output}")" "$EXPECTED_OUTPUT"
+}
+
+CLEANUP_DONE=0
+# Best-effort teardown: unmount CLEANUP_MOUNTS, then delete CLEANUP_PATHS.
+file_write_cleanup() {
+    # Run only once, e.g. when reached via both SIGINT and EXIT.
+    (( CLEANUP_DONE )) && return
+    CLEANUP_DONE=1
+    set +e
+    # umount rejects an empty argument list, so skip it if nothing was mounted.
+    (( ${#CLEANUP_MOUNTS[@]} )) && umount "${CLEANUP_MOUNTS[@]}"
+    # Never descend into a filesystem that failed to unmount (e.g. host /usr).
+    rm -rf --one-file-system "${CLEANUP_PATHS[@]}"
+}
+
+run_testcases
author	Tommy Unger <tommyu@fb.com>
	Sat, 19 Apr 2025 01:57:26 +0000 (18:57 -0700)
committer	Daan De Meyer <daan.j.demeyer@gmail.com>
	Thu, 15 May 2025 09:33:23 +0000 (11:33 +0200)