Container Interface: expose the host's os-release metadata to nspawn and portable guests
`container_ttys=pts/7 pts/8 pts/14` it will spawn three additional login
gettys on ptys 7, 8, and 14.
+4. To allow applications to detect the OS version and other metadata of the host
+ running the container manager, if this is considered desirable, please parse
+ the host's `/etc/os-release` and set a `$container_host_<key>=<VALUE>`
+ environment variable for the ID fields described by the [os-release
+ interface](https://www.freedesktop.org/software/systemd/man/os-release.html), eg:
+ `$container_host_id=debian`
+ `$container_host_build_id=2020-06-15`
+ `$container_host_variant_id=server`
+ `$container_host_version_id=10`
+
## Advanced Integration
1. Consider syncing `/etc/localtime` from the host file system into the
name in order to avoid name clashes. Applications
reading this file must ignore unknown fields. Example:
<literal>DEBIAN_BTS="debbugs://bugs.debian.org/"</literal></para>
+
+ <para>Container and sandbox runtime managers may make the host's
+ identification data available to applications by providing the host's
+ <filename>/etc/os-release</filename> and
+ <filename>/usr/lib/os-release</filename> as respectively
+ <filename>/run/host/etc/os-release</filename> and
+ <filename>/run/host/usr/lib/os-release</filename>.</para>
</refsect1>
<refsect1>
#include "mkdir.h"
#include "selinux-util.h"
#include "smack-util.h"
+#include "user-util.h"
int mkdir_label(const char *path, mode_t mode) {
int r;
}
int mkdir_parents_label(const char *path, mode_t mode) {
- return mkdir_parents_internal(NULL, path, mode, mkdir_label);
+ return mkdir_parents_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_label);
}
int mkdir_p_label(const char *path, mode_t mode) {
- return mkdir_p_internal(NULL, path, mode, mkdir_label);
+ return mkdir_p_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_label);
}
return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_errno_wrapper);
}
-int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) {
+int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) {
const char *p, *e;
int r;
if (prefix && path_startswith(prefix, t))
continue;
- r = _mkdir(t, mode);
- if (r < 0 && r != -EEXIST)
- return r;
+ if (uid == UID_INVALID && gid == UID_INVALID && flags == 0) {
+ r = _mkdir(t, mode);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ } else {
+ r = mkdir_safe_internal(t, mode, uid, gid, flags, _mkdir);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
}
}
int mkdir_parents(const char *path, mode_t mode) {
- return mkdir_parents_internal(NULL, path, mode, mkdir_errno_wrapper);
+ return mkdir_parents_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_errno_wrapper);
+}
+
+int mkdir_parents_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_parents_internal(prefix, path, mode, uid, gid, flags, mkdir_errno_wrapper);
}
-int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) {
+int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) {
int r;
/* Like mkdir -p */
assert(_mkdir != mkdir);
- r = mkdir_parents_internal(prefix, path, mode, _mkdir);
+ r = mkdir_parents_internal(prefix, path, mode, uid, gid, flags, _mkdir);
if (r < 0)
return r;
- r = _mkdir(path, mode);
- if (r < 0 && (r != -EEXIST || is_dir(path, true) <= 0))
- return r;
+ if (uid == UID_INVALID && gid == UID_INVALID && flags == 0) {
+ r = _mkdir(path, mode);
+ if (r < 0 && (r != -EEXIST || is_dir(path, true) <= 0))
+ return r;
+ } else {
+ r = mkdir_safe_internal(path, mode, uid, gid, flags, _mkdir);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
return 0;
}
int mkdir_p(const char *path, mode_t mode) {
- return mkdir_p_internal(NULL, path, mode, mkdir_errno_wrapper);
+ return mkdir_p_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_errno_wrapper);
+}
+
+int mkdir_p_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_p_internal(prefix, path, mode, uid, gid, flags, mkdir_errno_wrapper);
}
int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode);
int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
int mkdir_parents(const char *path, mode_t mode);
+int mkdir_parents_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
int mkdir_p(const char *path, mode_t mode);
+int mkdir_p_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
/* mandatory access control(MAC) versions */
int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
-int mkdir_parents_label(const char *path, mode_t mode);
+int mkdir_parents_label(const char *path, mode_t mod);
int mkdir_p_label(const char *path, mode_t mode);
/* internally used */
typedef int (*mkdir_func_t)(const char *pathname, mode_t mode);
int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
-int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir);
-int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir);
+int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
+int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
}
-static int mkdir_userns(const char *path, mode_t mode, uid_t uid_shift) {
- int r;
-
- assert(path);
-
- r = mkdir_errno_wrapper(path, mode);
- if (r < 0 && r != -EEXIST)
- return r;
-
- if (uid_shift == UID_INVALID)
- return 0;
-
- if (lchown(path, uid_shift, uid_shift) < 0)
- return -errno;
-
- return 0;
-}
-
-static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, uid_t uid_shift) {
- const char *p, *e;
- int r;
-
- assert(path);
-
- if (prefix && !path_startswith(path, prefix))
- return -ENOTDIR;
-
- /* create every parent directory in the path, except the last component */
- p = path + strspn(path, "/");
- for (;;) {
- char t[strlen(path) + 1];
-
- e = p + strcspn(p, "/");
- p = e + strspn(e, "/");
-
- /* Is this the last component? If so, then we're done */
- if (*p == 0)
- break;
-
- memcpy(t, path, e - path);
- t[e-path] = 0;
-
- if (prefix && path_startswith(prefix, t))
- continue;
-
- r = mkdir_userns(t, mode, uid_shift);
- if (r < 0)
- return r;
- }
-
- return mkdir_userns(path, mode, uid_shift);
-}
-
int mount_all(const char *dest,
MountSettingsMask mount_settings,
uid_t uid_shift,
PROC_READ_ONLY("/proc/irq"),
PROC_READ_ONLY("/proc/scsi"),
- { "mqueue", "/dev/mqueue", "mqueue", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ { "mqueue", "/dev/mqueue", "mqueue", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_IN_USERNS|MOUNT_MKDIR },
/* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */
- { "tmpfs", "/tmp", "tmpfs", "mode=1777" TMPFS_LIMITS_TMP, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ { "tmpfs", "/tmp", "tmpfs", "mode=1777" TMPFS_LIMITS_TMP, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR },
- { "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ { "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR },
- { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR }, /* skipped if above was mounted */
- { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_MKDIR }, /* skipped if above was mounted */
- { "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME,
+ { "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
- { "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
- { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
+ { "/usr/lib/os-release", "/run/host/usr/lib/os-release", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL|MOUNT_MKDIR|MOUNT_TOUCH },
+ { "/etc/os-release", "/run/host/etc/os-release", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_MKDIR|MOUNT_TOUCH },
#if HAVE_SELINUX
- { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
+ { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
MOUNT_MKDIR }, /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */
- { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
+ { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
0 }, /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */
#endif
};
for (k = 0; k < ELEMENTSOF(mount_table); k++) {
_cleanup_free_ char *where = NULL, *options = NULL;
const char *o;
+ struct stat source_st;
bool fatal = FLAGS_SET(mount_table[k].mount_settings, MOUNT_FATAL);
if (in_userns != FLAGS_SET(mount_table[k].mount_settings, MOUNT_IN_USERNS))
return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
if (r > 0)
continue;
+
+ /* Shortcut for optional bind mounts: if the source can't be found skip ahead to avoid creating
+ * empty and unused directories. */
+ if (!fatal && FLAGS_SET(mount_table[k].mount_settings, MOUNT_MKDIR) && FLAGS_SET(mount_table[k].flags, MS_BIND)) {
+ r = stat(mount_table[k].what, &source_st);
+ if (r < 0) {
+ if (errno == ENOENT)
+ continue;
+ return log_error_errno(errno, "Failed to stat %s: %m", mount_table[k].what);
+ }
+ }
}
if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_MKDIR)) {
- r = mkdir_userns_p(dest, where, 0755, (use_userns && !in_userns) ? uid_shift : UID_INVALID);
+ uid_t u = (use_userns && !in_userns) ? uid_shift : UID_INVALID;
+
+ if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_TOUCH))
+ r = mkdir_parents_safe(dest, where, 0755, u, u, 0);
+ else
+ r = mkdir_p_safe(dest, where, 0755, u, u, 0);
if (r < 0 && r != -EEXIST) {
if (fatal && r != -EROFS)
return log_error_errno(r, "Failed to create directory %s: %m", where);
if (r != -EROFS)
continue;
}
+ if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_TOUCH)) {
+ r = touch(where);
+ if (r < 0 && r != -EEXIST) {
+ if (fatal)
+ return log_error_errno(r, "Failed to create mount point %s: %m", where);
+ log_debug_errno(r, "Failed to create mount point %s: %m", where);
+ }
+ }
}
o = mount_table[k].options;
MOUNT_ROOT_ONLY = 1 << 6, /* if set, only root mounts are mounted */
MOUNT_NON_ROOT_ONLY = 1 << 7, /* if set, only non-root mounts are mounted */
MOUNT_MKDIR = 1 << 8, /* if set, make directory to mount over first */
+ MOUNT_TOUCH = 1 << 9, /* if set, touch file to mount over first */
} MountSettingsMask;
typedef enum CustomMountType {
int kmsg_socket,
int rtnl_socket,
int master_pty_socket,
- FDSet *fds) {
+ FDSet *fds,
+ char **os_release_pairs) {
_cleanup_free_ char *home = NULL;
char as_uuid[ID128_UUID_STRING_MAX];
if (asprintf((char **)(envp + n_env++), "NOTIFY_SOCKET=%s", NSPAWN_NOTIFY_SOCKET_PATH) < 0)
return log_oom();
- env_use = strv_env_merge(2, envp, arg_setenv);
+ env_use = strv_env_merge(3, envp, arg_setenv, os_release_pairs);
if (!env_use)
return log_oom();
FDSet *fds,
int netns_fd) {
+ _cleanup_strv_free_ char **os_release_pairs = NULL;
_cleanup_close_ int fd = -1;
const char *p;
pid_t pid;
log_debug("Outer child is initializing.");
+ r = load_os_release_pairs_with_prefix("/", "container_host_", &os_release_pairs);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read os-release from host for container, ignoring: %m");
+
if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
return log_error_errno(r, "Failed to join network namespace: %m");
}
- r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds);
+ r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds, os_release_pairs);
if (r < 0)
_exit(EXIT_FAILURE);
"[Service]\n",
IN_SET(type, IMAGE_DIRECTORY, IMAGE_SUBVOLUME) ? "RootDirectory=" : "RootImage=", image_path, "\n"
"Environment=PORTABLE=", basename(image_path), "\n"
+ "BindReadOnlyPaths=-/etc/os-release:/run/host/etc/os-release /usr/lib/os-release:/run/host/usr/lib/os-release\n"
"LogExtraFields=PORTABLE=", basename(image_path), "\n",
NULL))
return load_env_file_pairs(f, p, ret);
}
+
+int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret) {
+ _cleanup_strv_free_ char **os_release_pairs = NULL, **os_release_pairs_prefixed = NULL;
+ char **p, **q;
+ int r;
+
+ r = load_os_release_pairs(root, &os_release_pairs);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(p, q, os_release_pairs) {
+ char *line;
+
+ // We strictly return only the four main ID fields and ignore the rest
+ if (!STR_IN_SET(*p, "ID", "VERSION_ID", "BUILD_ID", "VARIANT_ID"))
+ continue;
+
+ ascii_strlower(*p);
+ line = strjoin(prefix, *p, "=", *q);
+ if (!line)
+ return -ENOMEM;
+ r = strv_consume(&os_release_pairs_prefixed, line);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(os_release_pairs_prefixed);
+
+ return 0;
+}
int parse_os_release(const char *root, ...) _sentinel_;
int load_os_release_pairs(const char *root, char ***ret);
+int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret);
mask_supporting_services
../create-busybox-container $initdir/testsuite-13.nc-container
- initdir="$initdir/testsuite-13.nc-container" dracut_install nc ip
+ initdir="$initdir/testsuite-13.nc-container" dracut_install nc ip md5sum
)
}
systemd-nspawn --register=no -D /testsuite-13.nc-container -U /bin/sh -x -c "$_cmd"
}
+function check_os_release {
+ local _cmd='. /tmp/os-release
+if [ -n "${ID:+set}" ] && [ "${ID}" != "${container_host_id}" ]; then exit 1; fi
+if [ -n "${VERSION_ID:+set}" ] && [ "${VERSION_ID}" != "${container_host_version_id}" ]; then exit 1; fi
+if [ -n "${BUILD_ID:+set}" ] && [ "${BUILD_ID}" != "${container_host_build_id}" ]; then exit 1; fi
+if [ -n "${VARIANT_ID:+set}" ] && [ "${VARIANT_ID}" != "${container_host_variant_id}" ]; then exit 1; fi
+cd /tmp; (cd /run/host/usr/lib; md5sum os-release) | md5sum -c
+'
+
+ systemd-nspawn --register=no -D /testsuite-13.nc-container --bind=/etc/os-release:/tmp/os-release /bin/sh -x -e -c "$_cmd"
+}
+
function run {
if [[ "$1" = "yes" && "$is_v2_supported" = "no" ]]; then
printf "Unified cgroup hierarchy is not supported. Skipping.\n" >&2
check_notification_socket
+check_os_release
+
for api_vfs_writable in yes no network; do
run no no $api_vfs_writable
run yes no $api_vfs_writable