From e5f10cafe0bb1034505cba934cd6fae5f332b1dc Mon Sep 17 00:00:00 2001 From: Anita Zhang Date: Tue, 19 Nov 2019 14:24:52 -0800 Subject: [PATCH] core: create inaccessible nodes for users when making runtime dirs To support ProtectHome=y in a user namespace (which mounts the inaccessible nodes), the nodes need to be accessible by the user. Create these paths and devices in the user runtime directory so they can be used later if needed. --- src/core/mount-setup.c | 2 +- src/core/namespace.c | 18 +++++- src/login/user-runtime-dir.c | 3 + src/nspawn/nspawn-mount.c | 7 ++- src/nspawn/nspawn.c | 5 +- src/shared/dev-setup.c | 18 +++--- src/shared/mount-util.c | 58 ++++++++++++++++---- src/shared/mount-util.h | 2 +- src/test/test-dev-setup.c | 3 +- test/TEST-43-PRIVATEUSER-UNPRIV/test.sh | 1 + test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh | 9 +++ 11 files changed, 96 insertions(+), 30 deletions(-) diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 7ef5b127adc..5dfcb6158a4 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -536,7 +536,7 @@ int mount_setup(bool loaded_policy) { /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount inaccessible nodes * from. 
*/ - (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID); + (void) make_inaccessible_nodes("/run/systemd", UID_INVALID, GID_INVALID); return 0; } diff --git a/src/core/namespace.c b/src/core/namespace.c index 104e96193d7..fee4c980964 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -12,6 +12,7 @@ #include "base-filesystem.h" #include "dev-setup.h" #include "fd-util.h" +#include "format-util.h" #include "fs-util.h" #include "label.h" #include "loop-util.h" @@ -905,6 +906,7 @@ static int apply_mount( const char *root_directory, MountEntry *m) { + _cleanup_free_ char *inaccessible = NULL; bool rbind = true, make = false; const char *what; int r; @@ -916,6 +918,8 @@ static int apply_mount( switch (m->mode) { case INACCESSIBLE: { + _cleanup_free_ char *tmp = NULL; + const char *runtime_dir; struct stat target; /* First, get rid of everything that is below if there @@ -930,10 +934,20 @@ static int apply_mount( return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m)); } - what = mode_to_inaccessible_node(target.st_mode); - if (!what) + if (geteuid() == 0) + runtime_dir = "/run/systemd"; + else { + if (asprintf(&tmp, "/run/user/"UID_FMT, geteuid()) < 0) + return log_oom(); /* tmp is indeterminate after a failed asprintf(), must not be used */ + + runtime_dir = tmp; + } + + r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible); + if (r < 0) return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "File type not supported for inaccessible mounts. 
Note that symlinks are not allowed"); + what = inaccessible; break; } diff --git a/src/login/user-runtime-dir.c b/src/login/user-runtime-dir.c index c8a56256057..1f98898b695 100644 --- a/src/login/user-runtime-dir.c +++ b/src/login/user-runtime-dir.c @@ -6,6 +6,7 @@ #include "sd-bus.h" #include "bus-error.h" +#include "dev-setup.h" #include "fs-util.h" #include "format-util.h" #include "label.h" @@ -91,6 +92,8 @@ static int user_mkdir_runtime_path( log_warning_errno(r, "Failed to fix label of \"%s\", ignoring: %m", runtime_path); } + /* Set up inaccessible nodes now so they're available if we decide to use them with user namespaces. */ + (void) make_inaccessible_nodes(runtime_path, uid, gid); return 0; fail: diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 0fb83a4ff3d..f423f625905 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -883,8 +883,7 @@ static int mount_overlay(const char *dest, CustomMount *m) { } static int mount_inaccessible(const char *dest, CustomMount *m) { - _cleanup_free_ char *where = NULL; - const char *source; + _cleanup_free_ char *where = NULL, *source = NULL; struct stat st; int r; @@ -897,7 +896,9 @@ static int mount_inaccessible(const char *dest, CustomMount *m) { return m->graceful ? 0 : r; } - assert_se(source = mode_to_inaccessible_node(st.st_mode)); + r = mode_to_inaccessible_node("/run/systemd", st.st_mode, &source); + if (r < 0) + return m->graceful ? 0 : r; r = mount_verbose(m->graceful ? 
LOG_DEBUG : LOG_ERR, source, where, NULL, MS_BIND, NULL); if (r < 0) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 27ea5921582..9113f6e323c 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3252,6 +3252,7 @@ static int outer_child( int netns_fd) { _cleanup_close_ int fd = -1; + const char *p; pid_t pid; ssize_t l; int r; @@ -3447,7 +3448,9 @@ static int outer_child( return r; (void) dev_setup(directory, arg_uid_shift, arg_uid_shift); - (void) make_inaccessible_nodes(directory, arg_uid_shift, arg_uid_shift); + + p = prefix_roota(directory, "/run/systemd"); + (void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift); r = setup_pts(directory); if (r < 0) diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c index 071ff7b30cf..4bce8b167b5 100644 --- a/src/shared/dev-setup.c +++ b/src/shared/dev-setup.c @@ -61,20 +61,20 @@ int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid) { const char *name; mode_t mode; } table[] = { - { "/run/systemd", S_IFDIR | 0755 }, - { "/run/systemd/inaccessible", S_IFDIR | 0000 }, - { "/run/systemd/inaccessible/reg", S_IFREG | 0000 }, - { "/run/systemd/inaccessible/dir", S_IFDIR | 0000 }, - { "/run/systemd/inaccessible/fifo", S_IFIFO | 0000 }, - { "/run/systemd/inaccessible/sock", S_IFSOCK | 0000 }, + { "", S_IFDIR | 0755 }, + { "/inaccessible", S_IFDIR | 0000 }, + { "/inaccessible/reg", S_IFREG | 0000 }, + { "/inaccessible/dir", S_IFDIR | 0000 }, + { "/inaccessible/fifo", S_IFIFO | 0000 }, + { "/inaccessible/sock", S_IFSOCK | 0000 }, /* The following two are likely to fail if we lack the privs for it (for example in an userns * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0 * device nodes to be created). But that's entirely fine. 
Consumers of these files should carry - * fallback to use a different node then, for example /run/systemd/inaccessible/sock, which is close + * fallback to use a different node then, for example /inaccessible/sock, which is close * enough in behaviour and semantics for most uses. */ - { "/run/systemd/inaccessible/chr", S_IFCHR | 0000 }, - { "/run/systemd/inaccessible/blk", S_IFBLK | 0000 }, + { "/inaccessible/chr", S_IFCHR | 0000 }, + { "/inaccessible/blk", S_IFBLK | 0000 }, }; _cleanup_umask_ mode_t u; diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index d316e81aa24..95d7ea96917 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -339,38 +339,72 @@ int repeat_unmount(const char *path, int flags) { } } -const char* mode_to_inaccessible_node(mode_t mode) { +int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest) { /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead, * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. 
*/ + _cleanup_free_ char *d = NULL; + const char *node = NULL; + char *tmp; + + assert(dest); switch(mode & S_IFMT) { case S_IFREG: - return "/run/systemd/inaccessible/reg"; + node = "/inaccessible/reg"; + break; case S_IFDIR: - return "/run/systemd/inaccessible/dir"; + node = "/inaccessible/dir"; + break; case S_IFCHR: - if (access("/run/systemd/inaccessible/chr", F_OK) == 0) - return "/run/systemd/inaccessible/chr"; - return "/run/systemd/inaccessible/sock"; + d = path_join(runtime_dir, "/inaccessible/chr"); + if (!d) + return log_oom(); + + if (access(d, F_OK) == 0) { + *dest = TAKE_PTR(d); + return 0; + } + + node = "/inaccessible/sock"; + break; case S_IFBLK: - if (access("/run/systemd/inaccessible/blk", F_OK) == 0) - return "/run/systemd/inaccessible/blk"; - return "/run/systemd/inaccessible/sock"; + d = path_join(runtime_dir, "/inaccessible/blk"); + if (!d) + return log_oom(); + + if (access(d, F_OK) == 0) { + *dest = TAKE_PTR(d); + return 0; + } + + node = "/inaccessible/sock"; + break; case S_IFIFO: - return "/run/systemd/inaccessible/fifo"; + node = "/inaccessible/fifo"; + break; case S_IFSOCK: - return "/run/systemd/inaccessible/sock"; + node = "/inaccessible/sock"; + break; } - return NULL; + + if (!node) + return -EINVAL; + + tmp = path_join(runtime_dir, node); + if (!tmp) + return log_oom(); + + *dest = tmp; + return 0; } #define FLAG(name) (flags & name ? 
STRINGIFY(name) "|" : "") diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 8649fca39b6..9a8d073631d 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -31,4 +31,4 @@ int mount_option_mangle( unsigned long *ret_mount_flags, char **ret_remaining_options); -const char* mode_to_inaccessible_node(mode_t mode); +int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest); diff --git a/src/test/test-dev-setup.c b/src/test/test-dev-setup.c index 9414ea6c3e7..d991fe52004 100644 --- a/src/test/test-dev-setup.c +++ b/src/test/test-dev-setup.c @@ -20,7 +20,8 @@ int main(int argc, char *argv[]) { f = prefix_roota(p, "/run"); assert_se(mkdir(f, 0755) >= 0); - assert_se(make_inaccessible_nodes(p, 1, 1) >= 0); + f = prefix_roota(p, "/run/systemd"); + assert_se(make_inaccessible_nodes(f, 1, 1) >= 0); f = prefix_roota(p, "/run/systemd/inaccessible/reg"); assert_se(stat(f, &st) >= 0); diff --git a/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh b/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh index 23904a3c3f8..49d61c6a7f9 100755 --- a/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh +++ b/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh @@ -11,6 +11,7 @@ test_setup() { eval $(udevadm info --export --query=env --name=${LOOPDEV}p2) setup_basic_environment + inst_binary stat mask_supporting_services diff --git a/test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh b/test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh index 3cabd78574d..cd61dc95c95 100755 --- a/test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh +++ b/test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh @@ -46,6 +46,15 @@ runas nobody systemd-run --user --unit=test-protect-home-tmpfs \ -p PrivateUsers=yes -p ProtectHome=tmpfs \ -P test ! 
-e /home/nobody +# Confirm that /home, /root, and /run/user are all inaccessible under "yes" (checks are chained so any failure fails the unit) +runas nobody systemd-run --user --unit=test-protect-home-yes \ + -p PrivateUsers=yes -p ProtectHome=yes \ + -P bash -c ' + test "$(stat -c %a /home)" = "0" && + test "$(stat -c %a /root)" = "0" && + test "$(stat -c %a /run/user)" = "0" + ' + # Confirm we cannot change groups because we only have one mapping in the user # namespace (no CAP_SETGID in the parent namespace to write the additional # mapping of the user supplied group and thus cannot change groups to an -- 2.39.2