/* Also create /run/systemd/inaccessible nodes, so that we always have something to mount inaccessible nodes
* from. */
- (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID);
+ (void) make_inaccessible_nodes("/run/systemd", UID_INVALID, GID_INVALID);
return 0;
}
#include "base-filesystem.h"
#include "dev-setup.h"
#include "fd-util.h"
+#include "format-util.h"
#include "fs-util.h"
#include "label.h"
#include "loop-util.h"
const char *root_directory,
MountEntry *m) {
+ _cleanup_free_ char *inaccessible = NULL;
bool rbind = true, make = false;
const char *what;
int r;
switch (m->mode) {
case INACCESSIBLE: {
+ _cleanup_free_ char *tmp = NULL;
+ const char *runtime_dir;
struct stat target;
/* First, get rid of everything that is below if there
return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m));
}
- what = mode_to_inaccessible_node(target.st_mode);
- if (!what)
+ /* Pick the runtime directory that holds the inaccessible nodes: the system instance (euid 0) uses
+ * /run/systemd, any other user uses /run/user/<euid>. */
+ if (geteuid() == 0)
+ runtime_dir = "/run/systemd";
+ else {
+ /* Bail out on allocation failure: asprintf() leaves 'tmp' undefined when it fails, so it must
+ * neither be used as the runtime directory nor be handed to the cleanup handler with a bogus value
+ * while we keep running. */
+ if (asprintf(&tmp, "/run/user/"UID_FMT, geteuid()) < 0)
+ return log_oom();
+
+ runtime_dir = tmp;
+ }
+
+ r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible);
+ if (r < 0)
return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
"File type not supported for inaccessible mounts. Note that symlinks are not allowed");
+ what = inaccessible;
break;
}
#include "sd-bus.h"
#include "bus-error.h"
+#include "dev-setup.h"
#include "fs-util.h"
#include "format-util.h"
#include "label.h"
log_warning_errno(r, "Failed to fix label of \"%s\", ignoring: %m", runtime_path);
}
+ /* Set up inaccessible nodes now so they're available if we decide to use them with user namespaces. */
+ (void) make_inaccessible_nodes(runtime_path, uid, gid);
return 0;
fail:
}
static int mount_inaccessible(const char *dest, CustomMount *m) {
- _cleanup_free_ char *where = NULL;
- const char *source;
+ _cleanup_free_ char *where = NULL, *source = NULL;
struct stat st;
int r;
return m->graceful ? 0 : r;
}
- assert_se(source = mode_to_inaccessible_node(st.st_mode));
+ r = mode_to_inaccessible_node("/run/systemd", st.st_mode, &source);
+ if (r < 0)
+ return m->graceful ? 0 : r;
r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, source, where, NULL, MS_BIND, NULL);
if (r < 0)
int netns_fd) {
_cleanup_close_ int fd = -1;
+ const char *p;
pid_t pid;
ssize_t l;
int r;
return r;
(void) dev_setup(directory, arg_uid_shift, arg_uid_shift);
- (void) make_inaccessible_nodes(directory, arg_uid_shift, arg_uid_shift);
+
+ p = prefix_roota(directory, "/run/systemd");
+ (void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift);
r = setup_pts(directory);
if (r < 0)
const char *name;
mode_t mode;
} table[] = {
- { "/run/systemd", S_IFDIR | 0755 },
- { "/run/systemd/inaccessible", S_IFDIR | 0000 },
- { "/run/systemd/inaccessible/reg", S_IFREG | 0000 },
- { "/run/systemd/inaccessible/dir", S_IFDIR | 0000 },
- { "/run/systemd/inaccessible/fifo", S_IFIFO | 0000 },
- { "/run/systemd/inaccessible/sock", S_IFSOCK | 0000 },
+ { "", S_IFDIR | 0755 },
+ { "/inaccessible", S_IFDIR | 0000 },
+ { "/inaccessible/reg", S_IFREG | 0000 },
+ { "/inaccessible/dir", S_IFDIR | 0000 },
+ { "/inaccessible/fifo", S_IFIFO | 0000 },
+ { "/inaccessible/sock", S_IFSOCK | 0000 },
/* The following two are likely to fail if we lack the privs for it (for example in an userns
* environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0
* device nodes to be created). But that's entirely fine. Consumers of these files should carry
- * fallback to use a different node then, for example /run/systemd/inaccessible/sock, which is close
+ * fallback to use a different node then, for example <root>/inaccessible/sock, which is close
* enough in behaviour and semantics for most uses. */
- { "/run/systemd/inaccessible/chr", S_IFCHR | 0000 },
- { "/run/systemd/inaccessible/blk", S_IFBLK | 0000 },
+ { "/inaccessible/chr", S_IFCHR | 0000 },
+ { "/inaccessible/blk", S_IFBLK | 0000 },
};
_cleanup_umask_ mode_t u;
}
}
-const char* mode_to_inaccessible_node(mode_t mode) {
+int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest) {
/* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
* early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
* because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
* device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
* which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
* from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
+ /* The node path is built by joining 'runtime_dir' with the matching "/inaccessible/<type>" suffix. On
+ * success 0 is returned and the joined path is stored in *dest; ownership passes to the caller (the
+ * callers in this change hold it in a _cleanup_free_ variable). If the file type has no matching node,
+ * -EINVAL is returned; if building the path fails, the result of log_oom() is returned. *dest is not
+ * written on failure. */
+ _cleanup_free_ char *d = NULL;
+ const char *node = NULL;
+ char *tmp;
+
+ assert(dest);
switch(mode & S_IFMT) {
case S_IFREG:
- return "/run/systemd/inaccessible/reg";
+ node = "/inaccessible/reg";
+ break;
case S_IFDIR:
- return "/run/systemd/inaccessible/dir";
+ node = "/inaccessible/dir";
+ break;
case S_IFCHR:
- if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
- return "/run/systemd/inaccessible/chr";
- return "/run/systemd/inaccessible/sock";
+ d = path_join(runtime_dir, "/inaccessible/chr");
+ if (!d)
+ return log_oom();
+
+ if (access(d, F_OK) == 0) {
+ *dest = TAKE_PTR(d);
+ return 0;
+ }
+
+ node = "/inaccessible/sock";
+ break;
case S_IFBLK:
- if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
- return "/run/systemd/inaccessible/blk";
- return "/run/systemd/inaccessible/sock";
+ d = path_join(runtime_dir, "/inaccessible/blk");
+ if (!d)
+ return log_oom();
+
+ if (access(d, F_OK) == 0) {
+ *dest = TAKE_PTR(d);
+ return 0;
+ }
+
+ node = "/inaccessible/sock";
+ break;
case S_IFIFO:
- return "/run/systemd/inaccessible/fifo";
+ node = "/inaccessible/fifo";
+ break;
case S_IFSOCK:
- return "/run/systemd/inaccessible/sock";
+ node = "/inaccessible/sock";
+ break;
}
- return NULL;
+
+ if (!node)
+ return -EINVAL;
+
+ tmp = path_join(runtime_dir, node);
+ if (!tmp)
+ return log_oom();
+
+ *dest = tmp;
+ return 0;
}
#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
unsigned long *ret_mount_flags,
char **ret_remaining_options);
-const char* mode_to_inaccessible_node(mode_t mode);
+int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest);
f = prefix_roota(p, "/run");
assert_se(mkdir(f, 0755) >= 0);
- assert_se(make_inaccessible_nodes(p, 1, 1) >= 0);
+ f = prefix_roota(p, "/run/systemd");
+ assert_se(make_inaccessible_nodes(f, 1, 1) >= 0);
f = prefix_roota(p, "/run/systemd/inaccessible/reg");
assert_se(stat(f, &st) >= 0);
eval $(udevadm info --export --query=env --name=${LOOPDEV}p2)
setup_basic_environment
+ inst_binary stat
mask_supporting_services
-p PrivateUsers=yes -p ProtectHome=tmpfs \
-P test ! -e /home/nobody
+# Confirm that /home, /root, and /run/user are inaccessible under ProtectHome=yes.
+# "set -e" makes the inline script stop at the first failing check; without it only
+# the exit status of the last "test" would be reported back through systemd-run.
+runas nobody systemd-run --user --unit=test-protect-home-yes \
+ -p PrivateUsers=yes -p ProtectHome=yes \
+ -P bash -c '
+ set -e
+ test "$(stat -c %a /home)" = "0"
+ test "$(stat -c %a /root)" = "0"
+ test "$(stat -c %a /run/user)" = "0"
+ '
+
# Confirm we cannot change groups because we only have one mapping in the user
# namespace (no CAP_SETGID in the parent namespace to write the additional
# mapping of the user supplied group and thus cannot change groups to an