return false;
}
+int mount_fd(const char *source,
+ int target_fd,
+ const char *filesystemtype,
+ unsigned long mountflags,
+ const void *data) {
+ /* Calls mount() with the FORMAT_PROC_FD_PATH() path of target_fd as the mount target; all other
+ * arguments are passed through unchanged. Returns 0 on success and a negative errno-style value on
+ * failure: -errno from mount() for anything but ENOENT, and for ENOENT either -ENOSYS (when
+ * proc_mounted() returns 0) or -ENOENT (otherwise). */
+ if (mount(source, FORMAT_PROC_FD_PATH(target_fd), filesystemtype, mountflags, data) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't
+ * mounted. Check for the latter to generate better error messages. */
+ if (proc_mounted() == 0)
+ return -ENOSYS;
+
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int mount_nofollow(
+ const char *source,
+ const char *target,
+ const char *filesystemtype,
+ unsigned long mountflags,
+ const void *data) {
+
+ _cleanup_close_ int fd = -1;
+
+ /* In almost all cases we want to manipulate the mount table without following symlinks, hence
+ * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is
+ * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early
+ * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the
+ * fs to mount) we can only use traditional mount() directly.
+ *
+ * Note that this disables following only for the final component of the target, i.e. symlinks within
+ * the path of the target are honoured, as are symlinks in the source path everywhere.
+ *
+ * Returns 0 on success, or a negative errno-style value: -errno if opening the target fails,
+ * otherwise whatever mount_fd() returns. */
+
+ fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return mount_fd(source, fd, filesystemtype, mountflags, data);
+}
+
const char *mount_propagation_flags_to_string(unsigned long flags) {
switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
#include <stdbool.h>
#include <sys/types.h>
+/* The limit used for /dev itself. 4MB should be enough since device nodes and symlinks don't
+ * consume any space and udev isn't supposed to create regular files either. There's no limit on the
+ * max number of inodes since such a limit is hard to guess, especially on large storage array
+ * systems. */
+#define TMPFS_LIMITS_DEV ",size=4m"
+
+/* The limit used for /dev in private namespaces. 4MB for contents of regular files. The number of
+ * inodes should be relatively low in private namespaces but for now use a 64k limit. */
+#define TMPFS_LIMITS_PRIVATE_DEV ",size=4m,nr_inodes=64k"
+
+/* Very little, if any use expected */
+#define TMPFS_LIMITS_EMPTY_OR_ALMOST ",size=4m,nr_inodes=1k"
+#define TMPFS_LIMITS_SYS TMPFS_LIMITS_EMPTY_OR_ALMOST
+#define TMPFS_LIMITS_SYS_FS_CGROUP TMPFS_LIMITS_EMPTY_OR_ALMOST
+
+/* On an extremely small device with only 256MB of RAM, 20% of RAM should be enough for the re-execution of
+ * PID1 because 16MB of free space is required. */
+#define TMPFS_LIMITS_RUN ",size=20%,nr_inodes=800k"
+
+/* The limit used for various nested tmpfs mounts, in particular for guests started by systemd-nspawn.
+ * 10% of RAM (using 16GB of RAM as a baseline) translates to 400k inodes (assuming 4k each) and 25%
+ * translates to 1M inodes.
+ * (On the host, /tmp is configured through a .mount unit file.) */
+#define NESTED_TMPFS_LIMITS ",size=10%,nr_inodes=400k"
+
+/* More space for volatile root and /var */
+#define TMPFS_LIMITS_VAR ",size=25%,nr_inodes=1m"
+#define TMPFS_LIMITS_ROOTFS TMPFS_LIMITS_VAR
+#define TMPFS_LIMITS_VOLATILE_STATE TMPFS_LIMITS_VAR
+
int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
int path_get_mnt_id(const char *path, int *ret);
int dev_is_devtmpfs(void);
+int mount_fd(const char *source, int target_fd, const char *filesystemtype, unsigned long mountflags, const void *data);
+int mount_nofollow(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data);
+
const char *mount_propagation_flags_to_string(unsigned long flags);
int mount_propagation_flags_from_string(const char *name, unsigned long *ret);
#include "memory-util.h"
#include "missing_sched.h"
#include "missing_syscall.h"
+#include "mountpoint-util.h"
#include "namespace-util.h"
#include "nulstr-util.h"
#include "parse-util.h"
}
if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
-
/* Optionally, make sure we never propagate mounts to the host. */
-
if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
_exit(EXIT_FAILURE);
}
}
+ if (FLAGS_SET(flags, FORK_PRIVATE_TMP)) {
+ assert(FLAGS_SET(flags, FORK_NEW_MOUNTNS));
+
+ /* Optionally, overmount new tmpfs instance on /tmp/. */
+ r = mount_nofollow("tmpfs", "/tmp", "tmpfs",
+ MS_NOSUID|MS_NODEV,
+ "mode=01777" TMPFS_LIMITS_RUN);
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to overmount /tmp/: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
if (flags & FORK_CLOSE_ALL_FDS) {
/* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
log_close();
FORK_WAIT = 1 << 7, /* Wait until child exited */
FORK_NEW_MOUNTNS = 1 << 8, /* Run child in its own mount namespace */
FORK_MOUNTNS_SLAVE = 1 << 9, /* Make child's mount namespace MS_SLAVE */
- FORK_RLIMIT_NOFILE_SAFE = 1 << 10, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
- FORK_STDOUT_TO_STDERR = 1 << 11, /* Make stdout a copy of stderr */
- FORK_FLUSH_STDIO = 1 << 12, /* fflush() stdout (and stderr) before forking */
- FORK_NEW_USERNS = 1 << 13, /* Run child in its own user namespace */
- FORK_CLOEXEC_OFF = 1 << 14, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */
+ FORK_PRIVATE_TMP = 1 << 10, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */
+ FORK_RLIMIT_NOFILE_SAFE = 1 << 11, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
+ FORK_STDOUT_TO_STDERR = 1 << 12, /* Make stdout a copy of stderr */
+ FORK_FLUSH_STDIO = 1 << 13, /* fflush() stdout (and stderr) before forking */
+ FORK_NEW_USERNS = 1 << 14, /* Run child in its own user namespace */
+ FORK_CLOEXEC_OFF = 1 << 15, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */
} ForkFlags;
int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid);
#include "tmpfile-util.h"
#include "user-util.h"
-int mount_fd(const char *source,
- int target_fd,
- const char *filesystemtype,
- unsigned long mountflags,
- const void *data) {
-
- if (mount(source, FORMAT_PROC_FD_PATH(target_fd), filesystemtype, mountflags, data) < 0) {
- if (errno != ENOENT)
- return -errno;
-
- /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't
- * mounted. Check for the latter to generate better error messages. */
- if (proc_mounted() == 0)
- return -ENOSYS;
-
- return -ENOENT;
- }
-
- return 0;
-}
-
-int mount_nofollow(
- const char *source,
- const char *target,
- const char *filesystemtype,
- unsigned long mountflags,
- const void *data) {
-
- _cleanup_close_ int fd = -1;
-
- /* In almost all cases we want to manipulate the mount table without following symlinks, hence
- * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is
- * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early
- * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the
- * fs to mount) we can only use traditional mount() directly.
- *
- * Note that this disables following only for the final component of the target, i.e symlinks within
- * the path of the target are honoured, as are symlinks in the source path everywhere. */
-
- fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW);
- if (fd < 0)
- return -errno;
-
- return mount_fd(source, fd, filesystemtype, mountflags, data);
-}
-
int umount_recursive(const char *prefix, int flags) {
int n = 0, r;
bool again;
MountAttrPropagationType mount_attr_propagation_type_from_string(const char *s) _pure_;
unsigned int mount_attr_propagation_type_to_flag(MountAttrPropagationType t);
-/* The limit used for /dev itself. 4MB should be enough since device nodes and symlinks don't
- * consume any space and udev isn't supposed to create regular file either. There's no limit on the
- * max number of inodes since such limit is hard to guess especially on large storage array
- * systems. */
-#define TMPFS_LIMITS_DEV ",size=4m"
-
-/* The limit used for /dev in private namespaces. 4MB for contents of regular files. The number of
- * inodes should be relatively low in private namespaces but for now use a 64k limit. */
-#define TMPFS_LIMITS_PRIVATE_DEV ",size=4m,nr_inodes=64k"
-
-/* Very little, if any use expected */
-#define TMPFS_LIMITS_EMPTY_OR_ALMOST ",size=4m,nr_inodes=1k"
-#define TMPFS_LIMITS_SYS TMPFS_LIMITS_EMPTY_OR_ALMOST
-#define TMPFS_LIMITS_SYS_FS_CGROUP TMPFS_LIMITS_EMPTY_OR_ALMOST
-
-/* On an extremely small device with only 256MB of RAM, 20% of RAM should be enough for the re-execution of
- * PID1 because 16MB of free space is required. */
-#define TMPFS_LIMITS_RUN ",size=20%,nr_inodes=800k"
-
-/* The limit used for various nested tmpfs mounts, in particular for guests started by systemd-nspawn.
- * 10% of RAM (using 16GB of RAM as a baseline) translates to 400k inodes (assuming 4k each) and 25%
- * translates to 1M inodes.
- * (On the host, /tmp is configured through a .mount unit file.) */
-#define NESTED_TMPFS_LIMITS ",size=10%,nr_inodes=400k"
-
-/* More space for volatile root and /var */
-#define TMPFS_LIMITS_VAR ",size=25%,nr_inodes=1m"
-#define TMPFS_LIMITS_ROOTFS TMPFS_LIMITS_VAR
-#define TMPFS_LIMITS_VOLATILE_STATE TMPFS_LIMITS_VAR
-
-int mount_fd(const char *source, int target_fd, const char *filesystemtype, unsigned long mountflags, const void *data);
-int mount_nofollow(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data);
-
int repeat_unmount(const char *path, int flags);
int umount_recursive(const char *target, int flags);
#include "missing_mount.h"
#include "mkdir.h"
#include "mount-util.h"
+#include "mountpoint-util.h"
#include "namespace-util.h"
#include "path-util.h"
#include "process-util.h"