From: Yu Watanabe Date: Wed, 21 Feb 2018 00:17:52 +0000 (+0900) Subject: core: add new setting TemporaryFileSystem= X-Git-Tag: v238~79^2~5 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=2abd4e388a73bfca28eb7d0b63d2aa4f981d9ee2;p=thirdparty%2Fsystemd.git core: add new setting TemporaryFileSystem= This introduces a new setting TemporaryFileSystem=. This is useful to hide files not relevant to the processes invoked by the unit, while necessary files or directories can still be accessed by combining with Bind{,ReadOnly}Paths=. --- diff --git a/src/core/execute.c b/src/core/execute.c index 18ad92c3d9d..4b041edc159 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1793,6 +1793,9 @@ static bool exec_needs_mount_namespace( if (context->n_bind_mounts > 0) return true; + if (context->n_temporary_filesystems > 0) + return true; + if (context->mount_flags != 0) return true; @@ -2371,6 +2374,8 @@ static int apply_mount_namespace( empty_directories, bind_mounts, n_bind_mounts, + context->temporary_filesystems, + context->n_temporary_filesystems, tmp, var, needs_sandboxing ? context->protect_home : PROTECT_HOME_NO, @@ -3623,6 +3628,9 @@ void exec_context_done(ExecContext *c) { bind_mount_free_many(c->bind_mounts, c->n_bind_mounts); c->bind_mounts = NULL; c->n_bind_mounts = 0; + temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems); + c->temporary_filesystems = NULL; + c->n_temporary_filesystems = 0; c->cpuset = cpu_set_mfree(c->cpuset); @@ -4181,6 +4189,16 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { c->bind_mounts[i].destination, c->bind_mounts[i].recursive ? "rbind" : "norbind"); + if (c->n_temporary_filesystems > 0) + for (i = 0; i < c->n_temporary_filesystems; i++) { + TemporaryFileSystem *t = c->temporary_filesystems + i; + + fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix, + t->path, + isempty(t->options) ? 
"" : ":", + strempty(t->options)); + } + if (c->utmp_id) fprintf(f, "%sUtmpIdentifier: %s\n", diff --git a/src/core/execute.h b/src/core/execute.h index 4023b301647..a34cf0b8733 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -219,6 +219,8 @@ struct ExecContext { unsigned long mount_flags; BindMount *bind_mounts; unsigned n_bind_mounts; + TemporaryFileSystem *temporary_filesystems; + unsigned n_temporary_filesystems; uint64_t capability_bounding_set; uint64_t capability_ambient_set; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index dde5010e026..5d90a7c0549 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -104,6 +104,7 @@ $1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, $1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) $1.BindPaths, config_parse_bind_paths, 0, offsetof($1, exec_context) $1.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof($1, exec_context) +$1.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof($1, exec_context) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) $1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index c4f91fb262b..1b9888c10a4 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -4174,6 +4174,83 @@ int config_parse_namespace_path_strv( return 0; } +int config_parse_temporary_filesystems( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Unit *u = userdata; + ExecContext *c = data; + const char *cur; + int r; + + assert(filename); + assert(lvalue); + 
assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + /* Empty assignment resets the list */ + temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems); + c->temporary_filesystems = NULL; + c->n_temporary_filesystems = 0; + return 0; + } + + cur = rvalue; + for (;;) { + _cleanup_free_ char *word = NULL, *path = NULL, *resolved = NULL; + const char *w; + + r = extract_first_word(&cur, &word, NULL, EXTRACT_QUOTES); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract first word, ignoring: %s", rvalue); + return 0; + } + + w = word; /* split "PATH[:OPTIONS]": 'path' receives the part before the first ':', 'w' is left pointing at the options */ + r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + r = unit_full_printf(u, path, &resolved); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve specifiers in %s, ignoring: %m", word); + continue; + } + + if (!path_is_absolute(resolved)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Not an absolute path, ignoring: %s", resolved); + continue; + } + + path_kill_slashes(resolved); + /* Store the specifier-expanded, validated and normalized path, not the raw 'path' word */ + r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, resolved, w); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse mount options, ignoring: %s", word); + continue; + } + } + + return 0; +} + int config_parse_bind_paths( const char *unit, const char *filename, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index cb17bdd3c35..163b5ce4855 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -106,6 +106,7 @@ int config_parse_runtime_preserve_mode(const char *unit, const char *filename, u int config_parse_exec_directories(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void 
*userdata); int config_parse_set_status(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_temporary_filesystems(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_protect_home(const char* unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/core/namespace.c b/src/core/namespace.c index a42220ebe0b..2d82a0778cf 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -65,6 +65,7 @@ typedef enum MountMode { PROCFS, READONLY, READWRITE, + TMPFS, } MountMode; typedef struct MountEntry { @@ -76,6 +77,9 @@ typedef struct MountEntry { char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */ const char *source_const; /* The source path, for bind mounts */ char *source_malloc; + const char *options_const;/* Mount options for tmpfs */ + char *options_malloc; + unsigned long flags; /* Mount flags used by EMPTY_DIR and TMPFS. Do not include MS_RDONLY here, but please use read_only. 
*/ } MountEntry; /* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted @@ -185,11 +189,18 @@ static const char *mount_entry_source(const MountEntry *p) { return p->source_malloc ?: p->source_const; } +static const char *mount_entry_options(const MountEntry *p) { + assert(p); + + return p->options_malloc ?: p->options_const; +} + static void mount_entry_done(MountEntry *p) { assert(p); p->path_malloc = mfree(p->path_malloc); p->source_malloc = mfree(p->source_malloc); + p->options_malloc = mfree(p->options_malloc); } static int append_access_mounts(MountEntry **p, char **strv, MountMode mode, bool forcibly_require_prefix) { @@ -243,6 +254,8 @@ static int append_empty_dir_mounts(MountEntry **p, char **strv) { .ignore = false, .has_prefix = false, .read_only = true, + .options_const = "mode=755", + .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, }; } @@ -269,6 +282,49 @@ static int append_bind_mounts(MountEntry **p, const BindMount *binds, unsigned n return 0; } +static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, unsigned n) { + unsigned i; + int r; + + assert(p); + + for (i = 0; i < n; i++) { + const TemporaryFileSystem *t = tmpfs + i; + _cleanup_free_ char *o = NULL, *str = NULL; + unsigned long flags = MS_NODEV|MS_STRICTATIME; + bool ro = false; + + if (!path_is_absolute(t->path)) + return -EINVAL; + + if (!isempty(t->options)) { + str = strjoin("mode=0755,", t->options); + if (!str) + return -ENOMEM; + + r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o); + if (r < 0) + return r; + + ro = !!(flags & MS_RDONLY); + if (ro) + flags ^= MS_RDONLY; + } + /* Without user options 'o' stays NULL; fall back to options_const so the 0755 default applies in both cases */ + *((*p)++) = (MountEntry) { + .path_const = t->path, + .mode = TMPFS, + .read_only = ro, + .options_const = "mode=0755", .options_malloc = o, /* mount_entry_options() prefers options_malloc when set */ + .flags = flags, + }; + + o = NULL; + } + + return 0; +} + static int append_static_mounts(MountEntry **p, const MountEntry *mounts, unsigned n, bool ignore_protect) { unsigned i; @@ -711,15 
+767,15 @@ static int mount_procfs(const MountEntry *m) { return 1; } -static int mount_empty_dir(const MountEntry *m) { +static int mount_tmpfs(const MountEntry *m) { assert(m); - /* First, get rid of everything that is below if there is anything. Then, overmount with our new empty dir */ + /* First, get rid of everything that is below if there is anything. Then, overmount with our new tmpfs */ (void) mkdir_p_label(mount_entry_path(m), 0755); (void) umount_recursive(mount_entry_path(m), 0); - if (mount("tmpfs", mount_entry_path(m), "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, "mode=755") < 0) + if (mount("tmpfs", mount_entry_path(m), "tmpfs", m->flags, mount_entry_options(m)) < 0) return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m)); return 1; @@ -821,7 +877,8 @@ static int apply_mount( break; case EMPTY_DIR: - return mount_empty_dir(m); + case TMPFS: + return mount_tmpfs(m); case PRIVATE_TMP: what = mount_entry_source(m); @@ -887,9 +944,15 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self assert(m); assert(proc_self_mountinfo); - if (mount_entry_read_only(m)) - r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo); - else if (m->mode == PRIVATE_DEV) { /* Superblock can be readonly but the submounts can't */ + if (mount_entry_read_only(m)) { + if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) { + /* Make superblock readonly */ + if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT | MS_RDONLY | m->flags, mount_entry_options(m)) < 0) + r = -errno; + } else + r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo); + } else if (m->mode == PRIVATE_DEV) { + /* Superblock can be readonly but the submounts can't */ if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) r = -errno; } else @@ -929,6 +992,7 @@ static unsigned namespace_calculate_mounts( char** inaccessible_paths, char** 
empty_directories, unsigned n_bind_mounts, + unsigned n_temporary_filesystems, const char* tmp_dir, const char* var_tmp_dir, ProtectHome protect_home, @@ -955,6 +1019,7 @@ static unsigned namespace_calculate_mounts( strv_length(inaccessible_paths) + strv_length(empty_directories) + n_bind_mounts + + n_temporary_filesystems + ns_info->private_dev + (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + (ns_info->protect_control_groups ? 1 : 0) + @@ -973,6 +1038,8 @@ int setup_namespace( char** empty_directories, const BindMount *bind_mounts, unsigned n_bind_mounts, + const TemporaryFileSystem *temporary_filesystems, + unsigned n_temporary_filesystems, const char* tmp_dir, const char* var_tmp_dir, ProtectHome protect_home, @@ -1024,7 +1091,7 @@ int setup_namespace( if (root_directory) root = root_directory; - else if (root_image || n_bind_mounts > 0) { + else if (root_image || n_bind_mounts > 0 || n_temporary_filesystems > 0) { /* If we are booting from an image, create a mount point for the image, if it's still missing. 
We use * the same mount point for all images, which is safe, since they all live in their own namespaces @@ -1046,6 +1113,7 @@ int setup_namespace( inaccessible_paths, empty_directories, n_bind_mounts, + n_temporary_filesystems, tmp_dir, var_tmp_dir, protect_home, protect_system); @@ -1075,6 +1143,10 @@ int setup_namespace( if (r < 0) goto finish; + r = append_tmpfs_mounts(&m, temporary_filesystems, n_temporary_filesystems); + if (r < 0) + goto finish; + if (tmp_dir) { *(m++) = (MountEntry) { .path_const = "/tmp", @@ -1305,6 +1377,57 @@ int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item) { return 0; } +void temporary_filesystem_free_many(TemporaryFileSystem *t, unsigned n) { + unsigned i; + + assert(t || n == 0); + + for (i = 0; i < n; i++) { + free(t[i].path); + free(t[i].options); + } + + free(t); +} + +int temporary_filesystem_add( + TemporaryFileSystem **t, + unsigned *n, + const char *path, + const char *options) { + + _cleanup_free_ char *p = NULL, *o = NULL; + TemporaryFileSystem *c; + + assert(t); + assert(n); + assert(path); + + p = strdup(path); + if (!p) + return -ENOMEM; + + if (!isempty(options)) { + o = strdup(options); + if (!o) + return -ENOMEM; + } + + c = realloc_multiply(*t, sizeof(TemporaryFileSystem), *n + 1); + if (!c) + return -ENOMEM; + + *t = c; + + c[(*n) ++] = (TemporaryFileSystem) { + .path = p, + .options = o, + }; + + p = o = NULL; + return 0; +} + static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) { _cleanup_free_ char *x = NULL; char bid[SD_ID128_STRING_MAX]; diff --git a/src/core/namespace.h b/src/core/namespace.h index 42d841c4d29..df7be7d1f2a 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -23,6 +23,7 @@ typedef struct NamespaceInfo NamespaceInfo; typedef struct BindMount BindMount; +typedef struct TemporaryFileSystem TemporaryFileSystem; #include @@ -75,6 +76,11 @@ struct BindMount { bool ignore_enoent:1; }; +struct TemporaryFileSystem { + char *path; + char *options; 
+}; + int setup_namespace( const char *root_directory, const char *root_image, @@ -85,6 +91,8 @@ int setup_namespace( char **empty_directories, const BindMount *bind_mounts, unsigned n_bind_mounts, + const TemporaryFileSystem *temporary_filesystems, + unsigned n_temporary_filesystems, const char *tmp_dir, const char *var_tmp_dir, ProtectHome protect_home, @@ -110,6 +118,10 @@ ProtectSystem parse_protect_system_or_bool(const char *s); void bind_mount_free_many(BindMount *b, unsigned n); int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item); +void temporary_filesystem_free_many(TemporaryFileSystem *t, unsigned n); +int temporary_filesystem_add(TemporaryFileSystem **t, unsigned *n, + const char *path, const char *options); + const char* namespace_type_to_string(NamespaceType t) _const_; NamespaceType namespace_type_from_string(const char *s) _pure_; diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 87b4facb857..3ab3c1ab95b 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -86,6 +86,7 @@ int main(int argc, char *argv[]) { (char **) inaccessible, NULL, &(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1, + &(TemporaryFileSystem) { .path = (char*) "/var", .options = (char*) "ro" }, 1, tmp_dir, var_tmp_dir, PROTECT_HOME_NO,