]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add new setting TemporaryFileSystem=
authorYu Watanabe <watanabe.yu+github@gmail.com>
Wed, 21 Feb 2018 00:17:52 +0000 (09:17 +0900)
committerYu Watanabe <watanabe.yu+github@gmail.com>
Wed, 21 Feb 2018 00:17:52 +0000 (09:17 +0900)
This introduces a new setting, TemporaryFileSystem=. It is useful for
hiding files that are not relevant to the processes invoked by the unit,
while necessary files or directories can still be accessed by combining
it with Bind{,ReadOnly}Paths=.

src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.m4
src/core/load-fragment.c
src/core/load-fragment.h
src/core/namespace.c
src/core/namespace.h
src/test/test-ns.c

index 18ad92c3d9d2be597283b4547e18d24cfe8a68cd..4b041edc159133ce82900c92f7936797610af930 100644 (file)
@@ -1793,6 +1793,9 @@ static bool exec_needs_mount_namespace(
         if (context->n_bind_mounts > 0)
                 return true;
 
+        if (context->n_temporary_filesystems > 0)
+                return true;
+
         if (context->mount_flags != 0)
                 return true;
 
@@ -2371,6 +2374,8 @@ static int apply_mount_namespace(
                             empty_directories,
                             bind_mounts,
                             n_bind_mounts,
+                            context->temporary_filesystems,
+                            context->n_temporary_filesystems,
                             tmp,
                             var,
                             needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
@@ -3623,6 +3628,9 @@ void exec_context_done(ExecContext *c) {
         bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
         c->bind_mounts = NULL;
         c->n_bind_mounts = 0;
+        temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
+        c->temporary_filesystems = NULL;
+        c->n_temporary_filesystems = 0;
 
         c->cpuset = cpu_set_mfree(c->cpuset);
 
@@ -4181,6 +4189,16 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                                 c->bind_mounts[i].destination,
                                 c->bind_mounts[i].recursive ? "rbind" : "norbind");
 
+        if (c->n_temporary_filesystems > 0)
+                for (i = 0; i < c->n_temporary_filesystems; i++) {
+                        TemporaryFileSystem *t = c->temporary_filesystems + i;
+
+                        fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
+                                t->path,
+                                isempty(t->options) ? "" : ":",
+                                strempty(t->options));
+                }
+
         if (c->utmp_id)
                 fprintf(f,
                         "%sUtmpIdentifier: %s\n",
index 4023b3016472dfb4b0a7f4f7b6adc089d5d6057b..a34cf0b87333db3cbbc51ce25241d98a18932aa1 100644 (file)
@@ -219,6 +219,8 @@ struct ExecContext {
         unsigned long mount_flags;
         BindMount *bind_mounts;
         unsigned n_bind_mounts;
+        TemporaryFileSystem *temporary_filesystems;
+        unsigned n_temporary_filesystems;
 
         uint64_t capability_bounding_set;
         uint64_t capability_ambient_set;
index dde5010e02683222061591da3d5da85d4e3c49d1..5d90a7c054961c4864c9644a23d21f846b3a777b 100644 (file)
@@ -104,6 +104,7 @@ $1.ReadOnlyPaths,                config_parse_namespace_path_strv,   0,
 $1.InaccessiblePaths,            config_parse_namespace_path_strv,   0,                             offsetof($1, exec_context.inaccessible_paths)
 $1.BindPaths,                    config_parse_bind_paths,            0,                             offsetof($1, exec_context)
 $1.BindReadOnlyPaths,            config_parse_bind_paths,            0,                             offsetof($1, exec_context)
+$1.TemporaryFileSystem,          config_parse_temporary_filesystems, 0,                             offsetof($1, exec_context)
 $1.PrivateTmp,                   config_parse_bool,                  0,                             offsetof($1, exec_context.private_tmp)
 $1.PrivateDevices,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_devices)
 $1.ProtectKernelTunables,        config_parse_bool,                  0,                             offsetof($1, exec_context.protect_kernel_tunables)
index c4f91fb262b0cb3a914296935cf790817daa503b..1b9888c10a479e0b0aebf2dc8a879e85e456e4fd 100644 (file)
@@ -4174,6 +4174,83 @@ int config_parse_namespace_path_strv(
         return 0;
 }
 
+/* Parses TemporaryFileSystem=, a whitespace-separated list of "PATH[:OPTIONS]" words, into the ExecContext
+ * passed as 'data'. An empty assignment resets the list. For every word the path part has unit specifiers
+ * expanded, must be absolute and is run through path_kill_slashes() before being stored together with the
+ * remaining option string. A malformed word is logged and skipped rather than failing the whole assignment;
+ * allocation failures are propagated via log_oom(). */
+int config_parse_temporary_filesystems(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        Unit *u = userdata;
+        ExecContext *c = data;
+        const char *cur;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (isempty(rvalue)) {
+                /* Empty assignment resets the list */
+                temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
+                c->temporary_filesystems = NULL;
+                c->n_temporary_filesystems = 0;
+                return 0;
+        }
+
+        cur = rvalue;
+        for (;;) {
+                _cleanup_free_ char *word = NULL, *path = NULL, *resolved = NULL;
+                const char *w;
+
+                r = extract_first_word(&cur, &word, NULL, EXTRACT_QUOTES);
+                if (r == 0)
+                        break;
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract first word, ignoring: %s", rvalue);
+                        return 0;
+                }
+
+                /* Split "PATH[:OPTIONS]": 'path' receives the part before the first colon, 'w' is left
+                 * pointing at whatever follows it. */
+                w = word;
+                r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract first word, ignoring: %s", word);
+                        continue;
+                }
+                if (r == 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid syntax, ignoring: %s", word);
+                        continue;
+                }
+
+                r = unit_full_printf(u, path, &resolved);
+                if (r < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve specifiers in %s, ignoring: %m", word);
+                        continue;
+                }
+
+                if (!path_is_absolute(resolved)) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Not an absolute path, ignoring: %s", resolved);
+                        continue;
+                }
+
+                path_kill_slashes(resolved);
+
+                /* Store the resolved path, i.e. the one that was just validated above, not the raw
+                 * string from the unit file. */
+                r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, resolved, w);
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse mount options, ignoring: %s", word);
+                        continue;
+                }
+        }
+
+        return 0;
+}
+
 int config_parse_bind_paths(
                 const char *unit,
                 const char *filename,
index cb17bdd3c353e77211d4ea0c3c4fc768cc4ee41a..163b5ce4855068c124079f25bf25e8f809c19668 100644 (file)
@@ -106,6 +106,7 @@ int config_parse_runtime_preserve_mode(const char *unit, const char *filename, u
 int config_parse_exec_directories(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_set_status(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_temporary_filesystems(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_protect_home(const char* unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
index a42220ebe0b2fcdfcc921cbf23ad2d44f9ae5f8f..2d82a0778cf28cd7340f9fe7b582cbb66559e0d2 100644 (file)
@@ -65,6 +65,7 @@ typedef enum MountMode {
         PROCFS,
         READONLY,
         READWRITE,
+        TMPFS,
 } MountMode;
 
 typedef struct MountEntry {
@@ -76,6 +77,9 @@ typedef struct MountEntry {
         char *path_malloc;        /* Use this instead of 'path_const' if we had to allocate memory */
         const char *source_const; /* The source path, for bind mounts */
         char *source_malloc;
+        const char *options_const;/* Mount options for tmpfs */
+        char *options_malloc;
+        unsigned long flags;      /* Mount flags used by EMPTY_DIR and TMPFS. Do not include MS_RDONLY here, but please use read_only. */
 } MountEntry;
 
 /* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted
@@ -185,11 +189,18 @@ static const char *mount_entry_source(const MountEntry *p) {
         return p->source_malloc ?: p->source_const;
 }
 
+/* Returns the mount option string of the entry: 'options_malloc' when it is set, otherwise 'options_const'
+ * (which may itself be NULL). */
+static const char *mount_entry_options(const MountEntry *p) {
+        assert(p);
+
+        if (p->options_malloc)
+                return p->options_malloc;
+
+        return p->options_const;
+}
+
 static void mount_entry_done(MountEntry *p) {
         assert(p);
 
         p->path_malloc = mfree(p->path_malloc);
         p->source_malloc = mfree(p->source_malloc);
+        p->options_malloc = mfree(p->options_malloc);
 }
 
 static int append_access_mounts(MountEntry **p, char **strv, MountMode mode, bool forcibly_require_prefix) {
@@ -243,6 +254,8 @@ static int append_empty_dir_mounts(MountEntry **p, char **strv) {
                         .ignore = false,
                         .has_prefix = false,
                         .read_only = true,
+                        .options_const = "mode=755",
+                        .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
                 };
         }
 
@@ -269,6 +282,49 @@ static int append_bind_mounts(MountEntry **p, const BindMount *binds, unsigned n
         return 0;
 }
 
+/* Appends one TMPFS MountEntry per TemporaryFileSystem item at '*p', advancing '*p' past the new entries.
+ * Every mount gets "mode=0755" plus MS_NODEV|MS_STRICTATIME as a baseline; user supplied options are
+ * appended to that and split into mount flags and an option string by mount_option_mangle(). A requested
+ * read-only mount is recorded in the entry's 'read_only' field instead of in 'flags'.
+ *
+ * Returns 0 on success, -EINVAL if a path is not absolute, -ENOMEM on allocation failure, or the error
+ * returned by mount_option_mangle(). */
+static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, unsigned n) {
+        unsigned i;
+        int r;
+
+        assert(p);
+
+        for (i = 0; i < n; i++) {
+                const TemporaryFileSystem *t = tmpfs + i;
+                _cleanup_free_ char *o = NULL, *str = NULL;
+                unsigned long flags = MS_NODEV|MS_STRICTATIME;
+                bool ro = false;
+
+                if (!path_is_absolute(t->path))
+                        return -EINVAL;
+
+                if (isempty(t->options)) {
+                        /* No user options: still pass mode=0755, otherwise the mount data would be NULL
+                         * and the tmpfs would come up with the kernel's default mode instead. */
+                        o = strdup("mode=0755");
+                        if (!o)
+                                return -ENOMEM;
+                } else {
+                        str = strjoin("mode=0755,", t->options);
+                        if (!str)
+                                return -ENOMEM;
+
+                        r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o);
+                        if (r < 0)
+                                return r;
+
+                        /* MS_RDONLY must not stay in 'flags'; it is tracked via 'read_only' */
+                        ro = !!(flags & MS_RDONLY);
+                        if (ro)
+                                flags ^= MS_RDONLY;
+                }
+
+                *((*p)++) = (MountEntry) {
+                        .path_const = t->path,
+                        .mode = TMPFS,
+                        .read_only = ro,
+                        .options_malloc = o,
+                        .flags = flags,
+                };
+
+                /* The option string now belongs to the MountEntry */
+                o = NULL;
+        }
+
+        return 0;
+}
+
 static int append_static_mounts(MountEntry **p, const MountEntry *mounts, unsigned n, bool ignore_protect) {
         unsigned i;
 
@@ -711,15 +767,15 @@ static int mount_procfs(const MountEntry *m) {
         return 1;
 }
 
-static int mount_empty_dir(const MountEntry *m) {
+static int mount_tmpfs(const MountEntry *m) {
         assert(m);
 
-        /* First, get rid of everything that is below if there is anything. Then, overmount with our new empty dir */
+        /* First, get rid of everything that is below if there is anything. Then, overmount with our new tmpfs */
 
         (void) mkdir_p_label(mount_entry_path(m), 0755);
         (void) umount_recursive(mount_entry_path(m), 0);
 
-        if (mount("tmpfs", mount_entry_path(m), "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, "mode=755") < 0)
+        if (mount("tmpfs", mount_entry_path(m), "tmpfs", m->flags, mount_entry_options(m)) < 0)
                 return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m));
 
         return 1;
@@ -821,7 +877,8 @@ static int apply_mount(
                 break;
 
         case EMPTY_DIR:
-                return mount_empty_dir(m);
+        case TMPFS:
+                return mount_tmpfs(m);
 
         case PRIVATE_TMP:
                 what = mount_entry_source(m);
@@ -887,9 +944,15 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self
         assert(m);
         assert(proc_self_mountinfo);
 
-        if (mount_entry_read_only(m))
-                r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
-        else if (m->mode == PRIVATE_DEV) { /* Superblock can be readonly but the submounts can't */
+        if (mount_entry_read_only(m)) {
+                if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) {
+                        /* Make superblock readonly */
+                        if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT | MS_RDONLY | m->flags, mount_entry_options(m)) < 0)
+                                r = -errno;
+                } else
+                        r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
+        } else if (m->mode == PRIVATE_DEV) {
+                /* Superblock can be readonly but the submounts can't */
                 if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
                         r = -errno;
         } else
@@ -929,6 +992,7 @@ static unsigned namespace_calculate_mounts(
                 char** inaccessible_paths,
                 char** empty_directories,
                 unsigned n_bind_mounts,
+                unsigned n_temporary_filesystems,
                 const char* tmp_dir,
                 const char* var_tmp_dir,
                 ProtectHome protect_home,
@@ -955,6 +1019,7 @@ static unsigned namespace_calculate_mounts(
                 strv_length(inaccessible_paths) +
                 strv_length(empty_directories) +
                 n_bind_mounts +
+                n_temporary_filesystems +
                 ns_info->private_dev +
                 (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
                 (ns_info->protect_control_groups ? 1 : 0) +
@@ -973,6 +1038,8 @@ int setup_namespace(
                 char** empty_directories,
                 const BindMount *bind_mounts,
                 unsigned n_bind_mounts,
+                const TemporaryFileSystem *temporary_filesystems,
+                unsigned n_temporary_filesystems,
                 const char* tmp_dir,
                 const char* var_tmp_dir,
                 ProtectHome protect_home,
@@ -1024,7 +1091,7 @@ int setup_namespace(
 
         if (root_directory)
                 root = root_directory;
-        else if (root_image || n_bind_mounts > 0) {
+        else if (root_image || n_bind_mounts > 0 || n_temporary_filesystems > 0) {
 
                 /* If we are booting from an image, create a mount point for the image, if it's still missing. We use
                  * the same mount point for all images, which is safe, since they all live in their own namespaces
@@ -1046,6 +1113,7 @@ int setup_namespace(
                         inaccessible_paths,
                         empty_directories,
                         n_bind_mounts,
+                        n_temporary_filesystems,
                         tmp_dir, var_tmp_dir,
                         protect_home, protect_system);
 
@@ -1075,6 +1143,10 @@ int setup_namespace(
                 if (r < 0)
                         goto finish;
 
+                r = append_tmpfs_mounts(&m, temporary_filesystems, n_temporary_filesystems);
+                if (r < 0)
+                        goto finish;
+
                 if (tmp_dir) {
                         *(m++) = (MountEntry) {
                                 .path_const = "/tmp",
@@ -1305,6 +1377,57 @@ int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item) {
         return 0;
 }
 
+/* Releases an array of 'n' TemporaryFileSystem items: the 'path' and 'options' strings of each element
+ * first, then the array itself. 't' may only be NULL when 'n' is zero. */
+void temporary_filesystem_free_many(TemporaryFileSystem *t, unsigned n) {
+        unsigned idx = 0;
+
+        assert(t || n == 0);
+
+        while (idx < n) {
+                TemporaryFileSystem *item = t + idx;
+
+                free(item->path);
+                free(item->options);
+                idx++;
+        }
+
+        free(t);
+}
+
+/* Appends one item to the array '*t' holding '*n' elements, growing the array by one slot. Both 'path' and
+ * 'options' are duplicated; when isempty(options) holds, the item's 'options' stays NULL. Returns 0 on
+ * success and -ENOMEM if any allocation fails, in which case '*t' and '*n' are not modified. */
+int temporary_filesystem_add(
+                TemporaryFileSystem **t,
+                unsigned *n,
+                const char *path,
+                const char *options) {
+
+        _cleanup_free_ char *path_copy = NULL, *options_copy = NULL;
+        TemporaryFileSystem *grown;
+
+        assert(t);
+        assert(n);
+        assert(path);
+
+        path_copy = strdup(path);
+        if (!path_copy)
+                return -ENOMEM;
+
+        if (!isempty(options)) {
+                options_copy = strdup(options);
+                if (!options_copy)
+                        return -ENOMEM;
+        }
+
+        grown = realloc_multiply(*t, sizeof(TemporaryFileSystem), *n + 1);
+        if (!grown)
+                return -ENOMEM;
+
+        *t = grown;
+
+        grown[*n] = (TemporaryFileSystem) {
+                .path = path_copy,
+                .options = options_copy,
+        };
+        (*n)++;
+
+        /* Both strings are owned by the array now; clear the locals so they are not released on return */
+        path_copy = options_copy = NULL;
+        return 0;
+}
+
 static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
         _cleanup_free_ char *x = NULL;
         char bid[SD_ID128_STRING_MAX];
index 42d841c4d291abe22d028782ae0e9ba8af905ba1..df7be7d1f2a30ff43314c05ed2de8d2e40032acd 100644 (file)
@@ -23,6 +23,7 @@
 
 typedef struct NamespaceInfo NamespaceInfo;
 typedef struct BindMount BindMount;
+typedef struct TemporaryFileSystem TemporaryFileSystem;
 
 #include <stdbool.h>
 
@@ -75,6 +76,11 @@ struct BindMount {
         bool ignore_enoent:1;
 };
 
+struct TemporaryFileSystem { /* One tmpfs mount request; arrays of these are built by temporary_filesystem_add() */
+        char *path;     /* Where the tmpfs gets mounted; released by temporary_filesystem_free_many() */
+        char *options;  /* Mount option string, NULL when none was given; released by temporary_filesystem_free_many() */
+};
+
 int setup_namespace(
                 const char *root_directory,
                 const char *root_image,
@@ -85,6 +91,8 @@ int setup_namespace(
                 char **empty_directories,
                 const BindMount *bind_mounts,
                 unsigned n_bind_mounts,
+                const TemporaryFileSystem *temporary_filesystems,
+                unsigned n_temporary_filesystems,
                 const char *tmp_dir,
                 const char *var_tmp_dir,
                 ProtectHome protect_home,
@@ -110,6 +118,10 @@ ProtectSystem parse_protect_system_or_bool(const char *s);
 void bind_mount_free_many(BindMount *b, unsigned n);
 int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item);
 
+void temporary_filesystem_free_many(TemporaryFileSystem *t, unsigned n);
+int temporary_filesystem_add(TemporaryFileSystem **t, unsigned *n,
+                             const char *path, const char *options);
+
 const char* namespace_type_to_string(NamespaceType t) _const_;
 NamespaceType namespace_type_from_string(const char *s) _pure_;
 
index 87b4facb8572e4e99d60db7aadfba1b72abf3670..3ab3c1ab95be71a6e7f069f2277af1b20fbb48eb 100644 (file)
@@ -86,6 +86,7 @@ int main(int argc, char *argv[]) {
                             (char **) inaccessible,
                             NULL,
                             &(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1,
+                            &(TemporaryFileSystem) { .path = (char*) "/var", .options = (char*) "ro" }, 1,
                             tmp_dir,
                             var_tmp_dir,
                             PROTECT_HOME_NO,