]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core:sandbox: lets make /lib/modules/ inaccessible on ProtectKernelModules=
authorDjalal Harouni <tixxdz@opendz.org>
Wed, 12 Oct 2016 12:11:16 +0000 (14:11 +0200)
committerDjalal Harouni <tixxdz@opendz.org>
Wed, 12 Oct 2016 12:11:16 +0000 (14:11 +0200)
Let's go further and make /lib/modules/ inaccessible for services that
have no business with modules. This is a minor improvement, but it may
help on setups with custom modules, which are limited with regard to the
kernel auto-load feature.

This change introduces the NameSpaceInfo struct, which we may later embed
inside ExecContext; for now, let's just use it to reduce the number of
arguments to setup_namespace() and merge in the ProtectKernelModules= feature.

man/systemd.exec.xml
src/core/execute.c
src/core/namespace.c
src/core/namespace.h
src/test/test-ns.c

index 4a6869534831311b0caffc23569563fef8616e46..249fcb0363e86d9f24f15bcbc6686a47954f0229 100644 (file)
         kernels. It is recomended to turn this on for most services that do not need special
         file systems or extra kernel modules to work. Default to off. Enabling this option
         removes <constant>CAP_SYS_MODULE</constant> from the capability bounding set for
-        the unit, and installs a system call filter to block module system calls.
+        the unit, and installs a system call filter to block module system calls,
+        also <filename>/usr/lib/modules</filename> is made inaccessible. For this
+        setting the same restrictions regarding mount propagation and privileges
+        apply as for <varname>ReadOnlyPaths=</varname> and related calls, see above.
         Note that limited automatic module loading due to user configuration or kernel
         mapping tables might still happen as side effect of requested user operations,
         both privileged and unprivileged. To disable module auto-load feature please see
index 7a278b7d3193792359f0b7622aa109897f4e7012..dc078d96f0de7fae731d046fae9770a3a2d9b82b 100644 (file)
@@ -1766,6 +1766,7 @@ static bool exec_needs_mount_namespace(
             context->protect_system != PROTECT_SYSTEM_NO ||
             context->protect_home != PROTECT_HOME_NO ||
             context->protect_kernel_tunables ||
+            context->protect_kernel_modules ||
             context->protect_control_groups)
                 return true;
 
@@ -2493,6 +2494,12 @@ static int exec_child(
         if (needs_mount_namespace) {
                 _cleanup_free_ char **rw = NULL;
                 char *tmp = NULL, *var = NULL;
+                NameSpaceInfo ns_info = {
+                        .private_dev = context->private_devices,
+                        .protect_control_groups = context->protect_control_groups,
+                        .protect_kernel_tunables = context->protect_kernel_tunables,
+                        .protect_kernel_modules = context->protect_kernel_modules,
+                };
 
                 /* The runtime struct only contains the parent
                  * of the private /tmp, which is
@@ -2515,14 +2522,12 @@ static int exec_child(
 
                 r = setup_namespace(
                                 (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL,
+                                &ns_info,
                                 rw,
                                 context->read_only_paths,
                                 context->inaccessible_paths,
                                 tmp,
                                 var,
-                                context->private_devices,
-                                context->protect_kernel_tunables,
-                                context->protect_control_groups,
                                 context->protect_home,
                                 context->protect_system,
                                 context->mount_flags);
index 43a2f4ba6e2ce8ea5cabc859405618ce351ec20b..1195e9a8544291dcb4e71877a420fb3bfe716152 100644 (file)
@@ -97,6 +97,14 @@ static const TargetMount protect_kernel_tunables_table[] = {
         { "/sys/fs/cgroup",             READWRITE,      false }, /* READONLY is set by ProtectControlGroups= option */
 };
 
+/* ProtectKernelModules= option */
+static const TargetMount protect_kernel_modules_table[] = {
+#ifdef HAVE_SPLIT_USR
+        { "/lib/modules",               INACCESSIBLE,   true  },
+#endif
+        { "/usr/lib/modules",           INACCESSIBLE,   true  },
+};
+
 /*
  * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
  * system should be protected by ProtectSystem=
@@ -207,6 +215,13 @@ static int append_protect_kernel_tunables(BindMount **p, const char *root_direct
                                     ELEMENTSOF(protect_kernel_tunables_table));
 }
 
+static int append_protect_kernel_modules(BindMount **p, const char *root_directory) {
+        assert(p);
+
+        return append_target_mounts(p, root_directory, protect_kernel_modules_table,
+                                    ELEMENTSOF(protect_kernel_modules_table));
+}
+
 static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) {
         int r = 0;
 
@@ -660,14 +675,12 @@ static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned
 }
 
 static unsigned namespace_calculate_mounts(
+                const NameSpaceInfo *ns_info,
                 char** read_write_paths,
                 char** read_only_paths,
                 char** inaccessible_paths,
                 const char* tmp_dir,
                 const char* var_tmp_dir,
-                bool private_dev,
-                bool protect_sysctl,
-                bool protect_cgroups,
                 ProtectHome protect_home,
                 ProtectSystem protect_system) {
 
@@ -690,22 +703,21 @@ static unsigned namespace_calculate_mounts(
                 strv_length(read_write_paths) +
                 strv_length(read_only_paths) +
                 strv_length(inaccessible_paths) +
-                private_dev +
-                (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
-                (protect_cgroups ? 1 : 0) +
+                ns_info->private_dev +
+                (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+                (ns_info->protect_control_groups ? 1 : 0) +
+                (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
                 protect_home_cnt + protect_system_cnt;
 }
 
 int setup_namespace(
                 const char* root_directory,
+                const NameSpaceInfo *ns_info,
                 char** read_write_paths,
                 char** read_only_paths,
                 char** inaccessible_paths,
                 const char* tmp_dir,
                 const char* var_tmp_dir,
-                bool private_dev,
-                bool protect_sysctl,
-                bool protect_cgroups,
                 ProtectHome protect_home,
                 ProtectSystem protect_system,
                 unsigned long mount_flags) {
@@ -718,13 +730,12 @@ int setup_namespace(
         if (mount_flags == 0)
                 mount_flags = MS_SHARED;
 
-        n = namespace_calculate_mounts(read_write_paths,
+        n = namespace_calculate_mounts(ns_info,
+                                       read_write_paths,
                                        read_only_paths,
                                        inaccessible_paths,
                                        tmp_dir, var_tmp_dir,
-                                       private_dev, protect_sysctl,
-                                       protect_cgroups, protect_home,
-                                       protect_system);
+                                       protect_home, protect_system);
 
         /* Set mount slave mode */
         if (root_directory || n > 0)
@@ -756,16 +767,25 @@ int setup_namespace(
                         m++;
                 }
 
-                if (private_dev) {
+                if (ns_info->private_dev) {
                         m->path = prefix_roota(root_directory, "/dev");
                         m->mode = PRIVATE_DEV;
                         m++;
                 }
 
-                if (protect_sysctl)
-                        append_protect_kernel_tunables(&m, root_directory);
+                if (ns_info->protect_kernel_tunables) {
+                        r = append_protect_kernel_tunables(&m, root_directory);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (ns_info->protect_kernel_modules) {
+                        r = append_protect_kernel_modules(&m, root_directory);
+                        if (r < 0)
+                                return r;
+                }
 
-                if (protect_cgroups) {
+                if (ns_info->protect_control_groups) {
                         m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
                         m->mode = READONLY;
                         m++;
index 6505bcc499eaee44901c3e26fa92f9449324f714..6310638e9ad8a49618a8783f3a7db8bcd76f738a 100644 (file)
@@ -4,6 +4,7 @@
   This file is part of systemd.
 
   Copyright 2010 Lennart Poettering
+  Copyright 2016 Djalal Harouni
 
   systemd is free software; you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +20,8 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+typedef struct NameSpaceInfo NameSpaceInfo;
+
 #include <stdbool.h>
 
 #include "macro.h"
@@ -40,15 +43,20 @@ typedef enum ProtectSystem {
         _PROTECT_SYSTEM_INVALID = -1
 } ProtectSystem;
 
+struct NameSpaceInfo {
+        bool private_dev:1;
+        bool protect_control_groups:1;
+        bool protect_kernel_tunables:1;
+        bool protect_kernel_modules:1;
+};
+
 int setup_namespace(const char *chroot,
+                    const NameSpaceInfo *ns_info,
                     char **read_write_paths,
                     char **read_only_paths,
                     char **inaccessible_paths,
                     const char *tmp_dir,
                     const char *var_tmp_dir,
-                    bool private_dev,
-                    bool protect_sysctl,
-                    bool protect_cgroups,
                     ProtectHome protect_home,
                     ProtectSystem protect_system,
                     unsigned long mount_flags);
index c4d4da6d0599cd1619f37e5fa2b15b88f2d6dbc2..da7a8b0565a61fda36025bf660fccf1a1651f680 100644 (file)
@@ -45,6 +45,14 @@ int main(int argc, char *argv[]) {
                 "/home/lennart/projects",
                 NULL
         };
+
+        static const NameSpaceInfo ns_info = {
+                .private_dev = true,
+                .protect_control_groups = true,
+                .protect_kernel_tunables = true,
+                .protect_kernel_modules = true,
+        };
+
         char *root_directory;
         char *projects_directory;
         int r;
@@ -69,14 +77,12 @@ int main(int argc, char *argv[]) {
                 log_info("Not chrooted");
 
         r = setup_namespace(root_directory,
+                            &ns_info,
                             (char **) writable,
                             (char **) readonly,
                             (char **) inaccessible,
                             tmp_dir,
                             var_tmp_dir,
-                            true,
-                            true,
-                            true,
                             PROTECT_HOME_NO,
                             PROTECT_SYSTEM_NO,
                             0);