]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: set NoNewPrivileges for seccomp if we don't have CAP_SYS_ADMIN
authorRonny Chevalier <chevalier.ronny@gmail.com>
Sat, 30 Jan 2016 16:26:39 +0000 (17:26 +0100)
committerRonny Chevalier <chevalier.ronny@gmail.com>
Sun, 28 Feb 2016 13:44:26 +0000 (14:44 +0100)
The seccomp manpage specifies that using seccomp with
SECCOMP_SET_MODE_FILTER will return EACCES if the caller has neither
CAP_SYS_ADMIN nor the no_new_privileges bit set. Hence,
without NoNewPrivileges set, it is impossible to use a SystemCall*
directive with a User directive set in system mode.

Now, when SystemCall* directives are used, NoNewPrivileges is set if
we are in user mode, or if we are in system mode and we don't have
CAP_SYS_ADMIN.

Makefile.am
man/systemd.exec.xml
src/core/execute.c
src/test/test-execute.c
test/test-execute/exec-systemcallfilter-system-user.service [new file with mode: 0644]

index 7bd98dddf65e97e4c2354de3671a56d7b271e006..02557ef46adbd0ee6d0c2cc06f96cadf873c5423 100644 (file)
@@ -1556,6 +1556,7 @@ EXTRA_DIST += \
        test/test-execute/exec-systemcallfilter-failing.service \
        test/test-execute/exec-systemcallfilter-not-failing2.service \
        test/test-execute/exec-systemcallfilter-not-failing.service \
+       test/test-execute/exec-systemcallfilter-system-user.service \
        test/test-execute/exec-user.service \
        test/test-execute/exec-workingdirectory.service \
        test/test-execute/exec-umask-0177.service \
index c1f47e84e6565d6af1e4906ab26bec93c62b6fc5..3e1a2cb22461cb48012fbd31f3cd2e0d1f262690 100644 (file)
         first character of the list is <literal>~</literal>, the
         effect is inverted: only the listed system calls will result
         in immediate process termination (blacklisting). If running in
-        user mode and this option is used,
+        user mode, or in system mode, but without the
+        <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
+        <varname>User=nobody</varname>),
         <varname>NoNewPrivileges=yes</varname> is implied. This
         feature makes use of the Secure Computing Mode 2 interfaces of
         the kernel ('seccomp filtering') and is useful for enforcing a
         systems. The special <constant>native</constant> identifier
         implicitly maps to the native architecture of the system (or
         more strictly: to the architecture the system manager is
-        compiled for). If running in user mode and this option is
-        used, <varname>NoNewPrivileges=yes</varname> is implied. Note
+        compiled for). If running in user mode, or in system mode,
+        but without the <constant>CAP_SYS_ADMIN</constant>
+        capability (e.g. setting <varname>User=nobody</varname>),
+        <varname>NoNewPrivileges=yes</varname> is implied. Note
         that setting this option to a non-empty list implies that
         <constant>native</constant> is included too. By default, this
         option is set to the empty list, i.e. no architecture system
         <function>socketpair()</function> (which creates connected
         AF_UNIX sockets only) are unaffected. Note that this option
         has no effect on 32-bit x86 and is ignored (but works
-        correctly on x86-64). If running in user mode and this option
-        is used, <varname>NoNewPrivileges=yes</varname> is implied. By
+        correctly on x86-64). If running in user mode, or in system
+        mode, but without the <constant>CAP_SYS_ADMIN</constant>
+        capability (e.g. setting <varname>User=nobody</varname>),
+        <varname>NoNewPrivileges=yes</varname> is implied. By
         default, no restriction applies, all address families are
         accessible to processes. If assigned the empty string, any
         previous list changes are undone.</para>
index 8ede9e9afbcc5460f195294333763b06bd02095a..0c311ec330596f73fd163fdb18041681603665ad 100644 (file)
@@ -24,6 +24,7 @@
 #include <poll.h>
 #include <signal.h>
 #include <string.h>
+#include <sys/capability.h>
 #include <sys/personality.h>
 #include <sys/prctl.h>
 #include <sys/socket.h>
@@ -1824,6 +1825,11 @@ static int exec_child(
 
         if (params->apply_permissions) {
 
+                bool use_address_families = context->address_families_whitelist ||
+                        !set_isempty(context->address_families);
+                bool use_syscall_filter = context->syscall_whitelist ||
+                        !set_isempty(context->syscall_filter) ||
+                        !set_isempty(context->syscall_archs);
                 int secure_bits = context->secure_bits;
 
                 for (i = 0; i < _RLIMIT_MAX; i++) {
@@ -1890,15 +1896,15 @@ static int exec_child(
                                 return -errno;
                         }
 
-                if (context->no_new_privileges)
+                if (context->no_new_privileges ||
+                    (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || use_syscall_filter)))
                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
                                 return -errno;
                         }
 
 #ifdef HAVE_SECCOMP
-                if (context->address_families_whitelist ||
-                    !set_isempty(context->address_families)) {
+                if (use_address_families) {
                         r = apply_address_families(context);
                         if (r < 0) {
                                 *exit_status = EXIT_ADDRESS_FAMILIES;
@@ -1906,9 +1912,7 @@ static int exec_child(
                         }
                 }
 
-                if (context->syscall_whitelist ||
-                    !set_isempty(context->syscall_filter) ||
-                    !set_isempty(context->syscall_archs)) {
+                if (use_syscall_filter) {
                         r = apply_seccomp(context);
                         if (r < 0) {
                                 *exit_status = EXIT_SECCOMP;
index 0d2e4bfc1513d2ccf1f88b26f38bdeb0e6fd3990..5645f5c086fc9e44668cbc940fdb7ad9a6691c0d 100644 (file)
@@ -130,6 +130,15 @@ static void test_exec_systemcallerrornumber(Manager *m) {
 #endif
 }
 
+static void test_exec_systemcall_system_mode_with_user(Manager *m) { /* Runs exec-systemcallfilter-system-user.service through test() on manager m; the body is empty unless HAVE_SECCOMP is defined. */
+#ifdef HAVE_SECCOMP
+        if (getpwnam("nobody")) /* the unit sets User=nobody, so only run it when that account can be looked up */
+                test(m, "exec-systemcallfilter-system-user.service", 0, CLD_EXITED); /* presumably 0 / CLD_EXITED are the expected exit status and code -- confirm against test() */
+        else
+                log_error_errno(errno, "Skipping test_exec_systemcall_system_mode_with_user, could not find nobody user: %m"); /* skip rather than fail; NOTE(review): getpwnam() may leave errno unchanged when the entry simply does not exist -- confirm %m is meaningful here */
+#endif
+}
+
 static void test_exec_user(Manager *m) {
         if (getpwnam("nobody"))
                 test(m, "exec-user.service", 0, CLD_EXITED);
@@ -267,8 +276,31 @@ static void test_exec_spec_interpolation(Manager *m) {
         test(m, "exec-spec-interpolation.service", 0, CLD_EXITED);
 }
 
+static int run_tests(ManagerRunningAs running_as, test_function_t *tests) { /* Creates a manager for running_as, calls every entry of the NULL-terminated tests array with it, then frees it. Returns 0, or EXIT_TEST_SKIP when manager_new() reports a skippable error. */
+        test_function_t *test = NULL;
+        Manager *m = NULL;
+        int r;
+
+        assert_se(tests);
+
+        r = manager_new(running_as, true, &m);
+        if (MANAGER_SKIP_TEST(r)) { /* manager could not be created for a reason the macro classifies as "skip" */
+                printf("Skipping test: manager_new: %s\n", strerror(-r)); /* r is a negative errno-style value, hence the negation */
+                return EXIT_TEST_SKIP;
+        }
+        assert_se(r >= 0); /* any other manager_new() failure aborts the test program */
+        assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+        for (test = tests; test && *test; test++) /* stop at the NULL sentinel that terminates the array */
+                (*test)(m);
+
+        manager_free(m);
+
+        return 0;
+}
+
 int main(int argc, char *argv[]) {
-        test_function_t tests[] = {
+        test_function_t user_tests[] = {
                 test_exec_workingdirectory,
                 test_exec_personality,
                 test_exec_ignoresigpipe,
@@ -291,8 +323,10 @@ int main(int argc, char *argv[]) {
                 test_exec_spec_interpolation,
                 NULL,
         };
-        test_function_t *test = NULL;
-        Manager *m = NULL;
+        test_function_t system_tests[] = {
+                test_exec_systemcall_system_mode_with_user,
+                NULL,
+        };
         int r;
 
         log_parse_environment();
@@ -317,18 +351,9 @@ int main(int argc, char *argv[]) {
         assert_se(unsetenv("VAR2") == 0);
         assert_se(unsetenv("VAR3") == 0);
 
-        r = manager_new(MANAGER_USER, true, &m);
-        if (MANAGER_SKIP_TEST(r)) {
-                printf("Skipping test: manager_new: %s\n", strerror(-r));
-                return EXIT_TEST_SKIP;
-        }
-        assert_se(r >= 0);
-        assert_se(manager_startup(m, NULL, NULL) >= 0);
-
-        for (test = tests; test && *test; test++)
-                (*test)(m);
+        r = run_tests(MANAGER_USER, user_tests);
+        if (r != 0)
+                return r;
 
-        manager_free(m);
-
-        return 0;
+        return run_tests(MANAGER_SYSTEM, system_tests);
 }
diff --git a/test/test-execute/exec-systemcallfilter-system-user.service b/test/test-execute/exec-systemcallfilter-system-user.service
new file mode 100644 (file)
index 0000000..462f941
--- /dev/null
@@ -0,0 +1,11 @@
+[Unit]
+Description=Test for SystemCallFilter in system mode with User set
+
+[Service]
+ExecStart=/bin/echo "Foo bar"
+Type=oneshot
+User=nobody
+SystemCallFilter=~read write open execve ioperm
+SystemCallFilter=ioctl
+SystemCallFilter=read write open execve
+SystemCallFilter=~ioperm