git.ipfire.org Git - thirdparty/systemd.git/commitdiff
seccomp: LockPersonality boolean (#6193)
author: Topi Miettinen <toiwoton@gmail.com>
Tue, 4 Jul 2017 12:48:18 +0000 (15:48 +0300)
committer: Lennart Poettering <lennart@poettering.net>
Tue, 29 Aug 2017 13:54:50 +0000 (15:54 +0200)
Add LockPersonality boolean to allow locking down the personality(2)
system call so that the execution domain can't be changed.
This may be useful to improve security because odd emulations
may be poorly tested and a source of vulnerabilities, while
system services shouldn't need any weird personalities.

man/systemd.exec.xml
src/core/dbus-execute.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.m4
src/shared/seccomp-util.c
src/shared/seccomp-util.h
src/test/test-seccomp.c

index 7a7006b9a0b8a1f9b67dded534aa2400f57cff47..a9f1d8d74e11342281ed7869529db094b4470d53 100644 (file)
         personality of the host system's kernel.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>LockPersonality=</varname></term>
+
+        <listitem><para>Locks down the <citerefentry
+        project='man-pages'><refentrytitle>personality</refentrytitle><manvolnum>2</manvolnum></citerefentry> system
+        call so that the kernel execution domain may not be changed from the default or the personality selected with
+        the <varname>Personality=</varname> directive. This may be useful to improve security, because odd personality
+        emulations may be poorly tested and a source of vulnerabilities. If running in user mode, or in system mode, but
+        without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=</varname>),
+        <varname>NoNewPrivileges=yes</varname> is implied.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>RuntimeDirectory=</varname></term>
 
index 45497ca024d65f22e0e421bd6023fb8a278b2343..d28e8aafd671bd6d10c230a4f8f8275dd47a0dfa 100644 (file)
@@ -853,6 +853,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST),
index d192134b1c1f633943d97702c57a9af4bb4fc01b..4d285ff2506ca86f6f4135c239a09859f5ce16e6 100644 (file)
@@ -1296,7 +1296,8 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
                 c->protect_kernel_modules ||
                 c->private_devices ||
                 context_has_syscall_filters(c) ||
-                !set_isempty(c->syscall_archs);
+                !set_isempty(c->syscall_archs) ||
+                c->lock_personality;
 }
 
 #ifdef HAVE_SECCOMP
@@ -1455,6 +1456,25 @@ static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
         return seccomp_restrict_namespaces(c->restrict_namespaces);
 }
 
+/* Applies LockPersonality=: returns 0 if unset or seccomp is skipped, else seccomp_lock_personality()'s result. */
+static int apply_lock_personality(const Unit* u, const ExecContext *c) {
+        unsigned long personality;
+
+        assert(u);
+        assert(c);
+
+        if (!c->lock_personality)
+                return 0;
+
+        if (skip_seccomp_unavailable(u, "LockPersonality="))
+                return 0;
+
+        /* Read c only now that assert(c) has passed. If personality is not specified, use the default (Linux) */
+        personality = c->personality == PERSONALITY_INVALID ? PER_LINUX : c->personality;
+
+        return seccomp_lock_personality(personality);
+}
+
 #endif
 
 static void do_idle_pipe_dance(int idle_pipe[4]) {
@@ -2972,6 +2992,13 @@ static int exec_child(
                         return r;
                 }
 
+                r = apply_lock_personality(unit, context);
+                if (r < 0) {
+                        *exit_status = EXIT_SECCOMP;
+                        *error_message = strdup("Failed to lock personalities");
+                        return r;
+                }
+
                 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
                  * by the filter as little as possible. */
                 r = apply_syscall_filter(unit, context, needs_ambient_hack);
@@ -3733,6 +3760,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                         "%sPersonality: %s\n",
                         prefix, strna(personality_to_string(c->personality)));
 
+        fprintf(f,
+                "%sLockPersonality: %s\n",
+                prefix, yes_no(c->lock_personality));
+
         if (c->syscall_filter) {
 #ifdef HAVE_SECCOMP
                 Iterator j;
index 9a2826928324ed273e3acb80c2be2724a372ec82..8a7ce8449b9bf761f2e537a7548165ea91a4b5ed 100644 (file)
@@ -227,6 +227,7 @@ struct ExecContext {
         bool same_pgrp;
 
         unsigned long personality;
+        bool lock_personality;
 
         unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
 
index 9a87f0acd3d1fea978977d65007a7469ff601e23..94f3d657f6b056880d33ff5c1e6e243c6654313e 100644 (file)
@@ -60,14 +60,16 @@ $1.SystemCallErrorNumber,        config_parse_syscall_errno,         0,
 $1.MemoryDenyWriteExecute,       config_parse_bool,                  0,                             offsetof($1, exec_context.memory_deny_write_execute)
 $1.RestrictNamespaces,           config_parse_restrict_namespaces,   0,                             offsetof($1, exec_context)
 $1.RestrictRealtime,             config_parse_bool,                  0,                             offsetof($1, exec_context.restrict_realtime)
-$1.RestrictAddressFamilies,      config_parse_address_families,      0,                             offsetof($1, exec_context)',
+$1.RestrictAddressFamilies,      config_parse_address_families,      0,                             offsetof($1, exec_context)
+$1.LockPersonality,              config_parse_bool,                  0,                             offsetof($1, exec_context.lock_personality)',
 `$1.SystemCallFilter,            config_parse_warn_compat,           DISABLED_CONFIGURATION,        0
 $1.SystemCallArchitectures,      config_parse_warn_compat,           DISABLED_CONFIGURATION,        0
 $1.SystemCallErrorNumber,        config_parse_warn_compat,           DISABLED_CONFIGURATION,        0
 $1.MemoryDenyWriteExecute,       config_parse_warn_compat,           DISABLED_CONFIGURATION,        0
 $1.RestrictNamespaces,           config_parse_warn_compat,           DISABLED_CONFIGURATION,        0
 $1.RestrictRealtime,             config_parse_warn_compat,           DISABLED_CONFIGURATION,        0
-$1.RestrictAddressFamilies,      config_parse_warn_compat,           DISABLED_CONFIGURATION,        0')
+$1.RestrictAddressFamilies,      config_parse_warn_compat,           DISABLED_CONFIGURATION,        0
+$1.LockPersonality,              config_parse_warn_compat,           DISABLED_CONFIGURATION,        0')
 $1.LimitCPU,                     config_parse_limit,                 RLIMIT_CPU,                    offsetof($1, exec_context.rlimit)
 $1.LimitFSIZE,                   config_parse_limit,                 RLIMIT_FSIZE,                  offsetof($1, exec_context.rlimit)
 $1.LimitDATA,                    config_parse_limit,                 RLIMIT_DATA,                   offsetof($1, exec_context.rlimit)
index dd6d4fbdc7e31997d9674d0a26e0f2dd39f6f3c6..bf2db28a82b9d571320b26e02a8a67d36c09a65a 100644 (file)
@@ -29,6 +29,7 @@
 #include "alloc-util.h"
 #include "macro.h"
 #include "nsflags.h"
+#include "process-util.h"
 #include "seccomp-util.h"
 #include "set.h"
 #include "string-util.h"
@@ -1402,3 +1403,21 @@ int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) {
 
         return 0;
 }
+
+/* Loads a seccomp filter under which personality(2) fails with EPERM unless its first argument equals 'personality'. */
+int seccomp_lock_personality(unsigned long personality) {
+        _cleanup_(seccomp_releasep) scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
+        int rc;
+
+        if (!ctx)
+                return -ENOMEM;
+
+        /* Default action is allow; the single rule below refuses any other first argument. */
+        rc = seccomp_rule_add_exact(
+                        ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(personality),
+                        1, SCMP_A0(SCMP_CMP_NE, personality));
+        if (rc < 0)
+                return rc;
+
+        return seccomp_load(ctx);
+}
index 0edffa116ddfff5412553410faec0fac8b650d16..ca43ba8659a5ebae5b18c23f1ed1733ad8b3d44b 100644 (file)
@@ -78,6 +78,7 @@ int seccomp_protect_sysctl(void);
 int seccomp_restrict_address_families(Set *address_families, bool whitelist);
 int seccomp_restrict_realtime(void);
 int seccomp_memory_deny_write_execute(void);
+int seccomp_lock_personality(unsigned long personality);
 
 extern const uint32_t seccomp_local_archs[];
 
index 28fe2065072df4f77c629b71041daac0d8765338..7ffbc4754e8269be0d91074da78768fbd07b8732 100644 (file)
@@ -21,6 +21,7 @@
 #include <stdlib.h>
 #include <sys/eventfd.h>
 #include <sys/mman.h>
+#include <sys/personality.h>
 #include <sys/poll.h>
 #include <sys/shm.h>
 #include <sys/types.h>
@@ -565,6 +566,40 @@ static void test_load_syscall_filter_set_raw(void) {
         assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid, true) == EXIT_SUCCESS);
 }
 
+static void test_lock_personality(void) {
+        pid_t pid;
+        /* Checks in a forked child that, after locking to PER_LINUX, personality() rejects every other value. */
+        if (!is_seccomp_available())
+                return;
+        if (geteuid() != 0)
+                return;
+        /* The test is silently skipped above when seccomp is unavailable or we are not running as root. */
+        pid = fork();
+        assert_se(pid >= 0);
+        /* Child: install the filter, then probe personality() with allowed and disallowed arguments. */
+        if (pid == 0) {
+                assert_se(seccomp_lock_personality(PER_LINUX) >= 0);
+                /* The locked value must still be accepted; every other value below must fail with EPERM. */
+                assert_se(personality(PER_LINUX) == PER_LINUX);
+                assert_se(personality(PER_LINUX | ADDR_NO_RANDOMIZE) == -1 && errno == EPERM);
+                assert_se(personality(PER_LINUX | MMAP_PAGE_ZERO) == -1 && errno == EPERM);
+                assert_se(personality(PER_LINUX | ADDR_COMPAT_LAYOUT) == -1 && errno == EPERM);
+                assert_se(personality(PER_LINUX | READ_IMPLIES_EXEC) == -1 && errno == EPERM);
+                assert_se(personality(PER_LINUX_32BIT) == -1 && errno == EPERM);
+                assert_se(personality(PER_SVR4) == -1 && errno == EPERM);
+                assert_se(personality(PER_BSD) == -1 && errno == EPERM);
+                assert_se(personality(PER_LINUX32) == -1 && errno == EPERM);
+                assert_se(personality(PER_LINUX32_3GB) == -1 && errno == EPERM);
+                assert_se(personality(PER_UW7) == -1 && errno == EPERM);
+                assert_se(personality(0x42) == -1 && errno == EPERM);
+                assert_se(personality(PERSONALITY_INVALID) == -1 && errno == EPERM); /* maybe remove this later */
+                assert_se(personality(PER_LINUX) == PER_LINUX);
+                _exit(EXIT_SUCCESS);
+        }
+        /* Parent: the child must have passed all of its assertions and exited with EXIT_SUCCESS. */
+        assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS);
+}
+
 int main(int argc, char *argv[]) {
 
         log_set_max_level(LOG_DEBUG);
@@ -581,6 +616,7 @@ int main(int argc, char *argv[]) {
         test_memory_deny_write_execute_shmat();
         test_restrict_archs();
         test_load_syscall_filter_set_raw();
+        test_lock_personality();
 
         return 0;
 }