git.ipfire.org Git - thirdparty/systemd.git/commitdiff
seccomp: add new system call filter, suitable as default whitelist for system services
author Lennart Poettering <lennart@poettering.net>
Wed, 18 Apr 2018 19:19:54 +0000 (21:19 +0200)
committer Lennart Poettering <lennart@poettering.net>
Thu, 14 Jun 2018 15:44:20 +0000 (17:44 +0200)
Currently we employ mostly system call blacklisting for our system
services. Let's add a new system call filter group @system-service that
helps turn this around into a whitelist by default.

The new group is very similar to nspawn's default filter list, but in
some ways more restricted (as sethostname() and suchlike shouldn't be
available to most system services just like that) and in others more
relaxed (for example @keyring is blocked in nspawn since it's not
properly virtualized yet in the kernel, but is fine for regular system
services).

man/systemd.exec.xml
src/shared/seccomp-util.c
src/shared/seccomp-util.h
src/test/test-seccomp.c

index a17db8d8505d134fc0dcfe2357fcb044cef88108..3bd790b4859e08b2eb3510c181b6d9113dc5127a 100644 (file)
@@ -1490,6 +1490,10 @@ RestrictNamespaces=~cgroup net</programlisting>
                 <entry>@sync</entry>
                 <entry>Synchronizing files and memory to disk: (<citerefentry project='man-pages'><refentrytitle>fsync</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>msync</refentrytitle><manvolnum>2</manvolnum></citerefentry>, and related calls)</entry>
               </row>
+              <row>
+                <entry>@system-service</entry>
+                <entry>A reasonable set of system calls used by common system services, excluding any special purpose calls. This is the recommended starting point for whitelisting system calls for system services, as it contains what is typically needed by system services, but excludes overly specific interfaces. For example, the following APIs are excluded: <literal>@clock</literal>, <literal>@mount</literal>, <literal>@swap</literal>, <literal>@reboot</literal>.</entry>
+              </row>
               <row>
                 <entry>@timer</entry>
                 <entry>System calls for scheduling operations by time (<citerefentry project='man-pages'><refentrytitle>alarm</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>timer_create</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
@@ -1504,6 +1508,14 @@ RestrictNamespaces=~cgroup net</programlisting>
         <command>systemd-analyze syscall-filter</command> to list the actual list of system calls in each
         filter.</para>
 
+        <para>Generally, whitelisting system calls (rather than blacklisting) is the safer mode of operation. It is
+        recommended to enforce system call whitelists for all long-running system services. Specifically, the
+        following lines are a relatively safe basic choice for the majority of system services:</para>
+
+        <programlisting>[Service]
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM</programlisting>
+
         <para>It is recommended to combine the file system namespacing related options with
         <varname>SystemCallFilter=~@mount</varname>, in order to prohibit the unit's processes to undo the
         mappings. Specifically these are the options <varname>PrivateTmp=</varname>,
index 517a1b450917d68c90ac89d95950732ee213a890..4a02d8c35f33fc413ee61cd8d496ad10e579269c 100644 (file)
@@ -756,6 +756,75 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
                 "sync_file_range\0"
                 "syncfs\0"
         },
+        [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = {
+                .name = "@system-service",
+                .help = "General system service operations",
+                .value =
+                "@aio\0"
+                "@basic-io\0"
+                "@chown\0"
+                "@default\0"
+                "@file-system\0"
+                "@io-event\0"
+                "@ipc\0"
+                "@keyring\0"
+                "@memlock\0"
+                "@network-io\0"
+                "@process\0"
+                "@resources\0"
+                "@setuid\0"
+                "@signal\0"
+                "@sync\0"
+                "@timer\0"
+                "brk\0"
+                "capget\0"
+                "capset\0"
+                "copy_file_range\0"
+                "fadvise64\0"
+                "fadvise64_64\0"
+                "flock\0"
+                "get_mempolicy\0"
+                "getcpu\0"
+                "getpriority\0"
+                "getrandom\0"
+                "ioctl\0"
+                "ioprio_get\0"
+                "kcmp\0"
+                "madvise\0"
+                "mincore\0"
+                "mprotect\0"
+                "mremap\0"
+                "name_to_handle_at\0"
+                "oldolduname\0"
+                "olduname\0"
+                "personality\0"
+                "readahead\0"
+                "readdir\0"
+                "remap_file_pages\0"
+                "sched_get_priority_max\0"
+                "sched_get_priority_min\0"
+                "sched_getaffinity\0"
+                "sched_getattr\0"
+                "sched_getparam\0"
+                "sched_getscheduler\0"
+                "sched_rr_get_interval\0"
+                "sched_yield\0"
+                "sendfile\0"
+                "sendfile64\0"
+                "setfsgid\0"
+                "setfsgid32\0"
+                "setfsuid\0"
+                "setfsuid32\0"
+                "setpgid\0"
+                "setsid\0"
+                "splice\0"
+                "sysinfo\0"
+                "tee\0"
+                "umask\0"
+                "uname\0"
+                "userfaultfd\0"
+                "vmsplice\0"
+        },
         [SYSCALL_FILTER_SET_TIMER] = {
                 .name = "@timer",
                 .help = "Schedule operations by time",
index 7dfff9df78090a8460f7f1b32e71e82649aea138..eac857afb9b9f01eafdb22836e0a1c43f2db95b1 100644 (file)
@@ -47,6 +47,7 @@ enum {
         SYSCALL_FILTER_SET_SIGNAL,
         SYSCALL_FILTER_SET_SWAP,
         SYSCALL_FILTER_SET_SYNC,
+        SYSCALL_FILTER_SET_SYSTEM_SERVICE,
         SYSCALL_FILTER_SET_TIMER,
         _SYSCALL_FILTER_SET_MAX
 };
index 33ec680753be98de9586d3e2b47d3c97589ae470..d82cb5c1c56e6d59593dc9b6cab0c969d7eb9daf 100644 (file)
@@ -104,7 +104,8 @@ static void test_filter_sets(void) {
                 if (pid == 0) { /* Child? */
                         int fd;
 
-                        if (i == SYSCALL_FILTER_SET_DEFAULT) /* if we look at the default set, whitelist instead of blacklist */
+                        /* if we look at the default set (or one that includes it), whitelist instead of blacklist */
+                        if (IN_SET(i, SYSCALL_FILTER_SET_DEFAULT, SYSCALL_FILTER_SET_SYSTEM_SERVICE))
                                 r = seccomp_load_syscall_filter_set(SCMP_ACT_ERRNO(EUCLEAN), syscall_filter_sets + i, SCMP_ACT_ALLOW);
                         else
                                 r = seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EUCLEAN));