git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: use LSM BPF functions to implement RestrictFileSystems=
author: Iago Lopez Galeiras <iagol@microsoft.com>
Tue, 5 Oct 2021 11:18:49 +0000 (13:18 +0200)
committer: Iago Lopez Galeiras <iagol@microsoft.com>
Wed, 6 Oct 2021 08:52:14 +0000 (10:52 +0200)
It attaches the LSM BPF program when the system manager starts up.

It populates the BPF hash of maps when services that set
RestrictFileSystems= are started.

It cleans up the hash of maps when the unit cgroup is pruned.

To pass the file descriptor of the BPF map we add it to the keep_fds
array.

src/basic/cgroup-util.h
src/core/cgroup.c
src/core/cgroup.h
src/core/execute.c
src/core/execute.h
src/core/main.c
src/core/manager.c

index eec13e18f17e3ac97f6841060b3ddb28285c3b8d..43801ee0f44f89d85aec5d815254146f6ec89df5 100644 (file)
@@ -33,6 +33,9 @@ typedef enum CGroupController {
         CGROUP_CONTROLLER_BPF_FOREIGN,
         CGROUP_CONTROLLER_BPF_SOCKET_BIND,
         CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
+        /* The BPF hook implementing RestrictFileSystems= is not defined here.
+         * It's applied as late as possible in exec_child() so we don't block
+         * our own unit setup code. */
 
         _CGROUP_CONTROLLER_MAX,
         _CGROUP_CONTROLLER_INVALID = -EINVAL,
index 155925962226968ed7d5afc7d223f24ae0d92beb..2b15310191b5ff213c6a9e50601cb3ae0c1bb4dc 100644 (file)
 #include "string-util.h"
 #include "virt.h"
 
+#if BPF_FRAMEWORK
+#include "bpf-dlopen.h"
+#include "bpf-link.h"
+#include "bpf/restrict_fs/restrict-fs-skel.h"
+#endif
+
 #define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
 
 /* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
@@ -2736,6 +2742,10 @@ void unit_prune_cgroup(Unit *u) {
 
         (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
 
+#if BPF_FRAMEWORK
+        (void) lsm_bpf_cleanup(u); /* Remove cgroup from the global LSM BPF map */
+#endif
+
         is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
 
         r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
index 8795f2724eb854dd688c41d43f7dbeec1204616d..4413eeaaa0afe5dd5dac24540eb35153a7188cbe 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <stdbool.h>
 
+#include "bpf-lsm.h"
 #include "cgroup-util.h"
 #include "cpu-set-util.h"
 #include "list.h"
index d68e31eb7dd19802316ec11e8a6d7b77b8fe9ea6..6397bab315a6deb3bcec5725e72644b2a9132862 100644 (file)
@@ -41,6 +41,7 @@
 #endif
 #include "async.h"
 #include "barrier.h"
+#include "bpf-lsm.h"
 #include "cap-list.h"
 #include "capability-util.h"
 #include "cgroup-setup.h"
@@ -1685,6 +1686,29 @@ static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
         return seccomp_restrict_namespaces(c->restrict_namespaces);
 }
 
+#if HAVE_LIBBPF
+static bool skip_lsm_bpf_unsupported(const Unit* u, const char* msg) { /* Returns true if LSM BPF is unsupported and the feature named by msg should be skipped; false otherwise. */
+        if (lsm_bpf_supported())
+                return false; /* Supported: nothing to skip, and nothing is logged. */
+
+        log_unit_debug(u, "LSM BPF not supported, skipping %s", msg); /* Debug level only: this path is reported as a skip, not as an error. */
+        return true;
+}
+
+static int apply_restrict_filesystems(Unit *u, const ExecContext *c) { /* Applies the RestrictFileSystems= setting of context c to unit u; returns 0 when there is nothing to apply. */
+        assert(u);
+        assert(c);
+
+        if (!exec_context_restrict_filesystems_set(c))
+                return 0; /* Setting not configured for this context. */
+
+        if (skip_lsm_bpf_unsupported(u, "RestrictFileSystems="))
+                return 0; /* LSM BPF unavailable: skip and report success. */
+
+        return lsm_bpf_unit_restrict_filesystems(u, c->restrict_filesystems, c->restrict_filesystems_allow_list); /* The helper's result is returned to the caller unchanged. */
+}
+#endif
+
 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
         unsigned long personality;
         int r;
@@ -3813,7 +3837,7 @@ static int exec_child(
         /* In case anything used libc syslog(), close this here, too */
         closelog();
 
-        int keep_fds[n_fds + 2];
+        int keep_fds[n_fds + 3];
         memcpy_safe(keep_fds, fds, n_fds * sizeof(int));
         n_keep_fds = n_fds;
 
@@ -3823,6 +3847,24 @@ static int exec_child(
                 return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
         }
 
+#if HAVE_LIBBPF
+        if (MANAGER_IS_SYSTEM(unit->manager) && lsm_bpf_supported()) { /* System manager only: add the restrict-filesystems BPF map fd to keep_fds so it is passed to close_remaining_fds() below. */
+                int bpf_map_fd = -1;
+
+                bpf_map_fd = lsm_bpf_map_restrict_fs_fd(unit);
+                if (bpf_map_fd < 0) {
+                        *exit_status = EXIT_FDS;
+                        return log_unit_error_errno(unit, bpf_map_fd, "Failed to get restrict filesystems BPF map fd: %m"); /* The error code is in bpf_map_fd; r is not assigned by this call. */
+                }
+
+                r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, bpf_map_fd, &bpf_map_fd); /* May update bpf_map_fd through the last argument. */
+                if (r < 0) {
+                        *exit_status = EXIT_FDS;
+                        return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
+                }
+        }
+#endif
+
         r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, keep_fds, n_keep_fds);
         if (r < 0) {
                 *exit_status = EXIT_FDS;
@@ -4682,6 +4724,15 @@ static int exec_child(
                         return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
                 }
 #endif
+
+#if HAVE_LIBBPF
+                r = apply_restrict_filesystems(unit, context);
+                if (r < 0) {
+                        *exit_status = EXIT_BPF;
+                        return log_unit_error_errno(unit, r, "Failed to restrict filesystems: %m");
+                }
+#endif
+
         }
 
         if (!strv_isempty(context->unset_environment)) {
@@ -4967,6 +5018,8 @@ void exec_context_done(ExecContext *c) {
         c->apparmor_profile = mfree(c->apparmor_profile);
         c->smack_process_label = mfree(c->smack_process_label);
 
+        c->restrict_filesystems = set_free(c->restrict_filesystems);
+
         c->syscall_filter = hashmap_free(c->syscall_filter);
         c->syscall_archs = set_free(c->syscall_archs);
         c->address_families = set_free(c->address_families);
@@ -5734,6 +5787,12 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                                 prefix, strna(s));
         }
 
+#if HAVE_LIBBPF
+        if (exec_context_restrict_filesystems_set(c))
+                SET_FOREACH(e, c->restrict_filesystems)
+                        fprintf(f, "%sRestrictFileSystems: %s\n", prefix, *e);
+#endif
+
         if (c->network_namespace_path)
                 fprintf(f,
                         "%sNetworkNamespacePath: %s\n",
index 64a38b2d26a277e0d321f81f2c583be0e53d8dae..560dcbcc5eb57b6ee675852699be4bc2061d4afa 100644 (file)
@@ -314,6 +314,9 @@ struct ExecContext {
 
         unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
 
+        Set *restrict_filesystems;
+        bool restrict_filesystems_allow_list:1;
+
         Hashmap *syscall_filter;
         Set *syscall_archs;
         int syscall_errno;
@@ -342,6 +345,13 @@ static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
         return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
 }
 
+static inline bool exec_context_restrict_filesystems_set(const ExecContext *c) { /* Returns true if the allow-list flag is set or the restrict_filesystems set is non-empty. */
+        assert(c);
+
+        return c->restrict_filesystems_allow_list || /* The allow-list flag alone counts as "set", even when the set itself is empty. */
+          !set_isempty(c->restrict_filesystems);
+}
+
 static inline bool exec_context_with_rootfs(const ExecContext *c) {
         assert(c);
 
index 059ba6dd493a5f85701299dd13b7e836359c8da8..62f39c7378f418c4c43762762951252c145fe251 100644 (file)
@@ -22,6 +22,9 @@
 #include "alloc-util.h"
 #include "apparmor-setup.h"
 #include "architecture.h"
+#if HAVE_LIBBPF
+#include "bpf-lsm.h"
+#endif
 #include "build.h"
 #include "bus-error.h"
 #include "bus-util.h"
index 0b2e29ae148be37e127f027d627cb652e9c0f220..6bcb6bd15359a1c9955ab8de9deed49c88341883 100644 (file)
@@ -930,6 +930,14 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
                 r = manager_setup_sigchld_event_source(m);
                 if (r < 0)
                         return r;
+
+#if HAVE_LIBBPF
+                if (MANAGER_IS_SYSTEM(m) && lsm_bpf_supported()) {
+                        r = lsm_bpf_setup(m);
+                        if (r < 0)
+                                return r;
+                }
+#endif
         }
 
         if (test_run_flags == 0) {
@@ -1535,6 +1543,10 @@ Manager* manager_free(Manager *m) {
                 m->prefix[dt] = mfree(m->prefix[dt]);
         free(m->received_credentials);
 
+#if BPF_FRAMEWORK
+        lsm_bpf_destroy(m->restrict_fs);
+#endif
+
         return mfree(m);
 }