]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add BPF LSM functions
author Iago Lopez Galeiras <iagol@microsoft.com>
Tue, 13 Jul 2021 07:51:06 +0000 (09:51 +0200)
committer Iago Lopez Galeiras <iagol@microsoft.com>
Wed, 6 Oct 2021 08:52:14 +0000 (10:52 +0200)
This adds 6 functions to implement RestrictFileSystems=

* lsm_bpf_supported() checks if LSM BPF is supported. It checks that
  cgroupv2 is used, that BPF LSM is enabled, and tries to load the BPF
  LSM program which makes sure BTF and hash of maps are supported, and
  BPF LSM programs can be loaded.
* lsm_bpf_setup() loads and attaches the LSM BPF program.
* lsm_bpf_unit_restrict_filesystems() populates the hash of maps BPF map with the
  cgroupID and the set of allowed or denied filesystems.
* lsm_bpf_cleanup() removes a cgroupID entry from the hash of maps.
* lsm_bpf_map_restrict_fs_fd() is a helper function to get the file
  descriptor of the BPF map.
* lsm_bpf_destroy() is a wrapper around the destroy function of the BPF
  skeleton file.

src/core/bpf-lsm.c [new file with mode: 0644]
src/core/bpf-lsm.h [new file with mode: 0644]
src/core/cgroup.c
src/core/manager.h
src/core/meson.build
src/core/unit.h

diff --git a/src/core/bpf-lsm.c b/src/core/bpf-lsm.c
new file mode 100644 (file)
index 0000000..3e480c6
--- /dev/null
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-lsm.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "filesystems.h"
+#include "log.h"
+#include "manager.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "stat-util.h"
+#include "strv.h"
+
+#if BPF_FRAMEWORK
+/* libbpf, clang and llc compile time dependencies are satisfied */
+#include "bpf-dlopen.h"
+#include "bpf-link.h"
+#include "bpf/restrict_fs/restrict-fs-skel.h"
+
+#define CGROUP_HASH_SIZE_MAX 2048
+
+/* Destroys a restrict_fs skeleton object and always returns NULL, so that a caller can reset its
+ * pointer in the same expression. */
+static struct restrict_fs_bpf *restrict_fs_bpf_free(struct restrict_fs_bpf *obj) {
+        /* restrict_fs_bpf__destroy handles object == NULL case */
+        (void) restrict_fs_bpf__destroy(obj);
+
+        return NULL;
+}
+
+/* Provides restrict_fs_bpf_freep(), which this file uses with _cleanup_(). */
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct restrict_fs_bpf *, restrict_fs_bpf_free);
+
+/* Probes whether an LSM BPF program can be attached, by attaching it and letting the cleanup handler
+ * drop the link again when the function returns.
+ *
+ * Returns > 0 if attaching worked and a negative errno-style value if it did not.
+ *
+ * The return type is int rather than bool on purpose: a negative error code returned through a bool is
+ * converted to 'true', which made the caller's "r < 0" check unreachable and reported every failed
+ * attach as success. */
+static int bpf_can_link_lsm_program(struct bpf_program *prog) {
+        _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
+        int r;
+
+        assert(prog);
+
+        link = sym_bpf_program__attach_lsm(prog);
+        if (!link)
+                return -ENOMEM;
+
+        /* Same check lsm_bpf_setup() applies to the attach result: the returned pointer may encode an
+         * error instead of being NULL. */
+        r = sym_libbpf_get_error(link);
+        if (r < 0) {
+                /* Not a real link object, so there is nothing for the cleanup handler to release. */
+                link = NULL;
+                return r;
+        }
+
+        return 1;
+}
+
+/* Opens the restrict_fs skeleton, resizes its cgroup_hash map to CGROUP_HASH_SIZE_MAX, installs a dummy
+ * inner map and loads the object.
+ *
+ * On success 0 is returned and ownership of the loaded object passes to *ret_obj. On failure the value
+ * of the corresponding log_error_errno() call is returned, *ret_obj is left untouched and the partially
+ * set up object is destroyed again by the cleanup handler (it used to be leaked). */
+static int prepare_restrict_fs_bpf(struct restrict_fs_bpf **ret_obj) {
+        _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
+        _cleanup_close_ int inner_map_fd = -1;
+        int r;
+
+        assert(ret_obj);
+
+        obj = restrict_fs_bpf__open();
+        if (!obj)
+                return log_error_errno(errno, "Failed to open BPF object: %m");
+
+        /* TODO Maybe choose a number based on runtime information? */
+        r = sym_bpf_map__resize(obj->maps.cgroup_hash, CGROUP_HASH_SIZE_MAX);
+        if (r != 0)
+                return log_error_errno(r,
+                                "Failed to resize BPF map '%s': %m",
+                                sym_bpf_map__name(obj->maps.cgroup_hash));
+
+        /* Dummy map to satisfy the verifier. Its fd is closed when this function returns. */
+        inner_map_fd = sym_bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t), sizeof(uint32_t), 128, 0);
+        if (inner_map_fd < 0)
+                return log_error_errno(errno, "Failed to create BPF map: %m");
+
+        r = sym_bpf_map__set_inner_map_fd(obj->maps.cgroup_hash, inner_map_fd);
+        if (r < 0)
+                return log_error_errno(r, "Failed to set inner map fd: %m");
+
+        r = restrict_fs_bpf__load(obj);
+        if (r)
+                return log_error_errno(r, "Failed to load BPF object: %m");
+
+        /* TAKE_PTR() resets obj, so the cleanup handler leaves the returned object alone. */
+        *ret_obj = TAKE_PTR(obj);
+
+        return 0;
+}
+
+/* Checks whether "bpf" appears in the comma-separated list in /sys/kernel/security/lsm.
+ *
+ * Returns 1 if it does, 0 if it does not or if the list cannot be read or parsed, and the value of
+ * log_oom() if splitting the list runs out of memory.
+ *
+ * The answer is cached in a static variable. The cache is set to 0 before the file is read, so once a
+ * first attempt has failed, later calls return 0 without retrying. */
+static int mac_bpf_use(void) {
+        _cleanup_free_ char *lsm_list = NULL;
+        static int cached_use = -1;
+        int r;
+
+        if (cached_use >= 0)
+                return cached_use;
+
+        cached_use = 0;
+
+        r = read_one_line_file("/sys/kernel/security/lsm", &lsm_list);
+        if (r < 0) {
+               /* Look at the returned error code, not at errno: the failure is reported through r
+                * (it is what gets logged below), and errno may hold an unrelated value by now. */
+               if (r != -ENOENT)
+                       log_debug_errno(r, "Failed to read /sys/kernel/security/lsm, ignoring: %m");
+
+               return 0;
+        }
+
+        const char *p = lsm_list;
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+
+                r = extract_first_word(&p, &word, ",", 0);
+                if (r == 0)
+                        break;
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to parse /sys/kernel/security/lsm, ignoring: %m");
+                        return 0;
+                }
+
+                if (streq(word, "bpf")) {
+                        cached_use = 1;
+                        break;
+                }
+        }
+
+        return cached_use;
+}
+
+/* Performs the support probe itself. Returns true only if all checks pass; every failed check is
+ * logged where it is detected. The BPF object loaded for the probe is destroyed on return. */
+static bool lsm_bpf_probe_support(void) {
+        _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *skel = NULL;
+        int r;
+
+        r = dlopen_bpf();
+        if (r < 0) {
+                log_info_errno(r, "Failed to open libbpf, LSM BPF is not supported: %m");
+                return false;
+        }
+
+        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+        if (r < 0) {
+                log_warning_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+                return false;
+        }
+        if (r == 0) {
+                log_info_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+                               "Not running with unified cgroup hierarchy, LSM BPF is not supported");
+                return false;
+        }
+
+        r = mac_bpf_use();
+        if (r < 0) {
+                log_warning_errno(r, "Can't determine whether the BPF LSM module is used: %m");
+                return false;
+        }
+        if (r == 0) {
+                log_info_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+                               "BPF LSM hook not enabled in the kernel, LSM BPF not supported");
+                return false;
+        }
+
+        /* Loading the object is part of the probe; failures are logged by the callee. */
+        if (prepare_restrict_fs_bpf(&skel) < 0)
+                return false;
+
+        r = bpf_can_link_lsm_program(skel->progs.restrict_filesystems);
+        if (r < 0) {
+                log_warning_errno(r, "Failed to link BPF program. Assuming BPF is not available: %m");
+                return false;
+        }
+
+        return true;
+}
+
+/* Reports whether LSM BPF can be used: 1 if so, 0 if not. The probe runs on the first call only; its
+ * result is remembered in a static variable and returned by every later call. */
+int lsm_bpf_supported(void) {
+        static int cached_result = -1;
+
+        if (cached_result < 0)
+                cached_result = lsm_bpf_probe_support();
+
+        return cached_result;
+}
+
+/* Loads the restrict_fs BPF object, stores it in m->restrict_fs and attaches its restrict_filesystems
+ * LSM program. Returns 0 on success, or the error from loading/attaching otherwise. */
+int lsm_bpf_setup(Manager *m) {
+        _cleanup_(bpf_link_freep) struct bpf_link *lsm_link = NULL;
+        struct restrict_fs_bpf *skel = NULL;
+        struct bpf_program *prog;
+        int r;
+
+        assert(m);
+
+        r = prepare_restrict_fs_bpf(&skel);
+        if (r < 0)
+                return r;
+
+        /* The manager holds the object from here on, including when attaching fails below. */
+        m->restrict_fs = skel;
+        prog = skel->progs.restrict_filesystems;
+
+        lsm_link = sym_bpf_program__attach_lsm(prog);
+        r = sym_libbpf_get_error(lsm_link);
+        if (r != 0)
+                return log_error_errno(r, "Failed to link '%s' LSM BPF program: %m",
+                                       sym_bpf_program__name(prog));
+
+        log_info("LSM BPF program attached");
+
+        /* Hand the link over to the skeleton so the cleanup handler does not drop it. */
+        skel->links.restrict_filesystems = TAKE_PTR(lsm_link);
+
+        return 0;
+}
+
+/* Registers a filesystem allow or deny list for the cgroup of unit 'u'.
+ *
+ * A new inner hash map is created and stored in the manager's cgroup_hash map under u->cgroup_id.
+ * Key 0 of the inner map records whether the list is an allow list (1) or a deny list (0); every
+ * filesystem magic of every name in 'filesystems' is added as a further key with a dummy value of 1.
+ * Names that fs_type_from_string() does not know are logged and skipped.
+ *
+ * Returns 0 on success, or the value of the corresponding log_unit_error_errno() call on failure. If
+ * populating the inner map fails, the cgroup's entry is removed from cgroup_hash again. */
+int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, bool allow_list) {
+        /* The fd is only needed while populating the map, so close it on every return path instead
+         * of leaking one descriptor per call. NOTE(review): relies on the outer map keeping its own
+         * reference to the inner map once the entry is stored; confirm against the kernel
+         * map-in-map documentation. */
+        _cleanup_close_ int inner_map_fd = -1;
+        int outer_map_fd = -1;
+        uint32_t dummy_value = 1, zero = 0, allow = allow_list;
+        const char *fs;
+        const statfs_f_type_t *magic;
+        int r;
+
+        assert(filesystems);
+        assert(u);
+        assert(u->manager);
+
+        /* Same guard and error code as lsm_bpf_map_restrict_fs_fd(): without a loaded BPF object
+         * there is no map to populate, and dereferencing the pointer would crash. */
+        if (!u->manager->restrict_fs)
+                return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOMEDIUM),
+                                            "LSM BPF object is not set up, cannot restrict filesystems.");
+
+        inner_map_fd = sym_bpf_create_map(
+                        BPF_MAP_TYPE_HASH,
+                        sizeof(uint32_t),
+                        sizeof(uint32_t),
+                        128, /* Should be enough for all filesystem types */
+                        0);
+        if (inner_map_fd < 0)
+                return log_unit_error_errno(u, errno, "Failed to create inner LSM map: %m");
+
+        /* This fd is obtained from the manager's BPF object and is not closed here, just as in the
+         * other users in this file. */
+        outer_map_fd = sym_bpf_map__fd(u->manager->restrict_fs->maps.cgroup_hash);
+        if (outer_map_fd < 0)
+                return log_unit_error_errno(u, errno, "Failed to get BPF map fd: %m");
+
+        if (sym_bpf_map_update_elem(outer_map_fd, &u->cgroup_id, &inner_map_fd, BPF_ANY) != 0)
+                return log_unit_error_errno(u, errno, "Error populating LSM BPF map: %m");
+
+        /* Use key 0 to store whether this is an allow list or a deny list */
+        if (sym_bpf_map_update_elem(inner_map_fd, &zero, &allow, BPF_ANY) != 0) {
+                r = log_unit_error_errno(u, errno, "Error initializing BPF map: %m");
+                goto rollback;
+        }
+
+        SET_FOREACH(fs, filesystems) {
+                r = fs_type_from_string(fs, &magic);
+                if (r < 0) {
+                        log_unit_warning(u, "Invalid filesystem name '%s', ignoring.", fs);
+                        continue;
+                }
+
+                log_unit_debug(u, "Restricting filesystem access to '%s'", fs);
+
+                for (int i = 0; i < FILESYSTEM_MAGIC_MAX; i++) {
+                        uint32_t key;
+
+                        if (magic[i] == 0)
+                                break;
+
+                        /* The map key is declared as sizeof(uint32_t) bytes. Copy the magic into a
+                         * variable of exactly that size instead of passing &magic[i]: if
+                         * statfs_f_type_t is wider than 32 bits, the first four bytes of the wider
+                         * value are not its low 32 bits on big-endian machines. */
+                        key = (uint32_t) magic[i];
+
+                        if (sym_bpf_map_update_elem(inner_map_fd, &key, &dummy_value, BPF_ANY) != 0) {
+                                r = log_unit_error_errno(u, errno, "Failed to update BPF map: %m");
+                                goto rollback;
+                        }
+                }
+        }
+
+        return 0;
+
+rollback:
+        /* Do not leave a partially populated inner map registered for this cgroup. */
+        if (sym_bpf_map_delete_elem(outer_map_fd, &u->cgroup_id) != 0)
+                log_unit_debug_errno(u, errno, "Failed to delete cgroup entry from LSM BPF map: %m");
+
+        return r;
+}
+
+/* Removes the entry keyed by u->cgroup_id from the manager's cgroup_hash map. Returns 0 if LSM BPF is
+ * unsupported, if no BPF object is loaded, or if the entry was deleted; otherwise returns the value of
+ * the logging call that reports the failure. */
+int lsm_bpf_cleanup(const Unit *u) {
+        int map_fd;
+
+        assert(u);
+        assert(u->manager);
+
+        /* Without support or without a loaded object nothing can have been registered. */
+        if (!lsm_bpf_supported() || !u->manager->restrict_fs)
+                return 0;
+
+        map_fd = sym_bpf_map__fd(u->manager->restrict_fs->maps.cgroup_hash);
+        if (map_fd < 0)
+                return log_unit_error_errno(u, errno, "Failed to get BPF map fd: %m");
+
+        if (sym_bpf_map_delete_elem(map_fd, &u->cgroup_id) == 0)
+                return 0;
+
+        return log_unit_debug_errno(u, errno, "Failed to delete cgroup entry from LSM BPF map: %m");
+}
+
+/* Returns the file descriptor of the manager's cgroup_hash map, or -ENOMEDIUM if the manager has no
+ * restrict_fs BPF object. */
+int lsm_bpf_map_restrict_fs_fd(Unit *unit) {
+        struct restrict_fs_bpf *skel;
+
+        assert(unit);
+        assert(unit->manager);
+
+        skel = unit->manager->restrict_fs;
+
+        return skel ? sym_bpf_map__fd(skel->maps.cgroup_hash) : -ENOMEDIUM;
+}
+
+/* Public wrapper around the skeleton's destroy function, restrict_fs_bpf__destroy(). */
+void lsm_bpf_destroy(struct restrict_fs_bpf *prog) {
+        restrict_fs_bpf__destroy(prog);
+}
+#else /* ! BPF_FRAMEWORK */
+/* Variant for builds without BPF_FRAMEWORK: LSM BPF is never supported. */
+int lsm_bpf_supported(void) {
+        return 0;
+}
+
+/* Variant for builds without BPF_FRAMEWORK: logs at debug level and fails with EOPNOTSUPP. */
+int lsm_bpf_setup(Manager *m) {
+        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to set up LSM BPF: %m");
+}
+
+/* Variant for builds without BPF_FRAMEWORK: nothing is restricted; logs at debug level for the unit
+ * and fails with EOPNOTSUPP. */
+int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, const bool allow_list) {
+        return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to restrict filesystems using LSM BPF: %m");
+}
+
+/* Variant for builds without BPF_FRAMEWORK: there is nothing to clean up, so this always succeeds. */
+int lsm_bpf_cleanup(const Unit *u) {
+        return 0;
+}
+
+/* Variant for builds without BPF_FRAMEWORK: no map exists, so this always returns -ENOMEDIUM, the
+ * same value the BPF_FRAMEWORK variant returns when no BPF object is loaded. */
+int lsm_bpf_map_restrict_fs_fd(Unit *unit) {
+        return -ENOMEDIUM;
+}
+
+/* Variant for builds without BPF_FRAMEWORK: no-op, 'prog' is not touched. */
+void lsm_bpf_destroy(struct restrict_fs_bpf *prog) {
+        return;
+}
+#endif
diff --git a/src/core/bpf-lsm.h b/src/core/bpf-lsm.h
new file mode 100644 (file)
index 0000000..625fb32
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hashmap.h"
+
+typedef struct Unit Unit;
+typedef struct Manager Manager;
+
+typedef struct restrict_fs_bpf restrict_fs_bpf;
+
+/* Returns 1 if LSM BPF can be used and 0 if not. */
+int lsm_bpf_supported(void);
+/* Loads the restrict_fs BPF object, stores it in the manager and attaches its LSM program. Returns 0 on
+ * success and an error otherwise. */
+int lsm_bpf_setup(Manager *m);
+/* Registers the given set of filesystem names for the unit's cgroup; allow_list selects whether the set
+ * is recorded as an allow list or a deny list. Returns 0 on success and an error otherwise. */
+int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, bool allow_list);
+/* Removes the unit's cgroup entry from the BPF map, if LSM BPF is in use. */
+int lsm_bpf_cleanup(const Unit *u);
+/* Returns the fd of the BPF map holding the per-cgroup entries, or -ENOMEDIUM if there is none. */
+int lsm_bpf_map_restrict_fs_fd(Unit *u);
+/* Destroys the restrict_fs BPF object (no-op in builds without BPF_FRAMEWORK). */
+void lsm_bpf_destroy(struct restrict_fs_bpf *prog);
index a5770c0332807b26171bafa12f21e164a6a58932..155925962226968ed7d5afc7d223f24ae0d92beb 100644 (file)
@@ -2102,6 +2102,8 @@ static int unit_update_cgroup(
 
         bool created, is_root_slice;
         CGroupMask migrate_mask = 0;
+        _cleanup_free_ char *cgroup_full_path = NULL;
+        uint64_t cgroup_id = 0;
         int r;
 
         assert(u);
@@ -2120,6 +2122,18 @@ static int unit_update_cgroup(
                 return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", empty_to_root(u->cgroup_path));
         created = r;
 
+        if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
+                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_full_path);
+                if (r == 0) {
+                        r = cg_path_get_cgroupid(cgroup_full_path, &cgroup_id);
+                        if (r < 0)
+                                log_unit_warning_errno(u, r, "Failed to get cgroup ID on cgroup %s, ignoring: %m", cgroup_full_path);
+                } else
+                        log_unit_warning_errno(u, r, "Failed to get full cgroup path on cgroup %s, ignoring: %m", empty_to_root(u->cgroup_path));
+
+                u->cgroup_id = cgroup_id;
+        }
+
         /* Start watching it */
         (void) unit_watch_cgroup(u);
         (void) unit_watch_cgroup_memory(u);
index 97c12ce48f16995ca1448798db0ed59d7abe921f..29ce812121208936cb44146403aaed6ccc4501f3 100644 (file)
@@ -450,6 +450,9 @@ struct Manager {
          * we're a user manager, this object manages the client connection from the user manager to
          * systemd-oomd to report changes in ManagedOOM settings (systemd client - oomd server). */
         Varlink *managed_oom_varlink;
+
+        /* Reference to RestrictFileSystems= BPF program */
+        struct restrict_fs_bpf *restrict_fs;
 };
 
 static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
index 62151e1678ed115954af7cdb980f2cc29d0f0a4d..de7c2ae798e7d9f18fc54bbbd2b012607dee39b3 100644 (file)
@@ -13,6 +13,8 @@ libcore_sources = '''
         bpf-firewall.h
         bpf-foreign.c
         bpf-foreign.h
+        bpf-lsm.c
+        bpf-lsm.h
         bpf-socket-bind.c
         bpf-socket-bind.h
         cgroup.c
index c8e99acf703974604e047b4249279f37d5d048ce..0dd6a9591d96c464922d224eaefa1f34ddacb190 100644 (file)
@@ -294,6 +294,7 @@ typedef struct Unit {
 
         /* Counterparts in the cgroup filesystem */
         char *cgroup_path;
+        uint64_t cgroup_id;
         CGroupMask cgroup_realized_mask;           /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */
         CGroupMask cgroup_enabled_mask;            /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */
         CGroupMask cgroup_invalidated_mask;        /* A mask specifying controllers which shall be considered invalidated, and require re-realization */