git.ipfire.org Git - thirdparty/lxc.git/commitdiff
cgroups/cgfsng: implement cgroup2 device controller live update
author Christian Brauner <christian.brauner@ubuntu.com>
Sat, 30 Nov 2019 18:33:19 +0000 (19:33 +0100)
committer Christian Brauner <christian.brauner@ubuntu.com>
Sun, 1 Dec 2019 16:07:23 +0000 (17:07 +0100)
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
src/lxc/cgroups/cgfsng.c
src/lxc/cgroups/cgroup.h
src/lxc/cgroups/cgroup2_devices.c
src/lxc/cgroups/cgroup2_devices.h
src/lxc/commands.c
src/lxc/commands.h
src/lxc/conf.h
src/lxc/log.h

index 4b4a6a92a12d9176b5f74368ab3c8e3adfc66a2a..d41f486a0166d7ec877c04663181d281f29bc926 100644 (file)
@@ -176,6 +176,11 @@ static void must_append_controller(char **klist, char **nlist, char ***clist,
        (*clist)[newentry] = copy;
 }
 
+/* Report whether @ops was set up with the unified cgroup layout. */
+static inline bool pure_unified_layout(const struct cgroup_ops *ops)
+{
+       if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED)
+               return false;
+
+       return true;
+}
+
 /* Given a handler's cgroup data, return the struct hierarchy for the controller
  * @c, or NULL if there is none.
  */
@@ -196,8 +201,12 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
                        if (ops->hierarchies[i]->controllers &&
                            !ops->hierarchies[i]->controllers[0])
                                return ops->hierarchies[i];
-
                        continue;
+               } else if (pure_unified_layout(ops) &&
+                          strcmp(controller, "devices") == 0) {
+                       if (ops->unified->bpf_device_controller)
+                               return ops->unified;
+                       break;
                }
 
                if (string_in_list(ops->hierarchies[i]->controllers, controller))
@@ -778,9 +787,9 @@ static char **cg_unified_make_empty_controller(void)
 static char **cg_unified_get_controllers(const char *file)
 {
        __do_free char *buf = NULL;
-       char *tok;
        char *sep = " \t\n";
        char **aret = NULL;
+       char *tok;
 
        buf = read_file(file);
        if (!buf)
@@ -2278,12 +2287,115 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
        return ret;
 }
 
+/*
+ * Parse one device number ("*" or a run of decimal digits) starting at *@pos.
+ *
+ * A "*" wildcard is stored as -1 in @number. Otherwise at most 49 digits are
+ * copied and converted with lxc_safe_int(). On success *@pos is advanced to
+ * the first character that was not consumed and 0 is returned. Returns -1 if
+ * the input starts with neither "*" nor a digit or if the conversion fails.
+ */
+static int device_cgroup_parse_devnum(const char **pos, int *number)
+{
+       char temp[50] = {0};
+       const char *cur = *pos;
+       size_t len;
+
+       if (*cur == '*') {
+               *number = -1;
+               *pos = cur + 1;
+               return 0;
+       }
+
+       /*
+        * The ctype helpers are only defined for values representable as
+        * unsigned char (or EOF), and plain char may be negative.
+        */
+       if (!isdigit((unsigned char)*cur))
+               return -1;
+
+       for (len = 0; len < sizeof(temp) - 1; len++) {
+               temp[len] = *cur;
+               cur++;
+               if (!isdigit((unsigned char)*cur))
+                       break;
+       }
+       *pos = cur;
+
+       if (lxc_safe_int(temp, number))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Parse a device cgroup rule string into @device.
+ *
+ * @key selects the verdict: "devices.allow" produces an allow rule, any other
+ * key produces a deny rule. @val is either the single letter "a" (a global
+ * rule: the verdict is stored in @device->global_rule and @device->allow is
+ * set to -1) or "<a|b|c> <major|*>:<minor|*> <access>", where <access> is up
+ * to three of the letters 'r', 'w' and 'm', optionally ended by a newline.
+ * For non-global rules @device->global_rule is set to -1.
+ *
+ * The access string is never terminated here, so the caller has to pass in a
+ * zero-initialized @device.
+ *
+ * Returns 0 on success and -1 if @val is malformed.
+ */
+static int device_cgroup_rule_parse(struct device_item *device, const char *key,
+                                   const char *val)
+{
+       int count;
+
+       device->allow = (strcmp("devices.allow", key) == 0) ? 1 : 0;
+
+       if (strcmp(val, "a") == 0) {
+               /* global rule */
+               device->type = 'a';
+               device->major = -1;
+               device->minor = -1;
+               device->global_rule = device->allow;
+               device->allow = -1;
+               return 0;
+       }
+
+       device->global_rule = -1;
+
+       /* read type */
+       if (*val != 'a' && *val != 'b' && *val != 'c')
+               return -1;
+       device->type = *val;
+       val++;
+
+       if (!isspace((unsigned char)*val))
+               return -1;
+       val++;
+
+       /* read major */
+       if (device_cgroup_parse_devnum(&val, &device->major) < 0)
+               return -1;
+
+       if (*val != ':')
+               return -1;
+       val++;
+
+       /* read minor */
+       if (device_cgroup_parse_devnum(&val, &device->minor) < 0)
+               return -1;
+
+       if (!isspace((unsigned char)*val))
+               return -1;
+       val++;
+
+       /* read access: stop at the first newline or at the end of the string */
+       for (count = 0; count < 3; count++, val++) {
+               switch (*val) {
+               case 'r':
+               case 'w':
+               case 'm':
+                       device->access[count] = *val;
+                       break;
+               case '\n':
+               case '\0':
+                       return 0;
+               default:
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
 /* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits.  Here we
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
  */
 __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
-                                    const char *filename, const char *value,
+                                    const char *key, const char *value,
                                     const char *name, const char *lxcpath)
 {
        __do_free char *path = NULL;
@@ -2292,11 +2404,26 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
        struct hierarchy *h;
        int ret = -1;
 
-       controller = must_copy_string(filename);
+       controller = must_copy_string(key);
        p = strchr(controller, '.');
        if (p)
                *p = '\0';
 
+       if (pure_unified_layout(ops) && strcmp(controller, "devices") == 0) {
+               struct device_item device = {0};
+
+               ret = device_cgroup_rule_parse(&device, key, value);
+               if (ret < 0)
+                       return error_log_errno(EINVAL, "Failed to parse device string %s=%s",
+                                              key, value);
+
+               ret = lxc_cmd_add_bpf_device_cgroup(name, lxcpath, &device);
+               if (ret < 0)
+                       return -1;
+
+               return 0;
+       }
+
        path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
        /* not running */
        if (!path)
@@ -2306,7 +2433,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
        if (h) {
                __do_free char *fullpath = NULL;
 
-               fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+               fullpath = build_full_cgpath_from_monitorpath(h, path, key);
                ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
        }
 
@@ -2481,50 +2608,6 @@ out:
        return ret;
 }
 
-static int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device)
-{
-       __do_free struct lxc_list *list_elem = NULL;
-       __do_free struct device_item *new_device = NULL;
-       struct lxc_list *it;
-
-       lxc_list_for_each(it, &conf->devices) {
-               struct device_item *cur = it->elem;
-
-               if (cur->type != device->type)
-                       continue;
-               if (cur->major != device->major)
-                       continue;
-               if (cur->minor != device->minor)
-                       continue;
-               if (strcmp(cur->access, device->access))
-                       continue;
-
-               /*
-                * The rule is switched from allow to deny or vica versa so
-                * don't bother allocating just flip the existing one.
-                */
-               if (cur->allow != device->allow) {
-                       cur->allow = device->allow;
-                       return log_trace(0, "Reusing existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d",
-                                        cur->type, cur->major, cur->minor,
-                                        cur->access, cur->allow);
-               }
-       }
-
-       list_elem = malloc(sizeof(*list_elem));
-       if (!list_elem)
-               return error_log_errno(ENOMEM, "Failed to allocate new device list");
-
-       new_device = memdup(device, sizeof(struct device_item));
-       if (!new_device)
-               return error_log_errno(ENOMEM, "Failed to allocate new device item");
-
-       lxc_list_add_elem(list_elem, move_ptr(new_device));
-       lxc_list_add_tail(&conf->devices, move_ptr(list_elem));
-
-       return 0;
-}
-
 /*
  * Some of the parsing logic comes from the original cgroup device v1
  * implementation in the kernel.
@@ -2535,129 +2618,17 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
 {
 #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
        struct device_item device_item = {0};
-       int count, ret;
-       char temp[50];
-       struct bpf_program *device;
-
-       if (ops->cgroup2_devices) {
-               device = ops->cgroup2_devices;
-       } else {
-               device = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
-               if (device && bpf_program_init(device)) {
-                       ERROR("Failed to initialize bpf program");
-                       return -1;
-               }
-       }
-       if (!device) {
-               ERROR("Failed to create new ebpf device program");
-               return -1;
-       }
-
-       ops->cgroup2_devices = device;
-
-       if (strcmp("devices.allow", key) == 0)
-               device_item.allow = 1;
-
-       if (strcmp(val, "a") == 0) {
-               device->blacklist = (device_item.allow == 1);
-               return 0;
-       }
-
-       switch (*val) {
-       case 'a':
-               __fallthrough;
-       case 'b':
-               __fallthrough;
-       case 'c':
-               device_item.type = *val;
-               break;
-       default:
-               return -1;
-       }
-
-       val++;
-       if (!isspace(*val))
-               return -1;
-       val++;
-       if (*val == '*') {
-               device_item.major = ~0;
-               val++;
-       } else if (isdigit(*val)) {
-               memset(temp, 0, sizeof(temp));
-               for (count = 0; count < sizeof(temp) - 1; count++) {
-                       temp[count] = *val;
-                       val++;
-                       if (!isdigit(*val))
-                               break;
-               }
-               ret = lxc_safe_uint(temp, &device_item.major);
-               if (ret)
-                       return -1;
-       } else {
-               return -1;
-       }
-       if (*val != ':')
-               return -1;
-       val++;
-
-       /* read minor */
-       if (*val == '*') {
-               device_item.minor = ~0;
-               val++;
-       } else if (isdigit(*val)) {
-               memset(temp, 0, sizeof(temp));
-               for (count = 0; count < sizeof(temp) - 1; count++) {
-                       temp[count] = *val;
-                       val++;
-                       if (!isdigit(*val))
-                               break;
-               }
-               ret = lxc_safe_uint(temp, &device_item.minor);
-               if (ret)
-                       return -1;
-       } else {
-               return -1;
-       }
-       if (!isspace(*val))
-               return -1;
-       for (val++, count = 0; count < 3; count++, val++) {
-               switch (*val) {
-               case 'r':
-                       device_item.access[count] = *val;
-                       break;
-               case 'w':
-                       device_item.access[count] = *val;
-                       break;
-               case 'm':
-                       device_item.access[count] = *val;
-                       break;
-               case '\n':
-               case '\0':
-                       count = 3;
-                       break;
-               default:
-                       return -1;
-               }
-       }
+       int ret;
 
-       ret = bpf_program_append_device(device, device_item.type, device_item.major,
-                                       device_item.minor, device_item.access,
-                                       device_item.allow);
-       if (ret) {
-               ERROR("Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
-                     device_item.type, device_item.major, device_item.minor,
-                     device_item.access, device_item.allow);
-               return -1;
-       } else {
-               TRACE("Added new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
-                     device_item.type, device_item.major, device_item.minor,
-                     device_item.access, device_item.allow);
-       }
+       ret = device_cgroup_rule_parse(&device_item, key, val);
+       if (ret < 0)
+               return error_log_errno(EINVAL,
+                                      "Failed to parse device string %s=%s",
+                                      key, val);
 
        ret = bpf_list_add_device(conf, &device_item);
-       if (ret)
+       if (ret < 0)
                return -1;
-
 #endif
        return 0;
 }
@@ -2705,36 +2676,57 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
                                          struct lxc_handler *handler)
 {
 #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+       __do_bpf_program_free struct bpf_program *devices = NULL;
+       struct lxc_conf *conf = handler->conf;
+       struct hierarchy *unified = ops->unified;
        int ret;
-       struct lxc_conf *conf;
-       struct hierarchy *h = ops->unified;
-       struct bpf_program *devices_new = ops->cgroup2_devices;
+       struct lxc_list *it;
+       struct bpf_program *devices_old;
 
-       if (!h)
+       if (!unified)
                return false;
 
-       if (!devices_new)
+       if (lxc_list_empty(&conf->devices))
                return true;
 
-       ret = bpf_program_finalize(devices_new);
+       devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+       if (!devices)
+               return log_error(false, ENOMEM,
+                                "Failed to create new bpf program");
+
+       ret = bpf_program_init(devices);
        if (ret)
-               return false;
+               return log_error(false, ENOMEM,
+                                "Failed to initialize bpf program");
+
+       lxc_list_for_each(it, &conf->devices) {
+               struct device_item *cur = it->elem;
+
+               ret = bpf_program_append_device(devices, cur);
+               if (ret)
+                       return log_error(false,
+                                        ENOMEM, "Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+                                        cur->type, cur->major, cur->minor,
+                                        cur->access, cur->allow, cur->global_rule);
+               TRACE("Added rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+                     cur->type, cur->major, cur->minor, cur->access,
+                     cur->allow, cur->global_rule);
+       }
+
+       ret = bpf_program_finalize(devices);
+       if (ret)
+               return log_error(false, ENOMEM, "Failed to finalize bpf program");
 
-       ret = bpf_program_cgroup_attach(devices_new, BPF_CGROUP_DEVICE,
-                                       h->container_full_path,
+       ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
+                                       unified->container_full_path,
                                        BPF_F_ALLOW_MULTI);
        if (ret)
-               return false;
+               return log_error(false, ENOMEM, "Failed to attach bpf program");
 
        /* Replace old bpf program. */
-       conf = handler->conf;
-       if (conf->cgroup2_devices) {
-               struct bpf_program *old_devices;
-
-               old_devices = move_ptr(conf->cgroup2_devices);
-               conf->cgroup2_devices = move_ptr(ops->cgroup2_devices);
-               bpf_program_free(old_devices);
-       }
+       devices_old = move_ptr(conf->cgroup2_devices);
+       conf->cgroup2_devices = move_ptr(devices);
+       devices = move_ptr(devices_old);
 #endif
        return true;
 }
@@ -3045,6 +3037,9 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
        if (!unprivileged)
                cg_unified_delegate(&new->cgroup2_chown);
 
+       if (bpf_devices_cgroup_supported())
+               new->bpf_device_controller = 1;
+
        ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
        ops->unified = new;
        return CGROUP2_SUPER_MAGIC;
index 47a1550079feba7b218d11bdef82748021c5854f..a3eb46b9d6937fe2576b0c132a796e30aeaef9a0 100644 (file)
@@ -92,6 +92,7 @@ struct hierarchy {
        char *container_full_path;
        char *monitor_full_path;
        int version;
+       int bpf_device_controller:1;
 };
 
 struct cgroup_ops {
index 762fd14f6d4f3c69b736fde9c2d0b3e4cfc2d8e0..52c1860f51e8c90e3c8e94f72c4ebdc454f5fb5d 100644 (file)
@@ -195,33 +195,38 @@ int bpf_program_init(struct bpf_program *prog)
        return bpf_program_add_instructions(prog, pre_insn, ARRAY_SIZE(pre_insn));
 }
 
-int bpf_program_append_device(struct bpf_program *prog, char type, int major,
-                             int minor, const char *access, int allow)
+int bpf_program_append_device(struct bpf_program *prog, struct device_item *device)
 {
        int ret;
        int jump_nr = 1;
        struct bpf_insn bpf_access_decision[] = {
-           BPF_MOV64_IMM(BPF_REG_0, allow),
+           BPF_MOV64_IMM(BPF_REG_0, device->allow),
            BPF_EXIT_INSN(),
        };
        int access_mask;
        int device_type;
 
-       device_type = bpf_device_type(type);
+       /* This is a global rule so no need to append anything. */
+       if (device->global_rule >= 0) {
+               prog->blacklist = device->global_rule;
+               return 0;
+       }
+
+       device_type = bpf_device_type(device->type);
        if (device_type < 0)
-               return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", type);
+               return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", device->type);
 
        if (device_type > 0)
                jump_nr++;
 
-       access_mask = bpf_access_mask(access);
+       access_mask = bpf_access_mask(device->access);
        if (!bpf_device_all_access(access_mask))
                jump_nr += 3;
 
-       if (major >= 0)
+       if (device->major != -1)
                jump_nr++;
 
-       if (minor >= 0)
+       if (device->minor != -1)
                jump_nr++;
 
        if (device_type > 0) {
@@ -247,9 +252,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
                        return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
        }
 
-       if (major >= 0) {
+       if (device->major >= 0) {
                struct bpf_insn ins[] = {
-                   BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, jump_nr--),
+                   BPF_JMP_IMM(BPF_JNE, BPF_REG_4, device->major, jump_nr--),
                };
 
                ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
@@ -257,9 +262,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
                        return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
        }
 
-       if (minor >= 0) {
+       if (device->minor >= 0) {
                struct bpf_insn ins[] = {
-                   BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, jump_nr--),
+                   BPF_JMP_IMM(BPF_JNE, BPF_REG_5, device->minor, jump_nr--),
                };
 
                ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
@@ -411,4 +416,94 @@ void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
                (void)bpf_program_free(conf->cgroup2_devices);
        }
 }
+
+/*
+ * Record @device in the device list of @conf.
+ *
+ * Returns 0 when a copy of @device was appended or an existing entry had its
+ * allow verdict flipped, and 1 when the stored global rule was overwritten or
+ * an identical entry already exists. If an allocation fails the result of
+ * error_log_errno() is returned.
+ */
+int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device)
+{
+       __do_free struct lxc_list *new_elem = NULL;
+       __do_free struct device_item *copy = NULL;
+       struct lxc_list *iter;
+
+       lxc_list_for_each(iter, &conf->devices) {
+               struct device_item *entry = iter->elem;
+               bool same_rule;
+
+               /* Both sides are global rules: overwrite the stored one. */
+               if (entry->global_rule != -1 && device->global_rule != -1) {
+                       TRACE("Switched from %s to %s",
+                             entry->global_rule == 0 ? "whitelist" : "blacklist",
+                             device->global_rule == 0 ? "whitelist"
+                                                      : "blacklist");
+                       entry->global_rule = device->global_rule;
+                       return 1;
+               }
+
+               same_rule = entry->type == device->type &&
+                           entry->major == device->major &&
+                           entry->minor == device->minor &&
+                           strcmp(entry->access, device->access) == 0;
+               if (!same_rule)
+                       continue;
+
+               /* Nothing to change: the very same rule is already stored. */
+               if (entry->allow == device->allow)
+                       return log_trace(1, "Reusing existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+                                        entry->type, entry->major, entry->minor, entry->access,
+                                        entry->allow, entry->global_rule);
+
+               /*
+                * Only the verdict differs, so flip the stored entry instead
+                * of allocating a new one.
+                */
+               entry->allow = device->allow;
+               return log_trace(0, "Switched existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+                                entry->type, entry->major, entry->minor,
+                                entry->access, entry->allow,
+                                entry->global_rule);
+       }
+
+       new_elem = malloc(sizeof(*new_elem));
+       if (!new_elem)
+               return error_log_errno(ENOMEM, "Failed to allocate new device list");
+
+       copy = memdup(device, sizeof(struct device_item));
+       if (!copy)
+               return error_log_errno(ENOMEM, "Failed to allocate new device item");
+
+       /* Ownership of both allocations moves into the list. */
+       lxc_list_add_elem(new_elem, move_ptr(copy));
+       lxc_list_add_tail(&conf->devices, move_ptr(new_elem));
+
+       return 0;
+}
+
+/*
+ * Probe whether a bpf device cgroup program can be loaded.
+ *
+ * Builds a two-instruction dummy program of type BPF_PROG_TYPE_CGROUP_DEVICE
+ * and tries to load it into the kernel. Returns false when the effective uid
+ * is not 0 or when any step fails, true otherwise. The probe program is
+ * released automatically when the function returns.
+ */
+bool bpf_devices_cgroup_supported(void)
+{
+       const struct bpf_insn dummy[] = {
+           BPF_MOV64_IMM(BPF_REG_0, 1),
+           BPF_EXIT_INSN(),
+       };
+
+       __do_bpf_program_free struct bpf_program *prog = NULL;
+       int ret;
+
+       if (geteuid() != 0)
+               return log_error(false, EINVAL,
+                                "The bpf device cgroup requires real root");
+
+       prog = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+       /*
+        * bpf_program_new() hands back a pointer, so failure has to be
+        * detected with a NULL check; an ordered comparison against 0 would
+        * never catch it.
+        */
+       if (!prog)
+               return log_error(false,
+                                errno, "Failed to allocate new bpf device cgroup program");
+
+       ret = bpf_program_add_instructions(prog, dummy, ARRAY_SIZE(dummy));
+       if (ret < 0)
+               return log_error(false,
+                                errno, "Failed to add new instructions to bpf device cgroup program");
+
+       ret = bpf_program_load_kernel(prog, NULL, 0);
+       if (ret < 0)
+               return log_error(false,
+                                errno, "Failed to load new bpf device cgroup program");
+
+       return log_trace(true, "The bpf device cgroup is supported");
+}
 #endif
index afcc6b937698d6fcb1aec45f5972bbbfabe8d20e..a02735a1ab921a2b045833e0399d716f3abc0bd6 100644 (file)
@@ -5,6 +5,7 @@
 #ifndef __LXC_CGROUP2_DEVICES_H
 #define __LXC_CGROUP2_DEVICES_H
 
+#include <errno.h>
 #include <fcntl.h>
 #include <stdbool.h>
 #include <stddef.h>
@@ -79,53 +80,61 @@ struct bpf_program {
 #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
 struct bpf_program *bpf_program_new(uint32_t prog_type);
 int bpf_program_init(struct bpf_program *prog);
-int bpf_program_append_device(struct bpf_program *prog, char type, int major,
-                             int minor, const char *access, int allow);
+int bpf_program_append_device(struct bpf_program *prog,
+                             struct device_item *device);
 int bpf_program_finalize(struct bpf_program *prog);
 int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
                              const char *path, uint32_t flags);
 int bpf_program_cgroup_detach(struct bpf_program *prog);
 void bpf_program_free(struct bpf_program *prog);
 void lxc_clear_cgroup2_devices(struct lxc_conf *conf);
-static inline void __do_bpf_program_free(struct bpf_program **prog)
+bool bpf_devices_cgroup_supported(void);
+static inline void __auto_bpf_program_free__(struct bpf_program **prog)
 {
        if (*prog) {
                bpf_program_free(*prog);
                *prog = NULL;
        }
 }
+int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device);
 #else
 static inline struct bpf_program *bpf_program_new(uint32_t prog_type)
 {
+       errno = ENOSYS;
        return NULL;
 }
 
 static inline int bpf_program_init(struct bpf_program *prog)
 {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
 }
 
+/*
+ * Stub used when HAVE_STRUCT_BPF_CGROUP_DEV_CTX is not defined. It takes the
+ * same parameters as the real bpf_program_append_device() and always fails
+ * with errno set to ENOSYS.
+ */
-static inline int bpf_program_append_device(struct bpf_program *prog, char type,
-                                           int major, int minor,
-                                           const char *access, int allow)
+static inline int bpf_program_append_device(struct bpf_program *prog,
+                                           struct device_item *device)
 {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
 }
 
 static inline int bpf_program_finalize(struct bpf_program *prog)
 {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
 }
 
 static inline int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
                                            const char *path, uint32_t flags)
 {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
 }
 
 static inline int bpf_program_cgroup_detach(struct bpf_program *prog)
 {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
 }
 
 static inline void bpf_program_free(struct bpf_program *prog)
@@ -136,9 +145,24 @@ static inline void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
 {
 }
 
-static inline void __do_bpf_program_free(struct bpf_program **prog)
+static inline bool bpf_devices_cgroup_supported(void)
+{
+       return false;
+}
+
+static inline void __auto_bpf_program_free__(struct bpf_program **prog)
+{
+}
+
+static inline int bpf_list_add_device(struct lxc_conf *conf,
+                                     struct device_item *device)
 {
+       errno = ENOSYS;
+       return -1;
 }
 #endif
 
+#define __do_bpf_program_free \
+       __attribute__((__cleanup__(__auto_bpf_program_free__)))
+
 #endif /* __LXC_CGROUP2_DEVICES_H */
index 90e3c5863d41f13b217c76fef4fd8d9aab65f96b..f4920c7846a5137bd17f14a160e8870378c12912 100644 (file)
@@ -39,6 +39,7 @@
 
 #include "af_unix.h"
 #include "cgroup.h"
+#include "cgroups/cgroup2_devices.h"
 #include "commands.h"
 #include "commands_utils.h"
 #include "conf.h"
@@ -85,20 +86,21 @@ lxc_log_define(commands, lxc);
 static const char *lxc_cmd_str(lxc_cmd_t cmd)
 {
        static const char *const cmdname[LXC_CMD_MAX] = {
-               [LXC_CMD_CONSOLE]             = "console",
-               [LXC_CMD_TERMINAL_WINCH]      = "terminal_winch",
-               [LXC_CMD_STOP]                = "stop",
-               [LXC_CMD_GET_STATE]           = "get_state",
-               [LXC_CMD_GET_INIT_PID]        = "get_init_pid",
-               [LXC_CMD_GET_CLONE_FLAGS]     = "get_clone_flags",
-               [LXC_CMD_GET_CGROUP]          = "get_cgroup",
-               [LXC_CMD_GET_CONFIG_ITEM]     = "get_config_item",
-               [LXC_CMD_GET_NAME]            = "get_name",
-               [LXC_CMD_GET_LXCPATH]         = "get_lxcpath",
-               [LXC_CMD_ADD_STATE_CLIENT]    = "add_state_client",
-               [LXC_CMD_CONSOLE_LOG]         = "console_log",
-               [LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients",
-               [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener",
+               [LXC_CMD_CONSOLE]                       = "console",
+               [LXC_CMD_TERMINAL_WINCH]                = "terminal_winch",
+               [LXC_CMD_STOP]                          = "stop",
+               [LXC_CMD_GET_STATE]                     = "get_state",
+               [LXC_CMD_GET_INIT_PID]                  = "get_init_pid",
+               [LXC_CMD_GET_CLONE_FLAGS]               = "get_clone_flags",
+               [LXC_CMD_GET_CGROUP]                    = "get_cgroup",
+               [LXC_CMD_GET_CONFIG_ITEM]               = "get_config_item",
+               [LXC_CMD_GET_NAME]                      = "get_name",
+               [LXC_CMD_GET_LXCPATH]                   = "get_lxcpath",
+               [LXC_CMD_ADD_STATE_CLIENT]              = "add_state_client",
+               [LXC_CMD_CONSOLE_LOG]                   = "console_log",
+               [LXC_CMD_SERVE_STATE_CLIENTS]           = "serve_state_clients",
+               [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER]   = "seccomp_notify_add_listener",
+               [LXC_CMD_ADD_BPF_DEVICE_CGROUP]         = "add_bpf_device_cgroup",
        };
 
        if (cmd >= LXC_CMD_MAX)
@@ -925,6 +927,118 @@ reap_client_fd:
        return 1;
 }
 
+/*
+ * Client side of LXC_CMD_ADD_BPF_DEVICE_CGROUP: send @device to the command
+ * handler of container @name under @lxcpath.
+ *
+ * Returns 0 when both the command and the handler's response report success.
+ * On failure the value produced by error_log_errno() is returned; errno is
+ * EINVAL when @device->access has no terminator inside its buffer. When built
+ * without HAVE_STRUCT_BPF_CGROUP_DEV_CTX the result of
+ * minus_one_set_errno(ENOSYS) is returned.
+ */
+int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
+                                 struct device_item *device)
+{
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+       int stopped = 0;
+       struct lxc_cmd_rr cmd = {
+           .req = {
+               .cmd     = LXC_CMD_ADD_BPF_DEVICE_CGROUP,
+               .data    = device,
+               .datalen = sizeof(struct device_item),
+           },
+       };
+       int ret;
+
+       /*
+        * access is a fixed-size buffer. Search for the terminator inside it
+        * rather than calling strlen(): an over-long access string is exactly
+        * the unterminated case in which strlen() and "%s" would read past
+        * the end of the buffer.
+        */
+       if (!memchr(device->access, '\0', sizeof(device->access)))
+               return error_log_errno(EINVAL, "Invalid access mode specified %.*s",
+                                      (int)sizeof(device->access), device->access);
+
+       /*
+        * NOTE(review): when only cmd.rsp.ret is negative, errno was not set
+        * by a failing call in this function and may be stale - confirm what
+        * value should be reported in that case.
+        */
+       ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
+       if (ret < 0 || cmd.rsp.ret < 0)
+               return error_log_errno(errno, "Failed to add new bpf device cgroup rule");
+
+       return 0;
+#else
+       return minus_one_set_errno(ENOSYS);
+#endif
+}
+
+/*
+ * Handler for LXC_CMD_ADD_BPF_DEVICE_CGROUP.
+ *
+ * req->data must hold exactly one struct device_item. The item is passed to
+ * bpf_list_add_device(), a new BPF_PROG_TYPE_CGROUP_DEVICE program is built
+ * from every entry of conf->devices and attached to the unified hierarchy,
+ * and only after a successful attach does it replace conf->cgroup2_devices.
+ *
+ * Returns 0 once a response was sent (rsp.ret is 0 on success, -1 on
+ * failure) and 1 when the request was malformed or the response could not be
+ * sent, which tells lxc_cmd_handler() to close the client fd. When built
+ * without HAVE_STRUCT_BPF_CGROUP_DEV_CTX the result of
+ * minus_one_set_errno(ENOSYS) is returned.
+ */
+static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *req,
+                                                 struct lxc_handler *handler,
+                                                 struct lxc_epoll_descr *descr)
+{
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+       __do_bpf_program_free struct bpf_program *devices = NULL;
+       struct lxc_cmd_rsp rsp = {0};
+       struct lxc_conf *conf = handler->conf;
+       struct hierarchy *unified = handler->cgroup_ops->unified;
+       int ret;
+       struct lxc_list *it;
+       struct device_item *device;
+       struct bpf_program *devices_old;
+
+       /* Anything other than exactly one struct device_item is malformed. */
+       if (req->datalen != sizeof(struct device_item))
+               goto reap_client_fd;
+
+       if (!req->data)
+               goto reap_client_fd;
+       device = (struct device_item *)req->data;
+
+       /* From here on every failure is reported back to the client. */
+       rsp.ret = -1;
+       if (!unified)
+               goto respond;
+
+       /*
+        * The payload was supplied by the client: refuse an access string
+        * that is not terminated inside its fixed-size buffer before any
+        * other code treats it as a C string.
+        */
+       if (!memchr(device->access, '\0', sizeof(device->access)))
+               goto respond;
+
+       /*
+        * NOTE(review): the failure paths below do not undo this call, so the
+        * new entry presumably stays in conf->devices even if the program is
+        * never attached - confirm whether a rollback is needed.
+        */
+       ret = bpf_list_add_device(conf, device);
+       if (ret < 0)
+               goto respond;
+
+       devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+       if (!devices)
+               goto respond;
+
+       ret = bpf_program_init(devices);
+       if (ret)
+               goto respond;
+
+       /* Rebuild the whole program from the current device list. */
+       lxc_list_for_each(it, &conf->devices) {
+               struct device_item *cur = it->elem;
+
+               ret = bpf_program_append_device(devices, cur);
+               if (ret)
+                       goto respond;
+       }
+
+       ret = bpf_program_finalize(devices);
+       if (ret)
+               goto respond;
+
+       ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
+                                       unified->container_full_path,
+                                       BPF_F_ALLOW_MULTI);
+       if (ret)
+               goto respond;
+
+       /*
+        * Replace old bpf program. The previous program ends up in `devices`;
+        * presumably its __do_bpf_program_free annotation releases it when
+        * this function returns.
+        */
+       devices_old = move_ptr(conf->cgroup2_devices);
+       conf->cgroup2_devices = move_ptr(devices);
+       devices = move_ptr(devices_old);
+
+       rsp.ret = 0;
+
+respond:
+       ret = lxc_cmd_rsp_send(fd, &rsp);
+       if (ret < 0)
+               goto reap_client_fd;
+
+       return 0;
+
+reap_client_fd:
+       /* Special indicator to lxc_cmd_handler() to close the fd and do related
+        * cleanup.
+        */
+       return 1;
+#else
+       return minus_one_set_errno(ENOSYS);
+#endif
+}
+
 int lxc_cmd_console_log(const char *name, const char *lxcpath,
                        struct lxc_console_log *log)
 {
@@ -1123,20 +1237,21 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
                                struct lxc_epoll_descr *);
 
        callback cb[LXC_CMD_MAX] = {
-               [LXC_CMD_CONSOLE]                     = lxc_cmd_console_callback,
-               [LXC_CMD_TERMINAL_WINCH]              = lxc_cmd_terminal_winch_callback,
-               [LXC_CMD_STOP]                        = lxc_cmd_stop_callback,
-               [LXC_CMD_GET_STATE]                   = lxc_cmd_get_state_callback,
-               [LXC_CMD_GET_INIT_PID]                = lxc_cmd_get_init_pid_callback,
-               [LXC_CMD_GET_CLONE_FLAGS]             = lxc_cmd_get_clone_flags_callback,
-               [LXC_CMD_GET_CGROUP]                  = lxc_cmd_get_cgroup_callback,
-               [LXC_CMD_GET_CONFIG_ITEM]             = lxc_cmd_get_config_item_callback,
-               [LXC_CMD_GET_NAME]                    = lxc_cmd_get_name_callback,
-               [LXC_CMD_GET_LXCPATH]                 = lxc_cmd_get_lxcpath_callback,
-               [LXC_CMD_ADD_STATE_CLIENT]            = lxc_cmd_add_state_client_callback,
-               [LXC_CMD_CONSOLE_LOG]                 = lxc_cmd_console_log_callback,
-               [LXC_CMD_SERVE_STATE_CLIENTS]         = lxc_cmd_serve_state_clients_callback,
-               [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = lxc_cmd_seccomp_notify_add_listener_callback,
+               [LXC_CMD_CONSOLE]                       = lxc_cmd_console_callback,
+               [LXC_CMD_TERMINAL_WINCH]                = lxc_cmd_terminal_winch_callback,
+               [LXC_CMD_STOP]                          = lxc_cmd_stop_callback,
+               [LXC_CMD_GET_STATE]                     = lxc_cmd_get_state_callback,
+               [LXC_CMD_GET_INIT_PID]                  = lxc_cmd_get_init_pid_callback,
+               [LXC_CMD_GET_CLONE_FLAGS]               = lxc_cmd_get_clone_flags_callback,
+               [LXC_CMD_GET_CGROUP]                    = lxc_cmd_get_cgroup_callback,
+               [LXC_CMD_GET_CONFIG_ITEM]               = lxc_cmd_get_config_item_callback,
+               [LXC_CMD_GET_NAME]                      = lxc_cmd_get_name_callback,
+               [LXC_CMD_GET_LXCPATH]                   = lxc_cmd_get_lxcpath_callback,
+               [LXC_CMD_ADD_STATE_CLIENT]              = lxc_cmd_add_state_client_callback,
+               [LXC_CMD_CONSOLE_LOG]                   = lxc_cmd_console_log_callback,
+               [LXC_CMD_SERVE_STATE_CLIENTS]           = lxc_cmd_serve_state_clients_callback,
+               [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER]   = lxc_cmd_seccomp_notify_add_listener_callback,
+               [LXC_CMD_ADD_BPF_DEVICE_CGROUP]         = lxc_cmd_add_bpf_device_cgroup_callback,
        };
 
        if (req->cmd >= LXC_CMD_MAX) {
index d7d0c6096aa2d77f24490a69f5ef13f96428dc51..008b7c30e24dd718f85ffb8cd4add564b4475c8d 100644 (file)
@@ -47,6 +47,7 @@ typedef enum {
        LXC_CMD_CONSOLE_LOG,
        LXC_CMD_SERVE_STATE_CLIENTS,
        LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER,
+       LXC_CMD_ADD_BPF_DEVICE_CGROUP,
        LXC_CMD_MAX,
 } lxc_cmd_t;
 
@@ -131,4 +132,8 @@ extern int lxc_cmd_seccomp_notify_add_listener(const char *name,
                                               /* unused */ unsigned int command,
                                               /* unused */ unsigned int flags);
 
+struct device_item;
+extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
+                                        struct device_item *device);
+
 #endif /* __commands_h */
index 44d7934fe4bf55980cfae9f2b788601856ff49e3..9142d31710e550d9f06265e02b8c75c395a0c483 100644 (file)
@@ -236,6 +236,11 @@ struct device_item {
        int minor;
        char access[4];
        int allow;
+       /* -1 -> no global rule
+        *  0 -> whitelist (deny all)
+        *  1 -> blacklist (allow all)
+        */
+       int global_rule;
 };
 
 struct lxc_conf {
index 8e459196436445fa6b9330f1ab7dbc604654c496..c6b2be2d6e738f184a5bdbe7728a28899428e315 100644 (file)
@@ -518,6 +518,13 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo,        \
                __ret__;                      \
        })
 
+/*
+ * Store @__errno__ in errno, log the formatted message through SYSERROR()
+ * and evaluate to @__ret__, so a caller can write `return log_error(...);`.
+ */
+#define log_error(__ret__, __errno__, format, ...)      \
+       ({                                              \
+               errno = (__errno__);                    \
+               SYSERROR(format, ##__VA_ARGS__);        \
+               (__ret__);                              \
+       })
+
 extern int lxc_log_fd;
 
 extern int lxc_log_syslog(int facility);