cgroups/cgfsng: implement cgroup2 device controller live update

author Christian Brauner <christian.brauner@ubuntu.com>

Sat, 30 Nov 2019 18:33:19 +0000 (19:33 +0100)

committer Christian Brauner <christian.brauner@ubuntu.com>

Sun, 1 Dec 2019 16:07:23 +0000 (17:07 +0100)
author Christian Brauner <christian.brauner@ubuntu.com>
Sat, 30 Nov 2019 18:33:19 +0000 (19:33 +0100)
committer Christian Brauner <christian.brauner@ubuntu.com>
Sun, 1 Dec 2019 16:07:23 +0000 (17:07 +0100)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c

index 4b4a6a92a12d9176b5f74368ab3c8e3adfc66a2a..d41f486a0166d7ec877c04663181d281f29bc926 100644 (file)
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -176,6 +176,11 @@ static void must_append_controller(char **klist, char **nlist, char ***clist,
         (*clist)[newentry] = copy;
  }
  
+static inline bool pure_unified_layout(const struct cgroup_ops *ops)
+{
+       return ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED; /* true only if @ops records the unified layout */
+}
+
  /* Given a handler's cgroup data, return the struct hierarchy for the controller
   * @c, or NULL if there is none.
   */
@@ -196,8 +201,12 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
                         if (ops->hierarchies[i]->controllers &&
                             !ops->hierarchies[i]->controllers[0])
                                 return ops->hierarchies[i];
-
                         continue;
+               } else if (pure_unified_layout(ops) &&
+                          strcmp(controller, "devices") == 0) {
+                       if (ops->unified->bpf_device_controller)
+                               return ops->unified;
+                       break;
                 }
  
                 if (string_in_list(ops->hierarchies[i]->controllers, controller))
@@ -778,9 +787,9 @@ static char **cg_unified_make_empty_controller(void)
  static char **cg_unified_get_controllers(const char *file)
  {
         __do_free char *buf = NULL;
-       char *tok;
         char *sep = " \t\n";
         char **aret = NULL;
+       char *tok;
  
         buf = read_file(file);
         if (!buf)
@@ -2278,12 +2287,115 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
         return ret;
  }
  
+/*
+ * Parse one major or minor number of a device rule.
+ *
+ * @val points at the text to parse and is advanced past what was consumed.
+ * A '*' wildcard is stored in @number as -1; otherwise a run of decimal
+ * digits is converted with lxc_safe_int(). Returns 0 on success and -1 when
+ * the text starts with neither '*' nor a digit or when the conversion fails.
+ */
+static int device_cgroup_number_parse(const char **val, int *number)
+{
+       char temp[50] = {0};
+       const char *cur = *val;
+       size_t count;
+
+       if (*cur == '*') {
+               *number = -1;
+               *val = cur + 1;
+               return 0;
+       }
+
+       /* <ctype.h> functions need a value representable as unsigned char. */
+       if (!isdigit((unsigned char)*cur))
+               return -1;
+
+       /* Copy at most sizeof(temp) - 1 digits so temp stays terminated. */
+       for (count = 0; count < sizeof(temp) - 1; count++) {
+               if (!isdigit((unsigned char)*cur))
+                       break;
+               temp[count] = *cur++;
+       }
+       *val = cur;
+
+       if (lxc_safe_int(temp, number))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Parse a device rule string into @device.
+ *
+ * @key selects the verdict: "devices.allow" sets allow to 1, any other key
+ * sets it to 0. @val is either the lone string "a", which is recorded as a
+ * global rule (type 'a', major and minor -1, global_rule set to the verdict
+ * and allow set to -1), or "<type> <major>:<minor> <access>" where type is
+ * one of 'a', 'b', 'c', major and minor are decimal numbers or '*', and
+ * access is made of up to three of 'r', 'w', 'm'.
+ *
+ * Only the fields named above are written. access is neither cleared nor
+ * terminated here; the callers visible in this file pass in a zeroed
+ * struct device_item.
+ *
+ * Returns 0 on success and -1 if @val is malformed.
+ */
+static int device_cgroup_rule_parse(struct device_item *device, const char *key,
+                                   const char *val)
+{
+       int count;
+
+       if (strcmp("devices.allow", key) == 0)
+               device->allow = 1;
+       else
+               device->allow = 0;
+
+       if (strcmp(val, "a") == 0) {
+               /* global rule */
+               device->type = 'a';
+               device->major = -1;
+               device->minor = -1;
+               device->global_rule = device->allow;
+               device->allow = -1;
+               return 0;
+       }
+
+       device->global_rule = -1;
+
+       switch (*val) {
+       case 'a':
+       case 'b':
+       case 'c':
+               device->type = *val;
+               break;
+       default:
+               return -1;
+       }
+
+       val++;
+       if (!isspace((unsigned char)*val))
+               return -1;
+       val++;
+
+       /* read major */
+       if (device_cgroup_number_parse(&val, &device->major) < 0)
+               return -1;
+
+       if (*val != ':')
+               return -1;
+       val++;
+
+       /* read minor */
+       if (device_cgroup_number_parse(&val, &device->minor) < 0)
+               return -1;
+
+       if (!isspace((unsigned char)*val))
+               return -1;
+
+       /* read up to three access characters */
+       for (val++, count = 0; count < 3; count++, val++) {
+               switch (*val) {
+               case 'r':
+               case 'w':
+               case 'm':
+                       device->access[count] = *val;
+                       break;
+               case '\n':
+               case '\0':
+                       /* End of the rule: force the loop to terminate. */
+                       count = 3;
+                       break;
+               default:
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
  /* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits.  Here we
   * don't have a cgroup_data set up, so we ask the running container through the
   * commands API for the cgroup path.
   */
  __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
-                                    const char *filename, const char *value,
+                                    const char *key, const char *value,
                                      const char *name, const char *lxcpath)
  {
         __do_free char *path = NULL;
@@ -2292,11 +2404,26 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
         struct hierarchy *h;
         int ret = -1;
  
-       controller = must_copy_string(filename);
+       controller = must_copy_string(key);
         p = strchr(controller, '.');
         if (p)
                 *p = '\0';
  
+       if (pure_unified_layout(ops) && strcmp(controller, "devices") == 0) {
+               struct device_item device = {0};
+
+               ret = device_cgroup_rule_parse(&device, key, value);
+               if (ret < 0)
+                       return error_log_errno(EINVAL, "Failed to parse device string %s=%s",
+                                              key, value);
+
+               ret = lxc_cmd_add_bpf_device_cgroup(name, lxcpath, &device);
+               if (ret < 0)
+                       return -1;
+
+               return 0;
+       }
+
         path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
         /* not running */
         if (!path)
@@ -2306,7 +2433,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
         if (h) {
                 __do_free char *fullpath = NULL;
  
-               fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+               fullpath = build_full_cgpath_from_monitorpath(h, path, key);
                 ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
         }
  
@@ -2481,50 +2608,6 @@ out:
         return ret;
  }
  
-static int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device)
-{
-       __do_free struct lxc_list *list_elem = NULL;
-       __do_free struct device_item *new_device = NULL;
-       struct lxc_list *it;
-
-       lxc_list_for_each(it, &conf->devices) {
-               struct device_item *cur = it->elem;
-
-               if (cur->type != device->type)
-                       continue;
-               if (cur->major != device->major)
-                       continue;
-               if (cur->minor != device->minor)
-                       continue;
-               if (strcmp(cur->access, device->access))
-                       continue;
-
-               /*
-                * The rule is switched from allow to deny or vica versa so
-                * don't bother allocating just flip the existing one.
-                */
-               if (cur->allow != device->allow) {
-                       cur->allow = device->allow;
-                       return log_trace(0, "Reusing existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d",
-                                        cur->type, cur->major, cur->minor,
-                                        cur->access, cur->allow);
-               }
-       }
-
-       list_elem = malloc(sizeof(*list_elem));
-       if (!list_elem)
-               return error_log_errno(ENOMEM, "Failed to allocate new device list");
-
-       new_device = memdup(device, sizeof(struct device_item));
-       if (!new_device)
-               return error_log_errno(ENOMEM, "Failed to allocate new device item");
-
-       lxc_list_add_elem(list_elem, move_ptr(new_device));
-       lxc_list_add_tail(&conf->devices, move_ptr(list_elem));
-
-       return 0;
-}
-
  /*
   * Some of the parsing logic comes from the original cgroup device v1
   * implementation in the kernel.
@@ -2535,129 +2618,17 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
  {
  #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
         struct device_item device_item = {0};
-       int count, ret;
-       char temp[50];
-       struct bpf_program *device;
-
-       if (ops->cgroup2_devices) {
-               device = ops->cgroup2_devices;
-       } else {
-               device = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
-               if (device && bpf_program_init(device)) {
-                       ERROR("Failed to initialize bpf program");
-                       return -1;
-               }
-       }
-       if (!device) {
-               ERROR("Failed to create new ebpf device program");
-               return -1;
-       }
-
-       ops->cgroup2_devices = device;
-
-       if (strcmp("devices.allow", key) == 0)
-               device_item.allow = 1;
-
-       if (strcmp(val, "a") == 0) {
-               device->blacklist = (device_item.allow == 1);
-               return 0;
-       }
-
-       switch (*val) {
-       case 'a':
-               __fallthrough;
-       case 'b':
-               __fallthrough;
-       case 'c':
-               device_item.type = *val;
-               break;
-       default:
-               return -1;
-       }
-
-       val++;
-       if (!isspace(*val))
-               return -1;
-       val++;
-       if (*val == '*') {
-               device_item.major = ~0;
-               val++;
-       } else if (isdigit(*val)) {
-               memset(temp, 0, sizeof(temp));
-               for (count = 0; count < sizeof(temp) - 1; count++) {
-                       temp[count] = *val;
-                       val++;
-                       if (!isdigit(*val))
-                               break;
-               }
-               ret = lxc_safe_uint(temp, &device_item.major);
-               if (ret)
-                       return -1;
-       } else {
-               return -1;
-       }
-       if (*val != ':')
-               return -1;
-       val++;
-
-       /* read minor */
-       if (*val == '*') {
-               device_item.minor = ~0;
-               val++;
-       } else if (isdigit(*val)) {
-               memset(temp, 0, sizeof(temp));
-               for (count = 0; count < sizeof(temp) - 1; count++) {
-                       temp[count] = *val;
-                       val++;
-                       if (!isdigit(*val))
-                               break;
-               }
-               ret = lxc_safe_uint(temp, &device_item.minor);
-               if (ret)
-                       return -1;
-       } else {
-               return -1;
-       }
-       if (!isspace(*val))
-               return -1;
-       for (val++, count = 0; count < 3; count++, val++) {
-               switch (*val) {
-               case 'r':
-                       device_item.access[count] = *val;
-                       break;
-               case 'w':
-                       device_item.access[count] = *val;
-                       break;
-               case 'm':
-                       device_item.access[count] = *val;
-                       break;
-               case '\n':
-               case '\0':
-                       count = 3;
-                       break;
-               default:
-                       return -1;
-               }
-       }
+       int ret;
  
-       ret = bpf_program_append_device(device, device_item.type, device_item.major,
-                                       device_item.minor, device_item.access,
-                                       device_item.allow);
-       if (ret) {
-               ERROR("Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
-                     device_item.type, device_item.major, device_item.minor,
-                     device_item.access, device_item.allow);
-               return -1;
-       } else {
-               TRACE("Added new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
-                     device_item.type, device_item.major, device_item.minor,
-                     device_item.access, device_item.allow);
-       }
+       ret = device_cgroup_rule_parse(&device_item, key, val);
+       if (ret < 0)
+               return error_log_errno(EINVAL,
+                                      "Failed to parse device string %s=%s",
+                                      key, val);
  
         ret = bpf_list_add_device(conf, &device_item);
-       if (ret)
+       if (ret < 0)
                 return -1;
-
  #endif
         return 0;
  }
@@ -2705,36 +2676,57 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
                                           struct lxc_handler *handler)
  {
  #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+       __do_bpf_program_free struct bpf_program *devices = NULL;
+       struct lxc_conf *conf = handler->conf;
+       struct hierarchy *unified = ops->unified;
         int ret;
-       struct lxc_conf *conf;
-       struct hierarchy *h = ops->unified;
-       struct bpf_program *devices_new = ops->cgroup2_devices;
+       struct lxc_list *it;
+       struct bpf_program *devices_old;
  
-       if (!h)
+       if (!unified)
                 return false;
  
-       if (!devices_new)
+       if (lxc_list_empty(&conf->devices))
                 return true;
  
-       ret = bpf_program_finalize(devices_new);
+       devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+       if (!devices)
+               return log_error(false, ENOMEM,
+                                "Failed to create new bpf program");
+
+       ret = bpf_program_init(devices);
         if (ret)
-               return false;
+               return log_error(false, ENOMEM,
+                                "Failed to initialize bpf program");
+
+       lxc_list_for_each(it, &conf->devices) {
+               struct device_item *cur = it->elem;
+
+               ret = bpf_program_append_device(devices, cur);
+               if (ret)
+                       return log_error(false,
+                                        ENOMEM, "Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+                                        cur->type, cur->major, cur->minor,
+                                        cur->access, cur->allow, cur->global_rule);
+               TRACE("Added rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+                     cur->type, cur->major, cur->minor, cur->access,
+                     cur->allow, cur->global_rule);
+       }
+
+       ret = bpf_program_finalize(devices);
+       if (ret)
+               return log_error(false, ENOMEM, "Failed to finalize bpf program");
  
-       ret = bpf_program_cgroup_attach(devices_new, BPF_CGROUP_DEVICE,
-                                       h->container_full_path,
+       ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
+                                       unified->container_full_path,
                                         BPF_F_ALLOW_MULTI);
         if (ret)
-               return false;
+               return log_error(false, ENOMEM, "Failed to attach bpf program");
  
         /* Replace old bpf program. */
-       conf = handler->conf;
-       if (conf->cgroup2_devices) {
-               struct bpf_program *old_devices;
-
-               old_devices = move_ptr(conf->cgroup2_devices);
-               conf->cgroup2_devices = move_ptr(ops->cgroup2_devices);
-               bpf_program_free(old_devices);
-       }
+       devices_old = move_ptr(conf->cgroup2_devices);
+       conf->cgroup2_devices = move_ptr(devices);
+       devices = move_ptr(devices_old);
  #endif
         return true;
  }
@@ -3045,6 +3037,9 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
         if (!unprivileged)
                 cg_unified_delegate(&new->cgroup2_chown);
  
+       if (bpf_devices_cgroup_supported())
+               new->bpf_device_controller = 1;
+
         ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
         ops->unified = new;
         return CGROUP2_SUPER_MAGIC;
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h

index 47a1550079feba7b218d11bdef82748021c5854f..a3eb46b9d6937fe2576b0c132a796e30aeaef9a0 100644 (file)
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -92,6 +92,7 @@ struct hierarchy {
         char *container_full_path;
         char *monitor_full_path;
         int version;
+       int bpf_device_controller:1;
  };
  
  struct cgroup_ops {
diff --git a/src/lxc/cgroups/cgroup2_devices.c b/src/lxc/cgroups/cgroup2_devices.c

index 762fd14f6d4f3c69b736fde9c2d0b3e4cfc2d8e0..52c1860f51e8c90e3c8e94f72c4ebdc454f5fb5d 100644 (file)
--- a/src/lxc/cgroups/cgroup2_devices.c
+++ b/src/lxc/cgroups/cgroup2_devices.c
@@ -195,33 +195,38 @@ int bpf_program_init(struct bpf_program *prog)
         return bpf_program_add_instructions(prog, pre_insn, ARRAY_SIZE(pre_insn));
  }
  
-int bpf_program_append_device(struct bpf_program *prog, char type, int major,
-                             int minor, const char *access, int allow)
+int bpf_program_append_device(struct bpf_program *prog, struct device_item *device)
  {
         int ret;
         int jump_nr = 1;
         struct bpf_insn bpf_access_decision[] = {
-           BPF_MOV64_IMM(BPF_REG_0, allow),
+           BPF_MOV64_IMM(BPF_REG_0, device->allow),
             BPF_EXIT_INSN(),
         };
         int access_mask;
         int device_type;
  
-       device_type = bpf_device_type(type);
+       /* This is a global rule so no need to append anything. */
+       if (device->global_rule >= 0) {
+               prog->blacklist = device->global_rule;
+               return 0;
+       }
+
+       device_type = bpf_device_type(device->type);
         if (device_type < 0)
-               return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", type);
+               return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", device->type);
  
         if (device_type > 0)
                 jump_nr++;
  
-       access_mask = bpf_access_mask(access);
+       access_mask = bpf_access_mask(device->access);
         if (!bpf_device_all_access(access_mask))
                 jump_nr += 3;
  
-       if (major >= 0)
+       if (device->major != -1)
                 jump_nr++;
  
-       if (minor >= 0)
+       if (device->minor != -1)
                 jump_nr++;
  
         if (device_type > 0) {
@@ -247,9 +252,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
                         return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
         }
  
-       if (major >= 0) {
+       if (device->major >= 0) {
                 struct bpf_insn ins[] = {
-                   BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, jump_nr--),
+                   BPF_JMP_IMM(BPF_JNE, BPF_REG_4, device->major, jump_nr--),
                 };
  
                 ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
@@ -257,9 +262,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
                         return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
         }
  
-       if (minor >= 0) {
+       if (device->minor >= 0) {
                 struct bpf_insn ins[] = {
-                   BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, jump_nr--),
+                   BPF_JMP_IMM(BPF_JNE, BPF_REG_5, device->minor, jump_nr--),
                 };
  
                 ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
@@ -411,4 +416,94 @@ void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
                 (void)bpf_program_free(conf->cgroup2_devices);
         }
  }
+
+/*
+ * Merge @device into the device rule list @conf->devices.
+ *
+ * The list is scanned in order:
+ *  - if an entry and @device both carry a global rule (global_rule != -1),
+ *    the entry takes over @device's global rule and 1 is returned;
+ *  - if an entry has the same type, major, minor and access as @device, then
+ *    either its allow flag already matches (1 is returned) or the flag is
+ *    flipped in place (0 is returned).
+ * If nothing matches, a copy of @device is appended and 0 is returned.
+ *
+ * On allocation failure the value of error_log_errno(ENOMEM, ...) is returned
+ * (presumably negative; confirm against the macro definition).
+ */
+int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device)
+{
+       __do_free struct lxc_list *entry = NULL;
+       __do_free struct device_item *copy = NULL;
+       struct lxc_list *iter;
+
+       lxc_list_for_each(iter, &conf->devices) {
+               struct device_item *known = iter->elem;
+               bool same_rule;
+
+               if (known->global_rule != -1 && device->global_rule != -1) {
+                       TRACE("Switched from %s to %s",
+                             known->global_rule == 0 ? "whitelist" : "blacklist",
+                             device->global_rule == 0 ? "whitelist"
+                                                      : "blacklist");
+                       known->global_rule = device->global_rule;
+                       return 1;
+               }
+
+               same_rule = known->type == device->type &&
+                           known->major == device->major &&
+                           known->minor == device->minor &&
+                           strcmp(known->access, device->access) == 0;
+               if (!same_rule)
+                       continue;
+
+               /* Identical rule with identical verdict: nothing to change. */
+               if (known->allow == device->allow)
+                       return log_trace(1, "Reusing existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+                                        known->type, known->major, known->minor,
+                                        known->access, known->allow,
+                                        known->global_rule);
+
+               /*
+                * Same device and access but the opposite verdict: flip the
+                * existing entry instead of allocating a new one.
+                */
+               known->allow = device->allow;
+               return log_trace(0, "Switched existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+                                known->type, known->major, known->minor,
+                                known->access, known->allow,
+                                known->global_rule);
+       }
+
+       entry = malloc(sizeof(*entry));
+       if (!entry)
+               return error_log_errno(ENOMEM, "Failed to allocate new device list");
+
+       copy = memdup(device, sizeof(struct device_item));
+       if (!copy)
+               return error_log_errno(ENOMEM, "Failed to allocate new device item");
+
+       /* Ownership of both allocations moves into the list. */
+       lxc_list_add_elem(entry, move_ptr(copy));
+       lxc_list_add_tail(&conf->devices, move_ptr(entry));
+
+       return 0;
+}
+
+/*
+ * Probe whether a BPF_PROG_TYPE_CGROUP_DEVICE program can be loaded.
+ *
+ * Builds a two-instruction program (set R0 to 1, exit) and passes it to
+ * bpf_program_load_kernel(). Returns true if the load succeeds; returns false
+ * if the effective uid is not 0 or if any step fails. The temporary program
+ * is released by the __do_bpf_program_free cleanup attribute on every return
+ * path.
+ */
+bool bpf_devices_cgroup_supported(void)
+{
+       const struct bpf_insn dummy[] = {
+           BPF_MOV64_IMM(BPF_REG_0, 1),
+           BPF_EXIT_INSN(),
+       };
+
+       __do_bpf_program_free struct bpf_program *prog = NULL;
+       int ret;
+
+       if (geteuid() != 0)
+               return log_error(false, EINVAL,
+                                "The bpf device cgroup requires real root");
+
+       prog = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+       /* bpf_program_new() yields a pointer, so failure is NULL, not < 0. */
+       if (!prog)
+               return log_error(false,
+                                errno, "Failed to allocate new bpf device cgroup program");
+
+       ret = bpf_program_add_instructions(prog, dummy, ARRAY_SIZE(dummy));
+       if (ret < 0)
+               return log_error(false,
+                                errno, "Failed to add new instructions to bpf device cgroup program");
+
+       ret = bpf_program_load_kernel(prog, NULL, 0);
+       if (ret < 0)
+               return log_error(false,
+                                errno, "Failed to load new bpf device cgroup program");
+
+       return log_trace(true, "The bpf device cgroup is supported");
+}
  #endif
diff --git a/src/lxc/cgroups/cgroup2_devices.h b/src/lxc/cgroups/cgroup2_devices.h

index afcc6b937698d6fcb1aec45f5972bbbfabe8d20e..a02735a1ab921a2b045833e0399d716f3abc0bd6 100644 (file)
--- a/src/lxc/cgroups/cgroup2_devices.h
+++ b/src/lxc/cgroups/cgroup2_devices.h
@@ -5,6 +5,7 @@
  #ifndef __LXC_CGROUP2_DEVICES_H
  #define __LXC_CGROUP2_DEVICES_H
  
+#include <errno.h>
  #include <fcntl.h>
  #include <stdbool.h>
  #include <stddef.h>
@@ -79,53 +80,61 @@ struct bpf_program {
  #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
  struct bpf_program *bpf_program_new(uint32_t prog_type);
  int bpf_program_init(struct bpf_program *prog);
-int bpf_program_append_device(struct bpf_program *prog, char type, int major,
-                             int minor, const char *access, int allow);
+int bpf_program_append_device(struct bpf_program *prog,
+                             struct device_item *device);
  int bpf_program_finalize(struct bpf_program *prog);
  int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
                               const char *path, uint32_t flags);
  int bpf_program_cgroup_detach(struct bpf_program *prog);
  void bpf_program_free(struct bpf_program *prog);
  void lxc_clear_cgroup2_devices(struct lxc_conf *conf);
-static inline void __do_bpf_program_free(struct bpf_program **prog)
+bool bpf_devices_cgroup_supported(void);
+static inline void __auto_bpf_program_free__(struct bpf_program **prog)
  {
         if (*prog) {
                 bpf_program_free(*prog);
                 *prog = NULL;
         }
  }
+int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device);
  #else
  static inline struct bpf_program *bpf_program_new(uint32_t prog_type)
  {
+       errno = ENOSYS;
         return NULL;
  }
  
  static inline int bpf_program_init(struct bpf_program *prog)
  {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
  }
  
  static inline int bpf_program_append_device(struct bpf_program *prog, char type,
                                             int major, int minor,
                                             const char *access, int allow)
  {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
  }
  
  static inline int bpf_program_finalize(struct bpf_program *prog)
  {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
  }
  
  static inline int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
                                             const char *path, uint32_t flags)
  {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
  }
  
  static inline int bpf_program_cgroup_detach(struct bpf_program *prog)
  {
-       return -ENOSYS;
+       errno = ENOSYS;
+       return -1;
  }
  
  static inline void bpf_program_free(struct bpf_program *prog)
@@ -136,9 +145,24 @@ static inline void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
  {
  }
  
-static inline void __do_bpf_program_free(struct bpf_program **prog)
+static inline bool bpf_devices_cgroup_supported(void)
+{
+       return false;
+}
+
+static inline void __auto_bpf_program_free__(struct bpf_program **prog)
+{
+}
+
+static inline int bpf_list_add_device(struct lxc_conf *conf,
+                                     struct device_item *device)
  {
+       errno = ENOSYS;
+       return -1;
  }
  #endif
  
+#define __do_bpf_program_free \
+       __attribute__((__cleanup__(__auto_bpf_program_free__)))
+
  #endif /* __LXC_CGROUP2_DEVICES_H */
diff --git a/src/lxc/commands.c b/src/lxc/commands.c

index 90e3c5863d41f13b217c76fef4fd8d9aab65f96b..f4920c7846a5137bd17f14a160e8870378c12912 100644 (file)
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -39,6 +39,7 @@
  
  #include "af_unix.h"
  #include "cgroup.h"
+#include "cgroups/cgroup2_devices.h"
  #include "commands.h"
  #include "commands_utils.h"
  #include "conf.h"
@@ -85,20 +86,21 @@ lxc_log_define(commands, lxc);
  static const char *lxc_cmd_str(lxc_cmd_t cmd)
  {
         static const char *const cmdname[LXC_CMD_MAX] = {
-               [LXC_CMD_CONSOLE]             = "console",
-               [LXC_CMD_TERMINAL_WINCH]      = "terminal_winch",
-               [LXC_CMD_STOP]                = "stop",
-               [LXC_CMD_GET_STATE]           = "get_state",
-               [LXC_CMD_GET_INIT_PID]        = "get_init_pid",
-               [LXC_CMD_GET_CLONE_FLAGS]     = "get_clone_flags",
-               [LXC_CMD_GET_CGROUP]          = "get_cgroup",
-               [LXC_CMD_GET_CONFIG_ITEM]     = "get_config_item",
-               [LXC_CMD_GET_NAME]            = "get_name",
-               [LXC_CMD_GET_LXCPATH]         = "get_lxcpath",
-               [LXC_CMD_ADD_STATE_CLIENT]    = "add_state_client",
-               [LXC_CMD_CONSOLE_LOG]         = "console_log",
-               [LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients",
-               [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener",
+               [LXC_CMD_CONSOLE]                       = "console",
+               [LXC_CMD_TERMINAL_WINCH]                = "terminal_winch",
+               [LXC_CMD_STOP]                          = "stop",
+               [LXC_CMD_GET_STATE]                     = "get_state",
+               [LXC_CMD_GET_INIT_PID]                  = "get_init_pid",
+               [LXC_CMD_GET_CLONE_FLAGS]               = "get_clone_flags",
+               [LXC_CMD_GET_CGROUP]                    = "get_cgroup",
+               [LXC_CMD_GET_CONFIG_ITEM]               = "get_config_item",
+               [LXC_CMD_GET_NAME]                      = "get_name",
+               [LXC_CMD_GET_LXCPATH]                   = "get_lxcpath",
+               [LXC_CMD_ADD_STATE_CLIENT]              = "add_state_client",
+               [LXC_CMD_CONSOLE_LOG]                   = "console_log",
+               [LXC_CMD_SERVE_STATE_CLIENTS]           = "serve_state_clients",
+               [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER]   = "seccomp_notify_add_listener",
+               [LXC_CMD_ADD_BPF_DEVICE_CGROUP]         = "add_bpf_device_cgroup",
         };
  
         if (cmd >= LXC_CMD_MAX)
@@ -925,6 +927,118 @@ reap_client_fd:
         return 1;
  }
  
+/* Ask the command server of container @name (under @lxcpath) to add @device
+ * to its cgroup2 bpf device program.
+ *
+ * @device is sent as the raw request payload (sizeof(struct device_item)
+ * bytes), so it must be fully initialized by the caller.
+ *
+ * Returns 0 on success. On failure the result of error_log_errno() is
+ * returned. Without HAVE_STRUCT_BPF_CGROUP_DEV_CTX the call always fails via
+ * minus_one_set_errno(ENOSYS).
+ */
+int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
+                                 struct device_item *device)
+{
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+       int stopped = 0;
+       struct lxc_cmd_rr cmd = {
+           .req = {
+               .cmd     = LXC_CMD_ADD_BPF_DEVICE_CGROUP,
+               .data    = device,
+               .datalen = sizeof(struct device_item),
+           },
+       };
+       int ret;
+
+       /* access is a fixed-size buffer. Bound the scan to the buffer so that
+        * a missing terminator is reported as EINVAL instead of being read
+        * past the end of the array; the precision on %.*s does the same for
+        * the log message.
+        */
+       if (strnlen(device->access, sizeof(device->access)) > STRLITERALLEN("rwm"))
+               return error_log_errno(EINVAL, "Invalid access mode specified %.*s",
+                                      (int)sizeof(device->access), device->access);
+
+       ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
+       /* NOTE(review): when only cmd.rsp.ret is negative, errno was not set
+        * by this call and may be stale - confirm what callers expect here.
+        */
+       if (ret < 0 || cmd.rsp.ret < 0)
+               return error_log_errno(errno, "Failed to add new bpf device cgroup rule");
+
+       return 0;
+#else
+       return minus_one_set_errno(ENOSYS);
+#endif
+}
+
+/* Command-server handler for LXC_CMD_ADD_BPF_DEVICE_CGROUP.
+ *
+ * Adds the device rule carried in @req to the container's device list, builds
+ * a new bpf device program from the whole list, attaches it to the cgroup at
+ * unified->container_full_path and stores it in conf->cgroup2_devices.
+ *
+ * Returns 0 once a response has been sent (rsp.ret is 0 on success, -1 on
+ * failure). Returns 1 when the request is malformed or the response cannot be
+ * sent, which tells lxc_cmd_handler() to close @fd. Without
+ * HAVE_STRUCT_BPF_CGROUP_DEV_CTX it returns minus_one_set_errno(ENOSYS).
+ */
+static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *req,
+                                                 struct lxc_handler *handler,
+                                                 struct lxc_epoll_descr *descr)
+{
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+       __do_bpf_program_free struct bpf_program *devices = NULL;
+       struct lxc_cmd_rsp rsp = {0};
+       struct lxc_conf *conf = handler->conf;
+       struct hierarchy *unified = handler->cgroup_ops->unified;
+       int ret;
+       struct lxc_list *it;
+       struct device_item *device;
+       struct bpf_program *devices_old;
+
+       /* The payload must be exactly one struct device_item. */
+       if (req->datalen <= 0)
+               goto reap_client_fd;
+
+       if (req->datalen != sizeof(struct device_item))
+               goto reap_client_fd;
+
+       if (!req->data)
+               goto reap_client_fd;
+       device = (struct device_item *)req->data;
+
+       /* The payload comes from the client side of the command socket, so do
+        * not rely on the client having validated it: access is used as a C
+        * string and must be terminated within its buffer.
+        */
+       if (strnlen(device->access, sizeof(device->access)) > STRLITERALLEN("rwm"))
+               goto reap_client_fd;
+
+       /* From here on every failure is reported back to the client. */
+       rsp.ret = -1;
+       if (!unified)
+               goto respond;
+
+       /* NOTE(review): the list is updated before the program is built, so a
+        * later failure leaves the new entry in conf->devices although it was
+        * never attached - confirm this is intended.
+        */
+       ret = bpf_list_add_device(conf, device);
+       if (ret < 0)
+               goto respond;
+
+       devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+       if (!devices)
+               goto respond;
+
+       ret = bpf_program_init(devices);
+       if (ret)
+               goto respond;
+
+       /* Rebuild the program from the complete device list. */
+       lxc_list_for_each(it, &conf->devices) {
+               struct device_item *cur = it->elem;
+
+               ret = bpf_program_append_device(devices, cur);
+               if (ret)
+                       goto respond;
+       }
+
+       ret = bpf_program_finalize(devices);
+       if (ret)
+               goto respond;
+
+       ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
+                                       unified->container_full_path,
+                                       BPF_F_ALLOW_MULTI);
+       if (ret)
+               goto respond;
+
+       /* Replace old bpf program: conf keeps the new one and `devices` ends
+        * up holding the old one, which is presumably released through
+        * __do_bpf_program_free when this function returns.
+        */
+       devices_old = move_ptr(conf->cgroup2_devices);
+       conf->cgroup2_devices = move_ptr(devices);
+       devices = move_ptr(devices_old);
+
+       rsp.ret = 0;
+
+respond:
+       ret = lxc_cmd_rsp_send(fd, &rsp);
+       if (ret < 0)
+               goto reap_client_fd;
+
+       return 0;
+
+reap_client_fd:
+       /* Special indicator to lxc_cmd_handler() to close the fd and do related
+        * cleanup.
+        */
+       return 1;
+#else
+       return minus_one_set_errno(ENOSYS);
+#endif
+}
+
  int lxc_cmd_console_log(const char *name, const char *lxcpath,
                         struct lxc_console_log *log)
  {
@@ -1123,20 +1237,21 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
                                 struct lxc_epoll_descr *);
  
         callback cb[LXC_CMD_MAX] = {
-               [LXC_CMD_CONSOLE]                     = lxc_cmd_console_callback,
-               [LXC_CMD_TERMINAL_WINCH]              = lxc_cmd_terminal_winch_callback,
-               [LXC_CMD_STOP]                        = lxc_cmd_stop_callback,
-               [LXC_CMD_GET_STATE]                   = lxc_cmd_get_state_callback,
-               [LXC_CMD_GET_INIT_PID]                = lxc_cmd_get_init_pid_callback,
-               [LXC_CMD_GET_CLONE_FLAGS]             = lxc_cmd_get_clone_flags_callback,
-               [LXC_CMD_GET_CGROUP]                  = lxc_cmd_get_cgroup_callback,
-               [LXC_CMD_GET_CONFIG_ITEM]             = lxc_cmd_get_config_item_callback,
-               [LXC_CMD_GET_NAME]                    = lxc_cmd_get_name_callback,
-               [LXC_CMD_GET_LXCPATH]                 = lxc_cmd_get_lxcpath_callback,
-               [LXC_CMD_ADD_STATE_CLIENT]            = lxc_cmd_add_state_client_callback,
-               [LXC_CMD_CONSOLE_LOG]                 = lxc_cmd_console_log_callback,
-               [LXC_CMD_SERVE_STATE_CLIENTS]         = lxc_cmd_serve_state_clients_callback,
-               [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = lxc_cmd_seccomp_notify_add_listener_callback,
+               [LXC_CMD_CONSOLE]                       = lxc_cmd_console_callback,
+               [LXC_CMD_TERMINAL_WINCH]                = lxc_cmd_terminal_winch_callback,
+               [LXC_CMD_STOP]                          = lxc_cmd_stop_callback,
+               [LXC_CMD_GET_STATE]                     = lxc_cmd_get_state_callback,
+               [LXC_CMD_GET_INIT_PID]                  = lxc_cmd_get_init_pid_callback,
+               [LXC_CMD_GET_CLONE_FLAGS]               = lxc_cmd_get_clone_flags_callback,
+               [LXC_CMD_GET_CGROUP]                    = lxc_cmd_get_cgroup_callback,
+               [LXC_CMD_GET_CONFIG_ITEM]               = lxc_cmd_get_config_item_callback,
+               [LXC_CMD_GET_NAME]                      = lxc_cmd_get_name_callback,
+               [LXC_CMD_GET_LXCPATH]                   = lxc_cmd_get_lxcpath_callback,
+               [LXC_CMD_ADD_STATE_CLIENT]              = lxc_cmd_add_state_client_callback,
+               [LXC_CMD_CONSOLE_LOG]                   = lxc_cmd_console_log_callback,
+               [LXC_CMD_SERVE_STATE_CLIENTS]           = lxc_cmd_serve_state_clients_callback,
+               [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER]   = lxc_cmd_seccomp_notify_add_listener_callback,
+               [LXC_CMD_ADD_BPF_DEVICE_CGROUP]         = lxc_cmd_add_bpf_device_cgroup_callback,
         };
  
         if (req->cmd >= LXC_CMD_MAX) {
diff --git a/src/lxc/commands.h b/src/lxc/commands.h

index d7d0c6096aa2d77f24490a69f5ef13f96428dc51..008b7c30e24dd718f85ffb8cd4add564b4475c8d 100644 (file)
--- a/src/lxc/commands.h
+++ b/src/lxc/commands.h
@@ -47,6 +47,7 @@ typedef enum {
         LXC_CMD_CONSOLE_LOG,
         LXC_CMD_SERVE_STATE_CLIENTS,
         LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER,
+       LXC_CMD_ADD_BPF_DEVICE_CGROUP,
         LXC_CMD_MAX,
  } lxc_cmd_t;
  
@@ -131,4 +132,8 @@ extern int lxc_cmd_seccomp_notify_add_listener(const char *name,
                                                /* unused */ unsigned int command,
                                                /* unused */ unsigned int flags);
  
+struct device_item;
+extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
+                                        struct device_item *device);
+
  #endif /* __commands_h */
diff --git a/src/lxc/conf.h b/src/lxc/conf.h

index 44d7934fe4bf55980cfae9f2b788601856ff49e3..9142d31710e550d9f06265e02b8c75c395a0c483 100644 (file)
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -236,6 +236,11 @@ struct device_item {
         int minor;
         char access[4];
         int allow;
+       /* -1 -> no global rule
+        *  0 -> whitelist (deny all)
+        *  1 -> blacklist (allow all)
+        */
+       int global_rule;
  };
  
  struct lxc_conf {
diff --git a/src/lxc/log.h b/src/lxc/log.h

index 8e459196436445fa6b9330f1ab7dbc604654c496..c6b2be2d6e738f184a5bdbe7728a28899428e315 100644 (file)
--- a/src/lxc/log.h
+++ b/src/lxc/log.h
@@ -518,6 +518,13 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo,        \
                 __ret__;                      \
         })
  
+#define log_error(__ret__, __errno__, format, ...) \
+       ({ /* statement expression: sets errno, logs, evaluates to __ret__ */ \
+               errno = __errno__; /* set before logging */ \
+               SYSERROR(format, ##__VA_ARGS__);   \
+               __ret__; /* value of the whole expression */ \
+       })
+
  extern int lxc_log_fd;
  
  extern int lxc_log_syslog(int facility);
author	Christian Brauner <christian.brauner@ubuntu.com>
	Sat, 30 Nov 2019 18:33:19 +0000 (19:33 +0100)
committer	Christian Brauner <christian.brauner@ubuntu.com>
	Sun, 1 Dec 2019 16:07:23 +0000 (17:07 +0100)
src/lxc/cgroups/cgfsng.c		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup.h		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup2_devices.c		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup2_devices.h		patch \| blob \| blame \| history
src/lxc/commands.c		patch \| blob \| blame \| history
src/lxc/commands.h		patch \| blob \| blame \| history
src/lxc/conf.h		patch \| blob \| blame \| history
src/lxc/log.h		patch \| blob \| blame \| history