cgroup2: rework controller delegation

author Christian Brauner <christian.brauner@ubuntu.com>

Fri, 6 Dec 2019 08:42:47 +0000 (09:42 +0100)

committer Christian Brauner <christian.brauner@ubuntu.com>

Fri, 6 Dec 2019 14:38:33 +0000 (15:38 +0100)
author Christian Brauner <christian.brauner@ubuntu.com>
Fri, 6 Dec 2019 08:42:47 +0000 (09:42 +0100)
committer Christian Brauner <christian.brauner@ubuntu.com>
Fri, 6 Dec 2019 14:38:33 +0000 (15:38 +0100)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c

index 9541031828a3b500e7cf41904ab6452ed131cb42..d5ddc8388d166c00c3953747326913903f449361 100644 (file)
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1184,71 +1184,6 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
         }
  }
  
-static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
-{
-       __do_free char *add_controllers = NULL, *cgroup = NULL;
-       size_t i, parts_len;
-       char **it;
-       size_t full_len = 0;
-       char **parts = NULL;
-       bool bret = false;
-
-       if (h->version != CGROUP2_SUPER_MAGIC)
-               return true;
-
-       if (!h->controllers)
-               return true;
-
-       /* For now we simply enable all controllers that we have detected by
-        * creating a string like "+memory +pids +cpu +io".
-        * TODO: In the near future we might want to support "-<controller>"
-        * etc. but whether supporting semantics like this make sense will need
-        * some thinking.
-        */
-       for (it = h->controllers; it && *it; it++) {
-               full_len += strlen(*it) + 2;
-               add_controllers = must_realloc(add_controllers, full_len + 1);
-
-               if (h->controllers[0] == *it)
-                       add_controllers[0] = '\0';
-
-               (void)strlcat(add_controllers, "+", full_len + 1);
-               (void)strlcat(add_controllers, *it, full_len + 1);
-
-               if ((it + 1) && *(it + 1))
-                       (void)strlcat(add_controllers, " ", full_len + 1);
-       }
-
-       parts = lxc_string_split(cgname, '/');
-       if (!parts)
-               goto on_error;
-
-       parts_len = lxc_array_len((void **)parts);
-       if (parts_len > 0)
-               parts_len--;
-
-       cgroup = must_make_path(h->mountpoint, h->container_base_path, NULL);
-       for (i = 0; i < parts_len; i++) {
-               int ret;
-               __do_free char *target = NULL;
-
-               cgroup = must_append_path(cgroup, parts[i], NULL);
-               target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
-               ret = lxc_write_to_file(target, add_controllers, full_len, false, 0666);
-               if (ret < 0) {
-                       SYSERROR("Could not enable \"%s\" controllers in the "
-                                "unified cgroup \"%s\"", add_controllers, cgroup);
-                       goto on_error;
-               }
-       }
-
-       bret = true;
-
-on_error:
-       lxc_free_array((void **)parts, free);
-       return bret;
-}
-
  static int mkdir_eexist_on_last(const char *dir, mode_t mode)
  {
         const char *tmp = dir;
@@ -1298,7 +1233,7 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
                 return false;
         }
  
-       return cg_unified_create_cgroup(h, cgname);
+       return true;
  }
  
  static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
@@ -1317,7 +1252,7 @@ static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgnam
                 return false;
         }
  
-       return cg_unified_create_cgroup(h, cgname);
+       return true;
  }
  
  static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname, bool monitor)
@@ -1400,6 +1335,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
                 return false;
  
         INFO("The monitor process uses \"%s\" as cgroup", monitor_cgroup);
+       ops->monitor_cgroup = move_ptr(monitor_cgroup);
         return true;
  }
  
@@ -1479,47 +1415,66 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
         return true;
  }
  
-__cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
-                                            bool monitor)
+__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
+                                             struct lxc_handler *handler)
  {
-       int len;
-       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+       int monitor_len, transient_len = 0;
+       char monitor[INTTYPE_TO_STRLEN(pid_t)],
+           transient[INTTYPE_TO_STRLEN(pid_t)];
  
         if (!ops->hierarchies)
                 return true;
  
-       len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
-       if (len < 0 || (size_t)len >= sizeof(pidstr))
-               return false;
+       monitor_len = snprintf(monitor, sizeof(monitor), "%d", handler->monitor_pid);
+       if (handler->transient_pid > 0)
+               transient_len = snprintf(transient, sizeof(transient), "%d",
+                                        handler->transient_pid);
  
         for (int i = 0; ops->hierarchies[i]; i++) {
-               int ret;
                 __do_free char *path = NULL;
+               int ret;
  
-               if (monitor)
-                       path = must_make_path(ops->hierarchies[i]->monitor_full_path,
-                                             "cgroup.procs", NULL);
-               else
-                       path = must_make_path(ops->hierarchies[i]->container_full_path,
-                                             "cgroup.procs", NULL);
-               ret = lxc_write_to_file(path, pidstr, len, false, 0666);
-               if (ret != 0) {
-                       SYSERROR("Failed to enter cgroup \"%s\"", path);
-                       return false;
-               }
+               path = must_make_path(ops->hierarchies[i]->monitor_full_path,
+                                     "cgroup.procs", NULL);
+               ret = lxc_writeat(-1, path, monitor, monitor_len);
+               if (ret != 0)
+                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
+               /* No transient pid was formatted: move on to the next hierarchy. */
+               if (handler->transient_pid <= 0)
+                       continue;
+
+               ret = lxc_writeat(-1, path, transient, transient_len);
+               if (ret != 0)
+                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
         }
+       handler->transient_pid = -1;
  
         return true;
  }
  
-__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid)
+__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
+                                             struct lxc_handler *handler)
  {
-       return __do_cgroup_enter(ops, pid, true);
-}
+       int len;
+       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
  
-static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
-{
-       return __do_cgroup_enter(ops, pid, false);
+       if (!ops->hierarchies)
+               return true;
+
+       len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
+
+       for (int i = 0; ops->hierarchies[i]; i++) {
+               __do_free char *path = NULL;
+               int ret;
+
+               path = must_make_path(ops->hierarchies[i]->container_full_path,
+                                     "cgroup.procs", NULL);
+               ret = lxc_writeat(-1, path, pidstr, len);
+               if (ret != 0)
+                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
+       }
+
+       return true;
  }
  
  static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
@@ -2625,11 +2580,12 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
         return ret;
  }
  
-static bool __cg_legacy_setup_limits(struct cgroup_ops *ops,
-                                    struct lxc_list *cgroup_settings,
-                                    bool do_devices)
+__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
+                                                   struct lxc_conf *conf,
+                                                   bool do_devices)
  {
         __do_free struct lxc_list *sorted_cgroup_settings = NULL;
+       struct lxc_list *cgroup_settings = &conf->cgroup;
         struct lxc_list *iterator, *next;
         struct lxc_cgroup *cg;
         bool ret = false;
@@ -2699,12 +2655,13 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
         return 0;
  }
  
-static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
-                                     struct lxc_list *cgroup_settings,
-                                     struct lxc_conf *conf)
+__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
+                                            struct lxc_handler *handler)
  {
         struct lxc_list *iterator;
         struct hierarchy *h = ops->unified;
+       struct lxc_conf *conf = handler->conf;
+       struct lxc_list *cgroup_settings = &conf->cgroup2;
  
         if (lxc_list_empty(cgroup_settings))
                 return true;
@@ -2798,18 +2755,79 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
         return true;
  }
  
-__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
-                                            struct lxc_conf *conf,
-                                            bool do_devices)
+static bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
  {
-       if (!__cg_legacy_setup_limits(ops, &conf->cgroup, do_devices))
-               return false;
+       __do_free char *add_controllers = NULL, *base_path = NULL;
+       struct hierarchy *unified = ops->unified;
+       ssize_t parts_len;
+       char **it;
+       size_t full_len = 0;
+       char **parts = NULL;
+       bool bret = false;
+       /* Without hierarchies, a pure unified layout or controllers this is a no-op. */
-       /* for v2 we will have already set up devices */
-       if (do_devices)
+       if (!ops->hierarchies || !pure_unified_layout(ops) ||
+           !unified->controllers[0])
                 return true;
  
-       return __cg_unified_setup_limits(ops, &conf->cgroup2, conf);
+       /* For now we simply enable all controllers that we have detected by
+        * creating a string like "+memory +pids +cpu +io".
+        * TODO: In the near future we might want to support "-<controller>"
+        * etc. but whether supporting semantics like this make sense will need
+        * some thinking.
+        */
+       for (it = unified->controllers; it && *it; it++) {
+               full_len += strlen(*it) + 2;
+               add_controllers = must_realloc(add_controllers, full_len + 1);
+
+               if (unified->controllers[0] == *it)
+                       add_controllers[0] = '\0';
+
+               (void)strlcat(add_controllers, "+", full_len + 1);
+               (void)strlcat(add_controllers, *it, full_len + 1);
+
+               if (*(it + 1))
+                       (void)strlcat(add_controllers, " ", full_len + 1);
+       }
+
+       parts = lxc_string_split(cgroup, '/');
+       if (!parts)
+               goto on_error;
+       /* Drop the last path component from the walk: only its ancestors are written. */
+       parts_len = lxc_array_len((void **)parts);
+       if (parts_len > 0)
+               parts_len--;
+       /* The i == -1 pass writes to the base path itself before any component is appended. */
+       base_path = must_make_path(unified->mountpoint, unified->container_base_path, NULL);
+       for (ssize_t i = -1; i < parts_len; i++) {
+               int ret;
+               __do_free char *target = NULL;
+
+               if (i >= 0)
+                       base_path = must_append_path(base_path, parts[i], NULL);
+               target = must_make_path(base_path, "cgroup.subtree_control", NULL);
+               ret = lxc_writeat(-1, target, add_controllers, full_len);
+               if (ret < 0) {
+                       SYSERROR("Could not enable \"%s\" controllers in the unified cgroup \"%s\"", add_controllers, target);
+                       goto on_error;
+               }
+               TRACE("Enable \"%s\" controllers in the unified cgroup \"%s\"", add_controllers, target);
+       }
+
+       bret = true;
+
+on_error:
+       lxc_free_array((void **)parts, free);
+       return bret;
+}
+
+__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
+{
+       return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup);
+}
+
+__cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
+{
+       return __cgfsng_delegate_controllers(ops, ops->container_cgroup);
  }
  
  static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
@@ -3062,15 +3080,15 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
         base_cgroup = cg_unified_get_current_cgroup(relative);
         if (!base_cgroup)
                 return -EINVAL;
-       prune_init_scope(base_cgroup);
+       if (!relative)
+               prune_init_scope(base_cgroup);
  
         /* We assume that we have already been given controllers to delegate
          * further down the hierarchy. If not it is up to the user to delegate
          * them to us.
          */
         mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
-       subtree_path = must_make_path(mountpoint, base_cgroup,
-                                     "cgroup.subtree_control", NULL);
+       subtree_path = must_make_path(mountpoint, base_cgroup, "cgroup.controllers", NULL);
         delegatable = cg_unified_get_controllers(subtree_path);
         if (!delegatable)
                 delegatable = cg_unified_make_empty_controller();
@@ -3162,6 +3180,8 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
         cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
         cgfsng_ops->monitor_create = cgfsng_monitor_create;
         cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
+       cgfsng_ops->monitor_delegate_controllers = cgfsng_monitor_delegate_controllers;
+       cgfsng_ops->payload_delegate_controllers = cgfsng_payload_delegate_controllers;
         cgfsng_ops->payload_create = cgfsng_payload_create;
         cgfsng_ops->payload_enter = cgfsng_payload_enter;
         cgfsng_ops->escape = cgfsng_escape;
@@ -3172,6 +3192,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
         cgfsng_ops->set = cgfsng_set;
         cgfsng_ops->freeze = cgfsng_freeze;
         cgfsng_ops->unfreeze = cgfsng_unfreeze;
+       cgfsng_ops->setup_limits_legacy = cgfsng_setup_limits_legacy;
         cgfsng_ops->setup_limits = cgfsng_setup_limits;
         cgfsng_ops->driver = "cgfsng";
         cgfsng_ops->version = "1.0.0";
diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c

index 35e4b5ae4e2439dc71db462e46125e06f3f93cb9..8804d59ac3abfd452241078c7ddf56a2ba2e045c 100644 (file)
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -65,6 +65,7 @@ void cgroup_exit(struct cgroup_ops *ops)
  
         free(ops->cgroup_pattern);
         free(ops->container_cgroup);
+       free(ops->monitor_cgroup);
  
         if (ops->cgroup2_devices)
                 bpf_program_free(ops->cgroup2_devices);
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h

index 81320e4876183acb033875d1a42e2603a844c5e1..80d2c315a3c45b108a17da0544f3323ee7806d73 100644 (file)
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -88,6 +88,7 @@ struct cgroup_ops {
         char **cgroup_use;
         char *cgroup_pattern;
         char *container_cgroup;
+       char *monitor_cgroup;
  
         /* Static memory, do not free.*/
         const char *monitor_pattern;
@@ -135,9 +136,9 @@ struct cgroup_ops {
         void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
         void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
         bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
-       bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid);
+       bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
         bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
-       bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid);
+       bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
         const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
         bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
         int (*num_hierarchies)(struct cgroup_ops *ops);
@@ -148,8 +149,9 @@ struct cgroup_ops {
                    size_t len, const char *name, const char *lxcpath);
         int (*freeze)(struct cgroup_ops *ops, int timeout);
         int (*unfreeze)(struct cgroup_ops *ops, int timeout);
-       bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf,
-                            bool with_devices);
+       bool (*setup_limits_legacy)(struct cgroup_ops *ops,
+                                   struct lxc_conf *conf, bool with_devices);
+       bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_handler *handler);
         bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
         bool (*attach)(struct cgroup_ops *ops, const char *name,
                        const char *lxcpath, pid_t pid);
@@ -158,6 +160,8 @@ struct cgroup_ops {
         int (*nrtasks)(struct cgroup_ops *ops);
         bool (*devices_activate)(struct cgroup_ops *ops,
                                  struct lxc_handler *handler);
+       bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
+       bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
  };
  
  extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c

index e89caf4e15b01f6b3219a0cf7ba4ad706bcc94d2..b97b58ec6ae14eeb08ecb59de8ed1e0c68e4b68e 100644 (file)
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -824,6 +824,15 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid)
  {
         int ret, state;
  
+       /* The first child is going to fork() again and then exits. So we reap
+        * the first child here.
+        */
+       ret = wait_for_pid(pid);
+       if (ret < 0)
+               DEBUG("Failed waiting on first child %d", pid);
+       else
+               DEBUG("First child %d exited", pid);
+
         /* Close write end of the socket pair. */
         close(handler->state_socket_pair[1]);
         handler->state_socket_pair[1] = -1;
@@ -834,15 +843,6 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid)
         close(handler->state_socket_pair[0]);
         handler->state_socket_pair[0] = -1;
  
-       /* The first child is going to fork() again and then exits. So we reap
-        * the first child here.
-        */
-       ret = wait_for_pid(pid);
-       if (ret < 0)
-               DEBUG("Failed waiting on first child %d", pid);
-       else
-               DEBUG("First child %d exited", pid);
-
         if (state < 0) {
                 SYSERROR("Failed to receive the container state");
                 return false;
@@ -935,17 +935,17 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
         if (c->daemonize) {
                 bool started;
                 char title[2048];
-               pid_t pid;
+               pid_t pid_first, pid_second;
  
-               pid = fork();
-               if (pid < 0) {
+               pid_first = fork();
+               if (pid_first < 0) {
                         free_init_cmd(init_cmd);
                         lxc_free_handler(handler);
                         return false;
                 }
  
                 /* first parent */
-               if (pid != 0) {
+               if (pid_first != 0) {
                         /* Set to NULL because we don't want father unlink
                          * the PID file, child will do the free and unlink.
                          */
@@ -954,7 +954,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
                         /* Wait for container to tell us whether it started
                          * successfully.
                          */
-                       started = wait_on_daemonized_start(handler, pid);
+                       started = wait_on_daemonized_start(handler, pid_first);
  
                         free_init_cmd(init_cmd);
                         lxc_free_handler(handler);
@@ -980,14 +980,14 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
                  * POSIX's daemon() function we change to "/" and redirect
                  * std{in,out,err} to /dev/null.
                  */
-               pid = fork();
-               if (pid < 0) {
+               pid_second = fork();
+               if (pid_second < 0) {
                         SYSERROR("Failed to fork first child process");
                         _exit(EXIT_FAILURE);
                 }
  
                 /* second parent */
-               if (pid != 0) {
+               if (pid_second != 0) {
                         free_init_cmd(init_cmd);
                         lxc_free_handler(handler);
                         _exit(EXIT_SUCCESS);
diff --git a/src/lxc/macro.h b/src/lxc/macro.h

index 2aeda4e3da8f87dddb54573f97192c6bc86ab5e4..e011596d219ae64ed0c56793a6108ab688ceec0f 100644 (file)
--- a/src/lxc/macro.h
+++ b/src/lxc/macro.h
@@ -448,6 +448,12 @@ enum {
                 -1;                    \
         })
  
+#define ret_set_errno(__ret__, __errno__) \
+       ({                                \
+               errno = __errno__;        \
+               __ret__;                  \
+       })
+
  #define free_replace_move_ptr(a, b) \
         ({                          \
                 free(a);            \
diff --git a/src/lxc/start.c b/src/lxc/start.c

index 6e2f0ab046de85f286eec55a8f9568c1b6c22c85..aa4939945da8e99c1127a457a2cd746007b3fc29 100644 (file)
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -737,6 +737,10 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
                 handler->nsfd[i] = -1;
  
         handler->name = name;
+       if (daemonize)
+               handler->transient_pid = lxc_raw_getpid();
+       else
+               handler->transient_pid = -1;
  
         if (daemonize && handler->conf->reboot == REBOOT_NONE) {
                 /* Create socketpair() to synchronize on daemonized startup.
@@ -912,7 +916,7 @@ int lxc_init(const char *name, struct lxc_handler *handler)
         ret = lsm_process_prepare(conf, handler->lxcpath);
         if (ret < 0) {
                 ERROR("Failed to initialize LSM");
-               goto out_destroy_cgroups;
+               goto out_delete_terminal;
         }
         TRACE("Initialized LSM");
  
@@ -920,10 +924,6 @@ int lxc_init(const char *name, struct lxc_handler *handler)
         handler->monitor_status_fd = move_fd(status_fd);
         return 0;
  
-out_destroy_cgroups:
-       handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler);
-       handler->cgroup_ops->monitor_destroy(handler->cgroup_ops, handler);
-
  out_delete_terminal:
         lxc_terminal_delete(&handler->conf->console);
  
@@ -1016,8 +1016,10 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
  
         lsm_process_cleanup(handler->conf, handler->lxcpath);
  
-       cgroup_ops->payload_destroy(cgroup_ops, handler);
-       cgroup_ops->monitor_destroy(cgroup_ops, handler);
+       if (cgroup_ops) {
+               cgroup_ops->payload_destroy(cgroup_ops, handler);
+               cgroup_ops->monitor_destroy(cgroup_ops, handler);
+       }
  
         if (handler->conf->reboot == REBOOT_NONE) {
                 /* For all new state clients simply close the command socket.
@@ -1813,14 +1815,24 @@ static int lxc_spawn(struct lxc_handler *handler)
         if (ret < 0)
                 goto out_delete_net;
  
-       if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) {
+       if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, false)) {
                 ERROR("Failed to setup cgroup limits for container \"%s\"", name);
                 goto out_delete_net;
         }
  
-       if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
+       if (!cgroup_ops->payload_enter(cgroup_ops, handler))
                 goto out_delete_net;
  
+       if (!cgroup_ops->payload_delegate_controllers(cgroup_ops)) {
+               ERROR("Failed to delegate controllers to payload cgroup");
+               goto out_delete_net;
+       }
+
+       if (!cgroup_ops->setup_limits(cgroup_ops, handler)) {
+               ERROR("Failed to setup cgroup limits for container \"%s\"", name);
+               goto out_delete_net;
+       }
+
         if (!cgroup_ops->chown(cgroup_ops, handler->conf))
                 goto out_delete_net;
  
@@ -1883,7 +1895,7 @@ static int lxc_spawn(struct lxc_handler *handler)
         if (ret < 0)
                 goto out_delete_net;
  
-       if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
+       if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) {
                 ERROR("Failed to setup legacy device cgroup controller limits");
                 goto out_delete_net;
         }
@@ -2015,12 +2027,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
                 goto out_fini_nonet;
         }
  
-       if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
+       if (!cgroup_ops->monitor_enter(cgroup_ops, handler)) {
                 ERROR("Failed to enter monitor cgroup");
                 ret = -1;
                 goto out_fini_nonet;
         }
  
+       if (!cgroup_ops->monitor_delegate_controllers(cgroup_ops)) {
+               ERROR("Failed to delegate controllers to monitor cgroup");
+               ret = -1;
+               goto out_fini_nonet;
+       }
+
         if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
                 /* If the backing store is a device, mount it here and now. */
                 if (rootfs_is_blockdev(conf)) {
diff --git a/src/lxc/start.h b/src/lxc/start.h

index dc40f29eeb7195ab5edbab81aebf16a26fdae7f2..662ac5570445c96c94f6601f62edd42d0726a4a7 100644 (file)
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -89,6 +89,9 @@ struct lxc_handler {
          */
         int proc_pidfd;
  
+       /* The grandfather's pid when double-forking. */
+       pid_t transient_pid;
+
         /* The monitor's pid. */
         pid_t monitor_pid;
author	Christian Brauner <christian.brauner@ubuntu.com>
	Fri, 6 Dec 2019 08:42:47 +0000 (09:42 +0100)
committer	Christian Brauner <christian.brauner@ubuntu.com>
	Fri, 6 Dec 2019 14:38:33 +0000 (15:38 +0100)
src/lxc/cgroups/cgfsng.c		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup.c		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup.h		patch \| blob \| blame \| history
src/lxc/lxccontainer.c		patch \| blob \| blame \| history
src/lxc/macro.h		patch \| blob \| blame \| history
src/lxc/start.c		patch \| blob \| blame \| history
src/lxc/start.h		patch \| blob \| blame \| history