]> git.ipfire.org Git - thirdparty/lxc.git/commitdiff
cgroups/cgfsng: improve cgroup creation and removal 3226/head
authorChristian Brauner <christian.brauner@ubuntu.com>
Wed, 11 Dec 2019 06:37:36 +0000 (07:37 +0100)
committerChristian Brauner <christian.brauner@ubuntu.com>
Wed, 11 Dec 2019 10:51:09 +0000 (11:51 +0100)
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
src/lxc/cgroups/cgfsng.c
src/lxc/cgroups/cgroup.c
src/lxc/cgroups/cgroup.h
src/lxc/commands.c
src/lxc/file_utils.c
src/lxc/memory_utils.h

index b5f64cc774d2e81df6bf372c2f1fbdde8bb4ffaf..83f41c005d6ee4c53119190826d2817579c62a9c 100644 (file)
@@ -155,11 +155,6 @@ static void must_append_controller(char **klist, char **nlist, char ***clist,
        (*clist)[newentry] = copy;
 }
 
-static inline bool pure_unified_layout(const struct cgroup_ops *ops)
-{
-       return ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED;
-}
-
 /* Given a handler's cgroup data, return the struct hierarchy for the controller
  * @c, or NULL if there is none.
  */
@@ -770,14 +765,13 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
        struct hierarchy *new;
        int newentry;
 
-       new = must_realloc(NULL, sizeof(*new));
+       new = zalloc(sizeof(*new));
        new->controllers = clist;
        new->mountpoint = mountpoint;
        new->container_base_path = container_base_path;
-       new->container_full_path = NULL;
-       new->monitor_full_path = NULL;
        new->version = type;
-       new->cgroup2_chown = NULL;
+       new->cgfd_con = -EBADF;
+       new->cgfd_mon = -EBADF;
 
        newentry = append_null_to_list((void ***)h);
        (*h)[newentry] = new;
@@ -1096,6 +1090,7 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
 {
        int len;
        char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+       const struct lxc_conf *conf;
 
        if (!ops)
                log_error_errno(return, ENOENT, "Called with uninitialized cgroup operations");
@@ -1106,25 +1101,44 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
        if (!handler)
                log_error_errno(return, EINVAL, "Called with uninitialized handler");
 
+       if (!handler->conf)
+               log_error_errno(return, EINVAL, "Called with uninitialized conf");
+       conf = handler->conf;
+
        len = snprintf(pidstr, sizeof(pidstr), "%d", handler->monitor_pid);
        if (len < 0 || (size_t)len >= sizeof(pidstr))
                return;
 
        for (int i = 0; ops->hierarchies[i]; i++) {
-               __do_free char *base_path = NULL;
+               __do_free char *pivot_path = NULL;
                struct hierarchy *h = ops->hierarchies[i];
                int ret;
 
                if (!h->monitor_full_path)
                        continue;
 
-               base_path = must_make_path(h->mountpoint, h->container_base_path, NULL);
-               ret = lxc_write_openat(base_path, "cgroup.procs", pidstr, len);
+               if (conf && conf->cgroup_meta.dir)
+                       pivot_path = must_make_path(h->mountpoint,
+                                                   h->container_base_path,
+                                                   conf->cgroup_meta.dir,
+                                                   CGROUP_PIVOT, NULL);
+               else
+                       pivot_path = must_make_path(h->mountpoint,
+                                                   h->container_base_path,
+                                                   CGROUP_PIVOT, NULL);
+
+               ret = mkdir_p(pivot_path, 0755);
+               if (ret < 0 && errno != EEXIST)
+                       log_error_errno(goto try_recursive_destroy, errno,
+                                       "Failed to create %s", pivot_path);
+
+               ret = lxc_write_openat(pivot_path, "cgroup.procs", pidstr, len);
                if (ret != 0)
                        log_warn_errno(continue, errno,
                                       "Failed to move monitor %s to \"%s\"",
-                                      pidstr, base_path);
+                                      pidstr, pivot_path);
 
+try_recursive_destroy:
                ret = recursive_destroy(h->monitor_full_path);
                if (ret < 0)
                        WARN("Failed to destroy \"%s\"", h->monitor_full_path);
@@ -1189,10 +1203,17 @@ static bool create_cgroup_tree(struct hierarchy *h, const char *cgroup_tree,
                        return log_error_errno(false, errno, "Failed to create %s cgroup", path);
        }
 
-       if (payload)
+       if (payload) {
+               h->cgfd_con = lxc_open_dirfd(path);
+               if (h->cgfd_con < 0)
+                       return log_error_errno(false, errno, "Failed to open %s", path);
                h->container_full_path = move_ptr(path);
-       else
+       } else {
+               h->cgfd_mon = lxc_open_dirfd(path);
+               if (h->cgfd_mon < 0)
+                       return log_error_errno(false, errno, "Failed to open %s", path);
                h->monitor_full_path = move_ptr(path);
+       }
 
        return true;
 }
@@ -1201,10 +1222,15 @@ static void cgroup_remove_leaf(struct hierarchy *h, bool payload)
 {
        __do_free char *full_path = NULL;
 
-       if (payload)
+       if (payload) {
+               __lxc_unused __do_close_prot_errno int fd = move_fd(h->cgfd_con);
+               h->cgfd_con = -EBADF;
                full_path = h->container_full_path;
-       else
+       } else {
+               __lxc_unused __do_close_prot_errno int fd = move_fd(h->cgfd_mon);
+               h->cgfd_mon = -EBADF;
                full_path = h->monitor_full_path;
+       }
 
        if (rmdir(full_path))
                SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path);
@@ -1343,17 +1369,6 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
        if (idx == 1000)
                return ret_set_errno(false, ERANGE);
 
-       if (ops->unified && ops->unified->container_full_path) {
-               int ret;
-
-               ret = open(ops->unified->container_full_path,
-                          O_DIRECTORY | O_RDONLY | O_CLOEXEC);
-               if (ret < 0)
-                       return log_error_errno(false,
-                                              errno, "Failed to open file descriptor for unified hierarchy");
-               ops->unified_fd = ret;
-       }
-
        ops->container_cgroup = move_ptr(container_cgroup);
        INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup);
        return true;
@@ -1380,25 +1395,31 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
 
        monitor_len = snprintf(monitor, sizeof(monitor), "%d", handler->monitor_pid);
        if (handler->transient_pid > 0)
-               transient_len = snprintf(transient, sizeof(transient), "%d",
-                                        handler->transient_pid);
+               transient_len = snprintf(transient, sizeof(transient), "%d", handler->transient_pid);
 
        for (int i = 0; ops->hierarchies[i]; i++) {
-               __do_free char *path = NULL;
+               struct hierarchy *h = ops->hierarchies[i];
                int ret;
 
-               path = must_make_path(ops->hierarchies[i]->monitor_full_path,
-                                     "cgroup.procs", NULL);
-               ret = lxc_writeat(-1, path, monitor, monitor_len);
-               if (ret != 0)
-                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
+               ret = lxc_writeat(h->cgfd_mon, "cgroup.procs", monitor, monitor_len);
+               if (ret)
+                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", h->monitor_full_path);
 
                 if (handler->transient_pid < 0)
                        return true;
 
-               ret = lxc_writeat(-1, path, transient, transient_len);
-               if (ret != 0)
-                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
+               ret = lxc_writeat(h->cgfd_mon, "cgroup.procs", transient, transient_len);
+               if (ret)
+                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", h->monitor_full_path);
+
+               /*
+                * We don't keep the fds for non-unified hierarchies around
+                * mainly because we don't make use of them anymore after the
+                * core cgroup setup is done but also because they're quite a
+                * lot of them.
+                */
+               if (!is_unified_hierarchy(h))
+                       close_prot_errno_disarm(h->cgfd_mon);
        }
        handler->transient_pid = -1;
 
@@ -1426,35 +1447,43 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
        len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
 
        for (int i = 0; ops->hierarchies[i]; i++) {
-               __do_free char *path = NULL;
+               struct hierarchy *h = ops->hierarchies[i];
                int ret;
 
-               path = must_make_path(ops->hierarchies[i]->container_full_path,
-                                     "cgroup.procs", NULL);
-               ret = lxc_writeat(-1, path, pidstr, len);
+               ret = lxc_writeat(h->cgfd_con, "cgroup.procs", pidstr, len);
                if (ret != 0)
-                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
+                       return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", h->container_full_path);
+
+               /*
+                * We don't keep the fds for non-unified hierarchies around
+                * mainly because we don't make use of them anymore after the
+                * core cgroup setup is done but also because they're quite a
+                * lot of them.
+                */
+               if (!is_unified_hierarchy(h))
+                       close_prot_errno_disarm(h->cgfd_con);
        }
 
        return true;
 }
 
-static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
-                  mode_t chmod_mode)
+static int fchowmodat(int dirfd, const char *path, uid_t chown_uid,
+                     gid_t chown_gid, mode_t chmod_mode)
 {
        int ret;
 
-       ret = chown(path, chown_uid, chown_gid);
-       if (ret < 0) {
-               SYSWARN("Failed to chown(%s, %d, %d)", path, (int)chown_uid, (int)chown_gid);
-               return -1;
-       }
+       ret = fchownat(dirfd, path, chown_uid, chown_gid,
+                      AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+       if (ret < 0)
+               return log_warn_errno(-1,
+                                     errno, "Failed to fchownat(%d, %s, %d, %d, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW )",
+                                     dirfd, path, (int)chown_uid,
+                                     (int)chown_gid);
 
-       ret = chmod(path, chmod_mode);
-       if (ret < 0) {
-               SYSWARN("Failed to chmod(%s, %d)", path, (int)chmod_mode);
-               return -1;
-       }
+       ret = fchmodat(dirfd, (*path != '\0') ? path : ".", chmod_mode, 0);
+       if (ret < 0)
+               return log_warn_errno(-1, errno, "Failed to fchmodat(%d, %s, %d, AT_SYMLINK_NOFOLLOW)",
+                                     dirfd, path, (int)chmod_mode);
 
        return 0;
 }
@@ -1495,46 +1524,28 @@ static int chown_cgroup_wrapper(void *data)
                destuid = 0;
 
        for (int i = 0; arg->hierarchies[i]; i++) {
-               __do_free char *fullpath = NULL;
-               char *path = arg->hierarchies[i]->container_full_path;
+               int dirfd = arg->hierarchies[i]->cgfd_con;
 
-               ret = chowmod(path, destuid, nsgid, 0775);
-               if (ret < 0)
-                       log_info_errno(continue,
-                                      errno, "Failed to change %s to uid %d and gid %d and mode 0755",
-                                      path, destuid, nsgid);
+               (void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
 
-               /* Failures to chown() these are inconvenient but not
+               /*
+                * Failures to chown() these are inconvenient but not
                 * detrimental We leave these owned by the container launcher,
                 * so that container root can write to the files to attach.  We
                 * chmod() them 664 so that container systemd can write to the
                 * files (which systemd in wily insists on doing).
                 */
 
-               if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
-                       fullpath = must_make_path(path, "tasks", NULL);
-                       ret = chowmod(fullpath, destuid, nsgid, 0664);
-                       if (ret < 0)
-                               SYSINFO("Failed to change %s to uid %d and gid %d and mode 0664",
-                                       fullpath, destuid, nsgid);
-               }
+               if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC)
+                       (void)fchowmodat(dirfd, "tasks", destuid, nsgid, 0664);
 
-               fullpath = must_make_path(path, "cgroup.procs", NULL);
-               ret = chowmod(fullpath, destuid, nsgid, 0664);
-               if (ret < 0)
-                       SYSINFO("Failed to change %s to uid %d and gid %d and mode 0664",
-                               fullpath, destuid, nsgid);
+               (void)fchowmodat(dirfd, "cgroup.procs", destuid, nsgid, 0664);
 
                if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
                        continue;
 
-               for (char **p = arg->hierarchies[i]->cgroup2_chown; p && *p; p++) {
-                       fullpath = must_make_path(path, *p, NULL);
-                       ret = chowmod(fullpath, destuid, nsgid, 0664);
-                       if (ret < 0)
-                               SYSINFO("Failed to change %s to uid %d and gid %d and mode 0664",
-                                       fullpath, destuid, nsgid);
-               }
+               for (char **p = arg->hierarchies[i]->cgroup2_chown; p && *p; p++)
+                       (void)fchowmodat(dirfd, *p, destuid, nsgid, 0664);
        }
 
        return 0;
@@ -3233,8 +3244,6 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
        if (cg_init(cgfsng_ops, conf))
                return NULL;
 
-       cgfsng_ops->unified_fd = -EBADF;
-
        cgfsng_ops->data_init = cgfsng_data_init;
        cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
        cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
index 84171c18d9077a093bd41fc9da036e15476c7057..11d14d27c46cc84d54948c9a0f36eccff2b4c6e0 100644 (file)
@@ -67,9 +67,6 @@ void cgroup_exit(struct cgroup_ops *ops)
        if (ops->cgroup2_devices)
                bpf_program_free(ops->cgroup2_devices);
 
-       if (ops->unified_fd >= 0)
-               close(ops->unified_fd);
-
        for (it = ops->hierarchies; it && *it; it++) {
                char **p;
 
@@ -85,6 +82,10 @@ void cgroup_exit(struct cgroup_ops *ops)
                free((*it)->container_base_path);
                free((*it)->container_full_path);
                free((*it)->monitor_full_path);
+               if ((*it)->cgfd_mon >= 0)
+                       close((*it)->cgfd_con);
+               if ((*it)->cgfd_mon >= 0)
+                       close((*it)->cgfd_mon);
                free(*it);
        }
        free(ops->hierarchies);
index dccc454462d6cf5880164888c0d21b1f332fcbcb..ac7cbe5f328fb2a71199cd1ce6aaa2a03aaeacbd 100644 (file)
@@ -14,6 +14,7 @@
 #define DEFAULT_MONITOR_CGROUP_PREFIX "lxc.monitor."
 #define CGROUP_CREATE_RETRY "-NNNN"
 #define CGROUP_CREATE_RETRY_LEN (STRLITERALLEN(CGROUP_CREATE_RETRY))
+#define CGROUP_PIVOT "lxc.pivot"
 
 struct lxc_handler;
 struct lxc_conf;
@@ -78,6 +79,11 @@ struct hierarchy {
 
        /* cgroup2 only */
        unsigned int bpf_device_controller:1;
+
+       /* monitor cgroup fd */
+       int cgfd_con;
+       /* container cgroup fd */
+       int cgfd_mon;
 };
 
 struct cgroup_ops {
@@ -101,8 +107,6 @@ struct cgroup_ops {
        struct hierarchy **hierarchies;
        /* Pointer to the unified hierarchy. Do not free! */
        struct hierarchy *unified;
-       /* File descriptor to the container's cgroup. */
-       int unified_fd;
 
        /*
         * @cgroup2_devices
@@ -179,4 +183,9 @@ extern int cgroup_attach(const char *name, const char *lxcpath, int64_t pid);
 
 #define __do_cgroup_exit __attribute__((__cleanup__(__auto_cgroup_exit__)))
 
+static inline bool pure_unified_layout(const struct cgroup_ops *ops)
+{
+       return ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED;
+}
+
 #endif
index c46a4106d5c30af9aa34a4c71567d68ed94949aa..cb67e71902562bee6f56ab4395774746cfc41897 100644 (file)
@@ -1226,7 +1226,7 @@ static int lxc_cmd_freeze_callback(int fd, struct lxc_cmd_req *req,
        };
        struct cgroup_ops *ops = handler->cgroup_ops;
 
-       if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
+       if (pure_unified_layout(ops))
                rsp.ret = ops->freeze(ops, timeout);
 
        return lxc_cmd_rsp_send(fd, &rsp);
@@ -1259,7 +1259,7 @@ static int lxc_cmd_unfreeze_callback(int fd, struct lxc_cmd_req *req,
        };
        struct cgroup_ops *ops = handler->cgroup_ops;
 
-       if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
+       if (pure_unified_layout(ops))
                rsp.ret = ops->unfreeze(ops, timeout);
 
        return lxc_cmd_rsp_send(fd, &rsp);
@@ -1294,11 +1294,11 @@ static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
        struct cgroup_ops *ops = handler->cgroup_ops;
        int ret;
 
-       if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED)
+       if (!pure_unified_layout(ops) || !ops->unified)
                return lxc_cmd_rsp_send(fd, &rsp);
 
        rsp.ret = 0;
-       ret = lxc_abstract_unix_send_fds(fd, &ops->unified_fd, 1, &rsp,
+       ret = lxc_abstract_unix_send_fds(fd, &ops->unified->cgfd_con, 1, &rsp,
                                         sizeof(rsp));
        if (ret < 0)
                return log_error(1, "Failed to send cgroup2 fd");
index 14ff7a93aa40d27ea4ae6c8b37dcd2ed5ab7680b..009d26d7c8797b03c85577f4b22054f6ff20e820 100644 (file)
@@ -21,7 +21,7 @@
 
 int lxc_open_dirfd(const char *dir)
 {
-       return open(dir, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+       return open(dir, O_DIRECTORY | O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
 }
 
 int lxc_readat(int dirfd, const char *filename, void *buf, size_t count)
index 919a1f490b80d72aede5ab0ea8177b068628f425..c75674f10a05ceb06681a0fbe5facd5ec41a42d5 100644 (file)
@@ -61,4 +61,6 @@ static inline void *memdup(const void *data, size_t len)
        return copy ? memcpy(copy, data, len) : NULL;
 }
 
+#define zalloc(__size__) (calloc(1, __size__))
+
 #endif /* __LXC_MEMORY_UTILS_H */