]> git.ipfire.org Git - thirdparty/lxc.git/commitdiff
introduce lxc.cgroup.dir.{monitor,container,container.inner}
authorWolfgang Bumiller <w.bumiller@proxmox.com>
Thu, 2 Apr 2020 08:01:37 +0000 (10:01 +0200)
committerWolfgang Bumiller <w.bumiller@proxmox.com>
Fri, 3 Apr 2020 15:22:04 +0000 (17:22 +0200)
This is a new approach to #1302 with a container-side
configuration instead of a global boolean flag.

Contrary to the previous PR using an optional additional
parameter for the get-cgroup command, this introduces two
new additional commands to get the limiting cgroup path and
cgroup2 file descriptor. If the limiting option is not in
use, these behave identically to their full-path counterparts.

If these variables are used, the payload will end up in the
concatenation of lxc.cgroup.dir.container and
lxc.cgroup.dir.container.inner (which may be empty), and the
monitor will end up in lxc.cgroup.dir.monitor. The
directories are fixed and no retry count logic is applied;
failing to create these directories will simply be a hard
error.

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
doc/lxc.container.conf.sgml.in
src/lxc/cgroups/cgfsng.c
src/lxc/cgroups/cgroup.h
src/lxc/commands.c
src/lxc/commands.h
src/lxc/conf.c
src/lxc/conf.h
src/lxc/confile.c
src/lxc/criu.c

index ae04e3af36537085bc3a5e5a48428561aa1b37d0..b456398839c96acfff03f7d62dfee5a96541bfc1 100644 (file)
@@ -1571,6 +1571,53 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
             </para>
           </listitem>
         </varlistentry>
+        <varlistentry>
+          <term>
+            <option>lxc.cgroup.dir.container</option>
+          </term>
+          <listitem>
+            <para>
+              This is similar to <option>lxc.cgroup.dir</option>, but must be
+              used together with <option>lxc.cgroup.dir.monitor</option> and
+              affects only the container's cgroup path. This option is mutually
+              exclusive with <option>lxc.cgroup.dir</option>.
+              Note that the final path the container attaches to may be
+              extended further by the
+              <option>lxc.cgroup.dir.container.namespace</option> option.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term>
+            <option>lxc.cgroup.dir.monitor</option>
+          </term>
+          <listitem>
+            <para>
+              This is the monitor process counterpart to
+              <option>lxc.cgroup.dir.container</option>.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term>
+            <option>lxc.cgroup.dir.container.namespace</option>
+          </term>
+          <listitem>
+            <para>
+              Specify an additional subdirectory where the cgroup namespace
+              will be created. With this option, the cgroup limits will be
+              applied to the outer path specified in
+              <option>lxc.cgroup.dir.container</option>, which is not accessible
+              from within the container, making it possible to better enforce
+              limits for privileged containers in a way they cannot override
+              them.
+              This only works in conjunction with the
+              <option>lxc.cgroup.dir.container</option> and
+              <option>lxc.cgroup.dir.monitor</option> options and has otherwise
+              no effect.
+            </para>
+          </listitem>
+        </varlistentry>
         <varlistentry>
           <term>
             <option>lxc.cgroup.relative</option>
index d3595bcdf9ee4e76d99bb4a010156b242f028302..cf0f5fbc52abd592e67d6b98270fad6de4cfc1d7 100644 (file)
@@ -725,6 +725,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
        new->container_base_path = container_base_path;
        new->version = type;
        new->cgfd_con = -EBADF;
+       new->cgfd_limit = -EBADF;
        new->cgfd_mon = -EBADF;
 
        newentry = append_null_to_list((void ***)h);
@@ -956,13 +957,15 @@ static int cgroup_tree_remove(struct hierarchy **hierarchies,
                struct hierarchy *h = hierarchies[i];
                int ret;
 
-               if (!h->container_full_path)
+               if (!h->container_limit_path)
                        continue;
 
-               ret = lxc_rm_rf(h->container_full_path);
+               ret = lxc_rm_rf(h->container_limit_path);
                if (ret < 0)
-                       WARN("Failed to destroy \"%s\"", h->container_full_path);
+                       WARN("Failed to destroy \"%s\"", h->container_limit_path);
 
+               if (h->container_limit_path != h->container_full_path)
+                       free_disarm(h->container_limit_path);
                free_disarm(h->container_full_path);
        }
 
@@ -1089,7 +1092,12 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
                        goto try_lxc_rm_rf;
                }
 
-               if (conf && conf->cgroup_meta.dir)
+               if (conf && conf->cgroup_meta.monitor_dir)
+                       pivot_path = must_make_path(h->mountpoint,
+                                                   h->container_base_path,
+                                                   conf->cgroup_meta.monitor_dir,
+                                                   CGROUP_PIVOT, NULL);
+               else if (conf && conf->cgroup_meta.dir)
                        pivot_path = must_make_path(h->mountpoint,
                                                    h->container_base_path,
                                                    conf->cgroup_meta.dir,
@@ -1147,7 +1155,8 @@ static int mkdir_eexist_on_last(const char *dir, mode_t mode)
 }
 
 static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree,
-                              const char *cgroup_leaf, bool payload)
+                              const char *cgroup_leaf, bool payload,
+                              const char *cgroup_limit_dir)
 {
        __do_free char *path = NULL;
        int ret, ret_cpuset;
@@ -1176,6 +1185,16 @@ static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree,
                if (h->cgfd_con < 0)
                        return log_error_errno(false, errno, "Failed to open %s", path);
                h->container_full_path = move_ptr(path);
+               if (cgroup_limit_dir) {
+                       path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL);
+                       h->cgfd_limit = lxc_open_dirfd(path);
+                       if (h->cgfd_limit < 0)
+                               return log_error_errno(false, errno, "Failed to open %s", path);
+                       h->container_limit_path = move_ptr(path);
+               } else {
+                       h->container_limit_path = h->container_full_path;
+                       h->cgfd_limit = h->cgfd_con;
+               }
        } else {
                h->cgfd_mon = lxc_open_dirfd(path);
                if (h->cgfd_mon < 0)
@@ -1188,11 +1207,15 @@ static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree,
 
 static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload)
 {
-       __do_free char *full_path = NULL;
+       __do_free char *full_path = NULL, *__limit_path = NULL;
+       char *limit_path = NULL;
 
        if (payload) {
                __lxc_unused __do_close int fd = move_fd(h->cgfd_con);
                full_path = move_ptr(h->container_full_path);
+               limit_path = move_ptr(h->container_limit_path);
+               if (limit_path != full_path)
+                       __limit_path = limit_path;
        } else {
                __lxc_unused __do_close int fd = move_fd(h->cgfd_mon);
                full_path = move_ptr(h->monitor_full_path);
@@ -1200,6 +1223,39 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload)
 
        if (full_path && rmdir(full_path))
                SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path);
+       if (limit_path && rmdir(limit_path))
+               SYSWARN("Failed to rmdir(\"%s\") cgroup", limit_path);
+}
+
+/*
+ * Validate the cgroup directory options: lxc.cgroup.dir must not be combined
+ * with the separate monitor/container/namespace directory options, and the
+ * monitor and container directories must be set as a pair.
+ *
+ * Returns true if the configuration is consistent, false otherwise.
+ */
+static bool check_cgroup_dir_config(struct lxc_conf *conf)
+{
+       const char *monitor_dir = conf->cgroup_meta.monitor_dir,
+                  *container_dir = conf->cgroup_meta.container_dir,
+                  *namespace_dir = conf->cgroup_meta.namespace_dir;
+
+       /* none of the new options are set, all is fine */
+       if (!monitor_dir && !container_dir && !namespace_dir)
+               return true;
+
+       /* some are set, make sure lxc.cgroup.dir is not also set */
+       if (conf->cgroup_meta.dir)
+               return log_error_errno(false, EINVAL,
+                       "lxc.cgroup.dir conflicts with lxc.cgroup.dir.container/monitor");
+
+       /* make sure both monitor and container are set */
+       if (!monitor_dir || !container_dir)
+               return log_error_errno(false, EINVAL,
+                       "lxc.cgroup.dir.container and lxc.cgroup.dir.monitor must both be set");
+
+       /* namespace_dir may be empty */
+       return true;
 }
 
 __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
@@ -1210,7 +1266,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
        int idx = 0;
        int i;
        size_t len;
-       char *suffix;
+       char *suffix = NULL;
        struct lxc_conf *conf;
 
        if (!ops)
@@ -1227,7 +1283,13 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
 
        conf = handler->conf;
 
-       if (conf->cgroup_meta.dir) {
+       if (!check_cgroup_dir_config(conf))
+               return false;
+
+       if (conf->cgroup_meta.monitor_dir) {
+               cgroup_tree = NULL;
+               monitor_cgroup = strdup(conf->cgroup_meta.monitor_dir);
+       } else if (conf->cgroup_meta.dir) {
                cgroup_tree = conf->cgroup_meta.dir;
                monitor_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/",
                                             DEFAULT_MONITOR_CGROUP_PREFIX,
@@ -1251,14 +1313,16 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
        if (!monitor_cgroup)
                return ret_set_errno(false, ENOMEM);
 
-       suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN;
-       *suffix = '\0';
+       if (!conf->cgroup_meta.monitor_dir) {
+               suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN;
+               *suffix = '\0';
+       }
        do {
-               if (idx)
+               if (idx && suffix)
                        sprintf(suffix, "-%d", idx);
 
                for (i = 0; ops->hierarchies[i]; i++) {
-                       if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, monitor_cgroup, false))
+                       if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, monitor_cgroup, false, NULL))
                                continue;
 
                        ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path ?: "(null)");
@@ -1268,9 +1332,9 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
                        idx++;
                        break;
                }
-       } while (ops->hierarchies[i] && idx > 0 && idx < 1000);
+       } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix);
 
-       if (idx == 1000)
+       if (idx == 1000 || (!suffix && idx != 0))
                return ret_set_errno(false, ERANGE);
 
        ops->monitor_cgroup = move_ptr(monitor_cgroup);
@@ -1284,12 +1348,14 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
 __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
                                                      struct lxc_handler *handler)
 {
-       __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL;
+       __do_free char *container_cgroup = NULL,
+                      *__cgroup_tree = NULL,
+                      *limiting_cgroup = NULL;
        const char *cgroup_tree;
        int idx = 0;
        int i;
        size_t len;
-       char *suffix;
+       char *suffix = NULL;
        struct lxc_conf *conf;
 
        if (!ops)
@@ -1306,7 +1372,20 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
 
        conf = handler->conf;
 
-       if (conf->cgroup_meta.dir) {
+       if (!check_cgroup_dir_config(conf))
+               return false;
+
+       if (conf->cgroup_meta.container_dir) {
+               cgroup_tree = NULL;
+
+               limiting_cgroup = strdup(conf->cgroup_meta.container_dir);
+               if (!limiting_cgroup)
+                       return ret_set_errno(false, ENOMEM);
+
+               container_cgroup = must_make_path(limiting_cgroup,
+                                                 conf->cgroup_meta.namespace_dir,
+                                                 NULL);
+       } else if (conf->cgroup_meta.dir) {
                cgroup_tree = conf->cgroup_meta.dir;
                container_cgroup = must_concat(&len, cgroup_tree, "/",
                                             DEFAULT_PAYLOAD_CGROUP_PREFIX,
@@ -1330,14 +1409,18 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
        if (!container_cgroup)
                return ret_set_errno(false, ENOMEM);
 
-       suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN;
-       *suffix = '\0';
+       if (!conf->cgroup_meta.container_dir) {
+               suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN;
+               *suffix = '\0';
+       }
        do {
-               if (idx)
+               if (idx && suffix)
                        sprintf(suffix, "-%d", idx);
 
                for (i = 0; ops->hierarchies[i]; i++) {
-                       if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, container_cgroup, true))
+                       if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree,
+                                              container_cgroup, true,
+                                              limiting_cgroup))
                                continue;
 
                        ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)");
@@ -1347,9 +1430,9 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
                        idx++;
                        break;
                }
-       } while (ops->hierarchies[i] && idx > 0 && idx < 1000);
+       } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix);
 
-       if (idx == 1000)
+       if (idx == 1000 || (!suffix && idx != 0))
                return ret_set_errno(false, ERANGE);
 
        ops->container_cgroup = move_ptr(container_cgroup);
@@ -2039,8 +2122,8 @@ __cgfsng_ops static int cgfsng_unfreeze(struct cgroup_ops *ops, int timeout)
        return cg_unified_unfreeze(ops, timeout);
 }
 
-__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
-                                                 const char *controller)
+static const char *cgfsng_get_cgroup_do(struct cgroup_ops *ops,
+                                       const char *controller, bool limiting)
 {
        struct hierarchy *h;
 
@@ -2049,11 +2132,28 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
                return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
                                      controller ? controller : "(null)");
 
+       if (limiting)
+               return h->container_limit_path
+                          ? h->container_limit_path + strlen(h->mountpoint)
+                          : NULL;
+
        return h->container_full_path
                   ? h->container_full_path + strlen(h->mountpoint)
                   : NULL;
 }
 
+__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
+                                                 const char *controller)
+{
+    return cgfsng_get_cgroup_do(ops, controller, false);
+}
+
+__cgfsng_ops static const char *cgfsng_get_limiting_cgroup(struct cgroup_ops *ops,
+                                                          const char *controller)
+{
+    return cgfsng_get_cgroup_do(ops, controller, true);
+}
+
 /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
  * which must be freed by the caller.
  */
@@ -2382,7 +2482,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
        if (p)
                *p = '\0';
 
-       path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+       path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller);
        /* not running */
        if (!path)
                return -1;
@@ -2547,7 +2647,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
                return 0;
        }
 
-       path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+       path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller);
        /* not running */
        if (!path)
                return -1;
@@ -2657,7 +2757,7 @@ static int convert_devpath(const char *invalue, char *dest)
  * we created the cgroups.
  */
 static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
-                             const char *value)
+                             const char *value, bool is_cpuset)
 {
        __do_free char *controller = NULL;
        char *p;
@@ -2683,7 +2783,12 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
        if (!h)
                return log_error_errno(-ENOENT, ENOENT, "Failed to setup limits for the \"%s\" controller. The controller seems to be unused by \"cgfsng\" cgroup driver or not enabled on the cgroup hierarchy", controller);
 
-       return lxc_write_openat(h->container_full_path, filename, value, strlen(value));
+       if (is_cpuset) {
+               int ret = lxc_write_openat(h->container_full_path, filename, value, strlen(value));
+               if (ret)
+                       return ret;
+       }
+       return lxc_write_openat(h->container_limit_path, filename, value, strlen(value));
 }
 
 __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
@@ -2717,7 +2822,7 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
                cg = iterator->elem;
 
                if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
-                       if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) {
+                       if (cg_legacy_set_data(ops, cg->subsystem, cg->value, strncmp("cpuset", cg->subsystem, 6) == 0)) {
                                if (do_devices && (errno == EACCES || errno == EPERM)) {
                                        SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
                                        continue;
@@ -2802,7 +2907,7 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
                        ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem,
                                                        cg->value);
                } else {
-                       ret = lxc_write_openat(h->container_full_path,
+                       ret = lxc_write_openat(h->container_limit_path,
                                               cg->subsystem, cg->value,
                                               strlen(cg->value));
                        if (ret < 0)
@@ -2878,7 +2983,7 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
                return log_error_errno(false, ENOMEM, "Failed to finalize bpf program");
 
        ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
-                                       unified->container_full_path,
+                                       unified->container_limit_path,
                                        BPF_F_ALLOW_MULTI);
        if (ret)
                return log_error_errno(false, ENOMEM, "Failed to attach bpf program");
@@ -3323,6 +3428,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
        cgfsng_ops->chown = cgfsng_chown;
        cgfsng_ops->mount = cgfsng_mount;
        cgfsng_ops->devices_activate = cgfsng_devices_activate;
+       cgfsng_ops->get_limiting_cgroup = cgfsng_get_limiting_cgroup;
 
        return move_ptr(cgfsng_ops);
 }
index 1e08a017a921f6a99c9a859069e5fb1c2ab923ab..c5bf7941ada88cb8f7a519bf4f3a6ed2500f9aac 100644 (file)
@@ -54,7 +54,11 @@ typedef enum {
  *   init's cgroup (if root).
  *
  * @container_full_path
- * - The full path to the containers cgroup.
+ * - The full path to the container's cgroup.
+ *
+ * @container_limit_path
+ * - The full path to the container's limiting cgroup. May simply point to
+ *   container_full_path.
  *
  * @monitor_full_path
  * - The full path to the monitor's cgroup.
@@ -77,15 +81,18 @@ struct hierarchy {
        char *mountpoint;
        char *container_base_path;
        char *container_full_path;
+       char *container_limit_path;
        char *monitor_full_path;
        int version;
 
        /* cgroup2 only */
        unsigned int bpf_device_controller:1;
 
-       /* monitor cgroup fd */
-       int cgfd_con;
        /* container cgroup fd */
+       int cgfd_con;
+       /* limiting cgroup fd (may be equal to cgfd_con if not separated) */
+       int cgfd_limit;
+       /* monitor cgroup fd */
        int cgfd_mon;
 };
 
@@ -169,6 +176,7 @@ struct cgroup_ops {
        bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
        bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
        void (*payload_finalize)(struct cgroup_ops *ops);
+       const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller);
 };
 
 extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
index 991bca290e21c0a6e99d12b0643f1c44296d7ce0..5ac3f5d9c93e0aeba08e069542c0f6698e72c974 100644 (file)
@@ -84,6 +84,8 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
                [LXC_CMD_UNFREEZE]                      = "unfreeze",
                [LXC_CMD_GET_CGROUP2_FD]                = "get_cgroup2_fd",
                [LXC_CMD_GET_INIT_PIDFD]                = "get_init_pidfd",
+               [LXC_CMD_GET_LIMITING_CGROUP]           = "get_limiting_cgroup",
+               [LXC_CMD_GET_LIMITING_CGROUP2_FD]       = "get_limiting_cgroup2_fd",
        };
 
        if (cmd >= LXC_CMD_MAX)
@@ -142,7 +144,9 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
                rsp->data = rspdata;
        }
 
-       if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) {
+       if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD ||
+           cmd->req.cmd == LXC_CMD_GET_LIMITING_CGROUP2_FD)
+       {
                int cgroup2_fd = move_fd(fd_rsp);
                rsp->data = INT_TO_PTR(cgroup2_fd);
        }
@@ -483,25 +487,14 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
        return 0;
 }
 
-/*
- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
- * particular subsystem. This is the cgroup path relative to the root
- * of the cgroup filesystem.
- *
- * @name      : name of container to connect to
- * @lxcpath   : the lxcpath in which the container is running
- * @subsystem : the subsystem being asked about
- *
- * Returns the path on success, NULL on failure. The caller must free() the
- * returned path.
- */
-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-                             const char *subsystem)
+static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath,
+                                       const char *subsystem,
+                                       lxc_cmd_t command)
 {
        int ret, stopped;
        struct lxc_cmd_rr cmd = {
                .req = {
-                       .cmd = LXC_CMD_GET_CGROUP,
+                       .cmd = command,
                        .data = subsystem,
                        .datalen = 0,
                },
@@ -525,24 +518,72 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
        return cmd.rsp.data;
 }
 
-static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
-                                      struct lxc_handler *handler,
-                                      struct lxc_epoll_descr *descr)
+/*
+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
+ * particular subsystem. This is the cgroup path relative to the root
+ * of the cgroup filesystem.
+ *
+ * @name      : name of container to connect to
+ * @lxcpath   : the lxcpath in which the container is running
+ * @subsystem : the subsystem being asked about
+ *
+ * Returns the path on success, NULL on failure. The caller must free() the
+ * returned path.
+ */
+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+                             const char *subsystem)
+{
+       return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem,
+                                         LXC_CMD_GET_CGROUP);
+}
+
+/*
+ * lxc_cmd_get_limiting_cgroup_path: Calculate a container's limiting cgroup
+ * path for a particular subsystem. This is the cgroup path relative to the
+ * root of the cgroup filesystem. This may be the same as the path returned by
+ * lxc_cmd_get_cgroup_path if the container doesn't have a limiting path prefix
+ * set.
+ *
+ * @name      : name of container to connect to
+ * @lxcpath   : the lxcpath in which the container is running
+ * @subsystem : the subsystem being asked about
+ *
+ * Returns the path on success, NULL on failure. The caller must free() the
+ * returned path.
+ */
+char *lxc_cmd_get_limiting_cgroup_path(const char *name, const char *lxcpath,
+                                      const char *subsystem)
+{
+       return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem,
+                                         LXC_CMD_GET_LIMITING_CGROUP);
+}
+
+static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req,
+                                         struct lxc_handler *handler,
+                                         struct lxc_epoll_descr *descr,
+                                         bool limiting_cgroup)
 {
        int ret;
        const char *path;
+       const void *reqdata;
        struct lxc_cmd_rsp rsp;
        struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
+       const char *(*get_fn)(struct cgroup_ops *ops, const char *controller);
 
        if (req->datalen > 0) {
                ret = validate_string_request(fd, req);
                if (ret != 0)
                        return ret;
-
-               path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
+               reqdata = req->data;
        } else {
-               path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
+               reqdata = NULL;
        }
+       /* limiting_cgroup picks the limiting-path getter, otherwise the full-path one */
+       get_fn = (limiting_cgroup ? cgroup_ops->get_limiting_cgroup
+                                 : cgroup_ops->get_cgroup);
+
+       path = get_fn(cgroup_ops, reqdata);
+
        if (!path)
                return -1;
 
@@ -557,6 +598,20 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
        return 0;
 }
 
+static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
+                                      struct lxc_handler *handler,
+                                      struct lxc_epoll_descr *descr)
+{
+       return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, false);
+}
+
+static int lxc_cmd_get_limiting_cgroup_callback(int fd, struct lxc_cmd_req *req,
+                                               struct lxc_handler *handler,
+                                               struct lxc_epoll_descr *descr)
+{
+       return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, true);
+}
+
 /*
  * lxc_cmd_get_config_item: Get config item the running container
  *
@@ -1366,28 +1421,48 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
        return PTR_TO_INT(cmd.rsp.data);
 }
 
-static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
-                                          struct lxc_handler *handler,
-                                          struct lxc_epoll_descr *descr)
+static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req,
+                                             struct lxc_handler *handler,
+                                             struct lxc_epoll_descr *descr,
+                                             bool limiting_cgroup)
 {
        struct lxc_cmd_rsp rsp = {
                .ret = -EINVAL,
        };
        struct cgroup_ops *ops = handler->cgroup_ops;
-       int ret;
+       int ret, send_fd;
 
        if (!pure_unified_layout(ops) || !ops->unified)
                return lxc_cmd_rsp_send(fd, &rsp);
 
+       send_fd = limiting_cgroup ? ops->unified->cgfd_limit
+                                 : ops->unified->cgfd_con;
+
        rsp.ret = 0;
-       ret = lxc_abstract_unix_send_fds(fd, &ops->unified->cgfd_con, 1, &rsp,
-                                        sizeof(rsp));
+       ret = lxc_abstract_unix_send_fds(fd, &send_fd, 1, &rsp, sizeof(rsp));
        if (ret < 0)
                return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send cgroup2 fd");
 
        return 0;
 }
 
+static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
+                                          struct lxc_handler *handler,
+                                          struct lxc_epoll_descr *descr)
+{
+       return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr,
+                                                 false);
+}
+
+static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd,
+                                                   struct lxc_cmd_req *req,
+                                                   struct lxc_handler *handler,
+                                                   struct lxc_epoll_descr *descr)
+{
+       return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr,
+                                                 true);
+}
+
 static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
                           struct lxc_handler *handler,
                           struct lxc_epoll_descr *descr)
@@ -1415,6 +1490,8 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
                [LXC_CMD_UNFREEZE]                      = lxc_cmd_unfreeze_callback,
                [LXC_CMD_GET_CGROUP2_FD]                = lxc_cmd_get_cgroup2_fd_callback,
                [LXC_CMD_GET_INIT_PIDFD]                = lxc_cmd_get_init_pidfd_callback,
+               [LXC_CMD_GET_LIMITING_CGROUP]           = lxc_cmd_get_limiting_cgroup_callback,
+               [LXC_CMD_GET_LIMITING_CGROUP2_FD]       = lxc_cmd_get_limiting_cgroup2_fd_callback,
        };
 
        if (req->cmd >= LXC_CMD_MAX)
index 9e5248424968b783b1488ab14c12f485e954226d..878998832bf0bfc328fb2325704b9691fa9097ca 100644 (file)
@@ -38,6 +38,8 @@ typedef enum {
        LXC_CMD_UNFREEZE,
        LXC_CMD_GET_CGROUP2_FD,
        LXC_CMD_GET_INIT_PIDFD,
+       LXC_CMD_GET_LIMITING_CGROUP,
+       LXC_CMD_GET_LIMITING_CGROUP2_FD,
        LXC_CMD_MAX,
 } lxc_cmd_t;
 
@@ -129,5 +131,9 @@ extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
 extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout);
 extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout);
 extern int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath);
+extern char *lxc_cmd_get_limiting_cgroup_path(const char *name, /* Limiting-cgroup variant of the cgroup path query for the given subsystem. */
+                                             const char *lxcpath,
+                                             const char *subsystem); /* NOTE(review): ownership of the returned string is not visible here; confirm whether callers must free it. */
+extern int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath); /* Limiting-cgroup variant of lxc_cmd_get_cgroup2_fd(). */
 
 #endif /* __commands_h */
index 2f6be9f26363f571f4f5cb68fdaed3fa284862a2..8d480b049a2d4e81ac6f2052d05ea8ef9224af25 100644 (file)
@@ -3832,6 +3832,9 @@ void lxc_conf_free(struct lxc_conf *conf)
        lxc_clear_apparmor_raw(conf);
        lxc_clear_namespace(conf);
        free(conf->cgroup_meta.dir);
+       free(conf->cgroup_meta.monitor_dir);
+       free(conf->cgroup_meta.container_dir);
+       free(conf->cgroup_meta.namespace_dir);
        free(conf->cgroup_meta.controllers);
        free(conf->shmount.path_host);
        free(conf->shmount.path_cont);
index 64885c35ea68d544f33265dee85abd28ca6d0b60..3ff226b729c9eb1ebab469f6dae11d28229d2182 100644 (file)
@@ -60,6 +60,9 @@ struct lxc_cgroup {
                struct /* meta */ {
                        char *controllers;
                        char *dir;
+                       char *monitor_dir;
+                       char *container_dir;
+                       char *namespace_dir;
                        bool relative;
                };
        };
index 0ca577fa3fb04ecbbb6b430f5a54022476a0987f..59553f23e050ee99c3bd5aaa511a784b9eafd983 100644 (file)
@@ -71,6 +71,9 @@ lxc_config_define(cap_keep);
 lxc_config_define(cgroup_controller);
 lxc_config_define(cgroup2_controller);
 lxc_config_define(cgroup_dir);
+lxc_config_define(cgroup_monitor_dir); /* Presumably declares the set_/get_/clr_config_cgroup_monitor_dir handlers (confirm macro). */
+lxc_config_define(cgroup_container_dir); /* Same for the cgroup_container_dir handlers. */
+lxc_config_define(cgroup_container_inner_dir); /* Same for the cgroup_container_inner_dir handlers. */
 lxc_config_define(cgroup_relative);
 lxc_config_define(console_buffer_size);
 lxc_config_define(console_logfile);
@@ -170,6 +173,9 @@ static struct lxc_config_t config_jump_table[] = {
        { "lxc.cap.drop",                  set_config_cap_drop,                    get_config_cap_drop,                    clr_config_cap_drop,                  },
        { "lxc.cap.keep",                  set_config_cap_keep,                    get_config_cap_keep,                    clr_config_cap_keep,                  },
        { "lxc.cgroup2",                   set_config_cgroup2_controller,          get_config_cgroup2_controller,          clr_config_cgroup2_controller,        },
+       { "lxc.cgroup.dir.monitor",        set_config_cgroup_monitor_dir,          get_config_cgroup_monitor_dir,          clr_config_cgroup_monitor_dir,        },
+       { "lxc.cgroup.dir.container",      set_config_cgroup_container_dir,        get_config_cgroup_container_dir,        clr_config_cgroup_container_dir,      },
+       { "lxc.cgroup.dir.container.inner",set_config_cgroup_container_inner_dir,  get_config_cgroup_container_inner_dir,  clr_config_cgroup_container_inner_dir,},
        { "lxc.cgroup.dir",                set_config_cgroup_dir,                  get_config_cgroup_dir,                  clr_config_cgroup_dir,                },
        { "lxc.cgroup.relative",           set_config_cgroup_relative,             get_config_cgroup_relative,             clr_config_cgroup_relative,           },
        { "lxc.cgroup",                    set_config_cgroup_controller,           get_config_cgroup_controller,           clr_config_cgroup_controller,         },
@@ -1721,6 +1727,48 @@ static int set_config_cgroup_dir(const char *key, const char *value,
        return set_config_string_item(&lxc_conf->cgroup_meta.dir, value);
 }
 
+static int set_config_cgroup_monitor_dir(const char *key, const char *value,
+                                        struct lxc_conf *lxc_conf, void *data)
+{ /* Setter for lxc.cgroup.dir.monitor; targets cgroup_meta.monitor_dir. data is unused. */
+       if (lxc_config_value_empty(value)) /* An empty value is treated as a request to clear the setting. */
+               return clr_config_cgroup_monitor_dir(key, lxc_conf, NULL);
+
+       return set_config_string_item(&lxc_conf->cgroup_meta.monitor_dir,
+                                     value); /* The helper result is returned unchanged. */
+}
+
+static int set_config_cgroup_container_dir(const char *key, const char *value,
+                                          struct lxc_conf *lxc_conf,
+                                          void *data)
+{ /* Setter for lxc.cgroup.dir.container; targets cgroup_meta.container_dir. data is unused. */
+       if (lxc_config_value_empty(value)) /* An empty value is treated as a request to clear the setting. */
+               return clr_config_cgroup_container_dir(key, lxc_conf, NULL);
+
+       return set_config_string_item(&lxc_conf->cgroup_meta.container_dir,
+                                     value); /* The helper result is returned unchanged. */
+}
+
+static int set_config_cgroup_container_inner_dir(const char *key,
+                                                const char *value,
+                                                struct lxc_conf *lxc_conf,
+                                                void *data)
+{ /* Setter for lxc.cgroup.dir.container.inner; note the value is kept in cgroup_meta.namespace_dir. */
+       if (lxc_config_value_empty(value)) /* An empty value is treated as a request to clear the setting. */
+               return clr_config_cgroup_container_inner_dir(key, lxc_conf,
+                                                            NULL);
+
+       if (strchr(value, '/') || /* Reject any value containing a path separator, */
+           strcmp(value, ".") == 0 || /* the current-directory entry, */
+           strcmp(value, "..") == 0) /* and the parent-directory entry: only one plain path component is accepted. */
+       {
+               ERROR("lxc.cgroup.dir.container.inner must be a single directory name");
+               return -1; /* Invalid value: the stored setting is left untouched. */
+       }
+
+       return set_config_string_item(&lxc_conf->cgroup_meta.namespace_dir,
+                                     value); /* The helper result is returned unchanged. */
+}
+
 static int set_config_cgroup_relative(const char *key, const char *value,
                                      struct lxc_conf *lxc_conf, void *data)
 {
@@ -3644,6 +3692,58 @@ static int get_config_cgroup_dir(const char *key, char *retv, int inlen,
        return fulllen;
 }
 
+static int get_config_cgroup_monitor_dir(const char *key, char *retv, int inlen,
+                                        struct lxc_conf *lxc_conf, void *data)
+{ /* Getter for lxc.cgroup.dir.monitor: formats cgroup_meta.monitor_dir into retv and returns fulllen. */
+       int len; /* Not referenced directly in this body; presumably scratch for the strprint() macro (confirm). */
+       int fulllen = 0; /* Return value; presumably advanced by strprint() (confirm). */
+
+       if (!retv)
+               inlen = 0; /* No output buffer: hand a zero size to strprint(). */
+       else
+               memset(retv, 0, inlen); /* Zero the supplied buffer before writing. */
+
+       strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.monitor_dir); /* NOTE(review): monitor_dir may be unset here; confirm that a NULL argument is safe for this format. */
+
+       return fulllen;
+}
+
+static int get_config_cgroup_container_dir(const char *key, char *retv,
+                                          int inlen,
+                                          struct lxc_conf *lxc_conf,
+                                          void *data)
+{ /* Getter for lxc.cgroup.dir.container: formats cgroup_meta.container_dir into retv and returns fulllen. */
+       int len; /* Not referenced directly in this body; presumably scratch for the strprint() macro (confirm). */
+       int fulllen = 0; /* Return value; presumably advanced by strprint() (confirm). */
+
+       if (!retv)
+               inlen = 0; /* No output buffer: hand a zero size to strprint(). */
+       else
+               memset(retv, 0, inlen); /* Zero the supplied buffer before writing. */
+
+       strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.container_dir); /* NOTE(review): container_dir may be unset here; confirm that a NULL argument is safe for this format. */
+
+       return fulllen;
+}
+
+static int get_config_cgroup_container_inner_dir(const char *key, char *retv,
+                                                int inlen,
+                                                struct lxc_conf *lxc_conf,
+                                                void *data)
+{ /* Getter for lxc.cgroup.dir.container.inner: formats cgroup_meta.namespace_dir into retv and returns fulllen. */
+       int len; /* Not referenced directly in this body; presumably scratch for the strprint() macro (confirm). */
+       int fulllen = 0; /* Return value; presumably advanced by strprint() (confirm). */
+
+       if (!retv)
+               inlen = 0; /* No output buffer: hand a zero size to strprint(). */
+       else
+               memset(retv, 0, inlen); /* Zero the supplied buffer before writing. */
+
+       strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.namespace_dir); /* NOTE(review): namespace_dir may be unset here; confirm that a NULL argument is safe for this format. */
+
+       return fulllen;
+}
+
 static inline int get_config_cgroup_relative(const char *key, char *retv,
                                             int inlen, struct lxc_conf *lxc_conf,
                                             void *data)
@@ -4458,6 +4558,30 @@ static int clr_config_cgroup_dir(const char *key, struct lxc_conf *lxc_conf,
        return 0;
 }
 
+static int clr_config_cgroup_monitor_dir(const char *key,
+                                        struct lxc_conf *lxc_conf,
+                                        void *data)
+{ /* Clear handler for lxc.cgroup.dir.monitor; key and data are unused. */
+       free_disarm(lxc_conf->cgroup_meta.monitor_dir); /* Presumably frees the string and resets the pointer (confirm free_disarm). */
+       return 0; /* Always reports success. */
+}
+
+static int clr_config_cgroup_container_dir(const char *key,
+                                          struct lxc_conf *lxc_conf,
+                                          void *data)
+{ /* Clear handler for lxc.cgroup.dir.container; key and data are unused. */
+       free_disarm(lxc_conf->cgroup_meta.container_dir); /* Presumably frees the string and resets the pointer (confirm free_disarm). */
+       return 0; /* Always reports success. */
+}
+
+static int clr_config_cgroup_container_inner_dir(const char *key,
+                                                struct lxc_conf *lxc_conf,
+                                                void *data)
+{ /* Clear handler for lxc.cgroup.dir.container.inner (stored in namespace_dir); key and data are unused. */
+       free_disarm(lxc_conf->cgroup_meta.namespace_dir); /* Presumably frees the string and resets the pointer (confirm free_disarm). */
+       return 0; /* Always reports success. */
+}
+
 static inline int clr_config_cgroup_relative(const char *key,
                                             struct lxc_conf *lxc_conf,
                                             void *data)
index 1a909bb6c4fee68adbb7f4d65f382872111b3757..2485accc0e9121543747eae909c3045f6919fa99 100644 (file)
@@ -303,7 +303,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
                 * the handler the restore task created.
                 */
                if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) {
-                       path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
+                       path = lxc_cmd_get_limiting_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
                        if (!path) {
                                ERROR("failed to get cgroup path for %s", controllers[0]);
                                goto err;
@@ -311,7 +311,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
                } else {
                        const char *p;
 
-                       p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
+                       p = cgroup_ops->get_limiting_cgroup(cgroup_ops, controllers[0]);
                        if (!p) {
                                ERROR("failed to get cgroup path for %s", controllers[0]);
                                goto err;
@@ -406,9 +406,9 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
                DECLARE_ARG("-t");
                DECLARE_ARG(pid);
 
-               freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
-                                                          opts->c->config_path,
-                                                          "freezer");
+               freezer_relative = lxc_cmd_get_limiting_cgroup_path(opts->c->name,
+                                                                   opts->c->config_path,
+                                                                   "freezer");
                if (!freezer_relative) {
                        ERROR("failed getting freezer path");
                        goto err;