From: Christian Brauner Date: Thu, 1 Jul 2021 07:51:30 +0000 (+0200) Subject: cgroups: handle funky cgroup layouts X-Git-Tag: lxc-5.0.0~145^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F3891%2Fhead;p=thirdparty%2Flxc.git cgroups: handle funky cgroup layouts Old versions of Docker emulate a cgroup namespace by bind-mounting the container's cgroup over the corresponding controller: /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime master:11 - cgroup cgroup rw,xattr,name=systemd /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,net_cls,net_prio /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime master:16 - cgroup cgroup rw,cpu,cpuacct /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime master:17 - cgroup cgroup rw,memory /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime master:18 - cgroup cgroup rw,devices 
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime master:19 - cgroup cgroup rw,hugetlb /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime master:20 - cgroup cgroup rw,perf_event /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime master:21 - cgroup cgroup rw,cpuset /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime master:22 - cgroup cgroup rw,blkio /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime master:23 - cgroup cgroup rw,pids /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime master:24 - cgroup cgroup rw,freezer New versions of LXC always stash a file descriptor for the root of the cgroup mount at /sys/fs/cgroup and then resolve the current cgroup parsed from /proc/{1,self}/cgroup relative to that file descriptor. This doesn't work when the caller's cgroup is mounted over the controllers. 
Older versions of LXC simply counted such layouts as having no cgroups available for delegation at all and moved on, provided no cgroup limits were requested. But mainline LXC would fail such layouts. While I would argue that failing such layouts is the semantically clean approach, we shouldn't regress users, so make mainline LXC treat such cgroup layouts as having no cgroups available for delegation. Fixes: #3890 Signed-off-by: Christian Brauner --- diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index bb21696e0..2fa4952ae 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -3194,8 +3194,15 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative, dfd_base = open_at(dfd_mnt, current_cgroup, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0); - if (dfd_base < 0) - return syserror("Failed to open %d/%s", dfd_mnt, current_cgroup); + if (dfd_base < 0) { + if (errno != ENOENT) + return syserror("Failed to open %d/%s", + dfd_mnt, current_cgroup); + + SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)", + dfd_mnt, current_cgroup); + continue; + } dfd = dfd_base; } @@ -3265,9 +3272,15 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative, dfd_base = open_at(dfd_mnt, current_cgroup, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0); - if (dfd_base < 0) - return syserror("Failed to open %d/%s", - dfd_mnt, current_cgroup); + if (dfd_base < 0) { + if (errno != ENOENT) + return syserror("Failed to open %d/%s", + dfd_mnt, current_cgroup); + + SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)", + dfd_mnt, current_cgroup); + continue; + } dfd = dfd_base; }