From e5cc7a6be277a3aa1873bcbf66eca6eaf3661ddf Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 3 Apr 2025 13:26:42 +0200 Subject: [PATCH] lxc/start: do prctl(PR_SET_DUMPABLE) after last uid/gid switch MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We need to do prctl(PR_SET_DUMPABLE) later, after last lxc_switch_uid_gid() call. Because otherwise, our earlier call won't be effective as commit_creds() in the kernel [1] will set_dumpable(task->mm, suid_dumpable) if UID/GID or capabilities were affected by lxc_switch_uid_gid() call. This only affects LXC API ->start(struct lxc_container *c, int useinit, char *const argv[]) call when useinit == 1 because in this case we don't perform additional exec() and task's dumpable bit remains set to 2 (default value taken from /proc/sys/fs/suid_dumpable). If useinit == 0, then we do exec() (see start_ops->start callback) and then dumpable flag will be reset in begin_new_exec() to SUID_DUMP_USER=1 [2]. Then everything will be fine. Reproducer (problem with lxc-attach). 1. Create unprivileged container $ ./normalbuild/src/lxc/tools/lxc-create -n testcaps -t download with busybox template and config: lxc.idmap = u 0 100000 65536 lxc.idmap = g 0 100000 65536 lxc.init.uid = 1234 lxc.init.gid = 4321 lxc.init.cwd = / lxc.sched.core = 1 2. Run a container with useinit = 1 $ ./lxcbuild/src/lxc/tools/lxc-execute -n testcaps -l TRACE -o /home/ubuntu/debug.log -- /bin/sleep 100 3. Try to attach $ strace -f -e prctl ./normalbuild/src/lxc/tools/lxc-attach -n testcaps prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1 prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = 1 prctl(PR_CAPBSET_READ, 0x2c /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, 0x2a /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, 0x29 /* CAP_??? 
*/) = -1 EINVAL (Invalid argument) prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, 4124, 0 /* PIDTYPE_PID */, [0xd00f7fff]) = 0 strace: Process 4165 attached strace: Process 4166 attached [pid 4166] +++ exited with 0 +++ [pid 4164] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=4166, si_uid=100000, si_status=0, si_utime=0, si_stime=0} --- strace: Process 4167 attached [pid 4167] prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, 1, 0 /* PIDTYPE_PID */, NULL) = -1 EPERM (Operation not permitted) <<<<< OOPS [pid 4165] +++ exited with 0 +++ [pid 4164] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=4165, si_uid=0, si_status=0, si_utime=0, si_stime=0} --- lxc-attach: testcaps: ../src/lxc/attach.c: do_attach: 1160 Operation not permitted - Failed to join core scheduling domain of 4124 lxc-attach: testcaps: ../src/lxc/attach.c: do_attach: 1382 Failed to attach to container prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM...) fails with EPERM, because: - container's init task->mm: (get_dumpable(mm) != SUID_DUMP_USER) AND - mm->user_ns == init_user_ns (as there was no exec() and mm_struct->user_ns was set in the initial user namespace when we run lxc-execute) ( for more details see [3] ) [1] https://github.com/torvalds/linux/blob/acc4d5ff0b61eb1715c498b6536c38c1feb7f3c1/kernel/cred.c#L412 [2] https://github.com/torvalds/linux/blob/a2cc6ff5ec8f91bc463fd3b0c26b61166a07eb11/fs/exec.c#L1331 [3] https://github.com/torvalds/linux/blob/acc4d5ff0b61eb1715c498b6536c38c1feb7f3c1/kernel/ptrace.c#L344 Reported-by: Stéphane Graber Signed-off-by: Alexander Mikhalitsyn --- src/lxc/start.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lxc/start.c b/src/lxc/start.c index 8daa7d73c..f28bceaba 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -1125,11 +1125,6 @@ static int do_start(void *data) if (!lxc_switch_uid_gid(nsuid, nsgid)) goto out_warn_father; - ret = prctl(PR_SET_DUMPABLE, prctl_arg(1), prctl_arg(0), - prctl_arg(0), prctl_arg(0)); - if 
(ret < 0) - goto out_warn_father; - /* set{g,u}id() clears deathsignal */ ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd); if (ret < 0) { @@ -1420,6 +1415,11 @@ static int do_start(void *data) if (!lxc_switch_uid_gid(new_uid, new_gid)) goto out_warn_father; + ret = prctl(PR_SET_DUMPABLE, prctl_arg(1), prctl_arg(0), + prctl_arg(0), prctl_arg(0)); + if (ret < 0) + goto out_warn_father; + ret = lxc_ambient_caps_down(); if (ret < 0) { ERROR("Failed to clear ambient capabilities"); -- 2.47.2